├── .gitignore ├── .travis.yml ├── Changelog ├── LICENSE ├── MANIFEST.in ├── README.rst ├── appveyor.yml ├── datarray ├── LICENSE ├── __init__.py ├── datarray.py ├── print_grid.py ├── testing │ ├── __init__.py │ ├── testlib.py │ ├── tests │ │ └── test_utils.py │ └── utils.py ├── tests │ ├── __init__.py │ ├── test_bugfixes.py │ ├── test_data_array.py │ └── test_print.py └── version.py ├── doc ├── Makefile ├── README.txt ├── devel │ └── make_release.rst ├── doc-requirements.txt └── source │ ├── basic_data_array.rst │ ├── conf.py │ ├── design │ ├── array_axes.svg │ ├── design.rst │ ├── index.rst │ └── issues.rst │ ├── generated │ └── .gitignore │ ├── index.rst │ ├── license.rst │ ├── licenses │ ├── numpydoc_license.rst │ └── pandas_license.rst │ ├── ndarray_methods.rst │ ├── other_projects │ ├── index.rst │ ├── larry_overview.rst │ └── pandas_overview.rst │ └── printing.rst ├── examples └── inference_algs.py ├── requirements.txt ├── setup.cfg ├── setup.py └── tools └── release.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[oc] 2 | *.so 3 | __pycache__/ 4 | # setup.py working directory 5 | build 6 | # setup.py dist directory 7 | dist/ 8 | # Documentation build files 9 | doc/build 10 | # Editor temporary/working/backup files 11 | *$ 12 | .*.sw[nop] 13 | .sw[nop] 14 | *~ 15 | [#]*# 16 | .#* 17 | *.bak 18 | *.tmp 19 | *.tgz 20 | *.rej 21 | *.org 22 | .project 23 | *.diff 24 | .settings/ 25 | # Egg metadata 26 | ./*.egg-info 27 | # The shelf plugin uses this dir 28 | ./.shelf 29 | # Mac droppings 30 | .DS_Store 31 | 32 | # Build products 33 | MANIFEST 34 | *.egg-info/ 35 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | cache: 4 | directories: 5 | - $HOME/.cache/pip 6 | 7 | env: 8 | global: 9 | - DEPENDS="numpy" 10 | - INSTALL_TYPE="setup" 11 | - MANYLINUX_URL=https://nipy.bic.berkeley.edu/manylinux 12 | 13 | python: 14 | - 3.3 15 | - 3.4 16 | - 3.5 17 | 18 | matrix: 19 | include: 20 | - python: 2.6 21 | env: 22 | # Last networkx version that runs on 2.6 23 | - NETWORKX_VER_SPEC="==1.9" 24 | - python: 2.7 25 | env: 26 | - COVERAGE=1 27 | - DOCTESTS=1 28 | - DOC_DOCTEST=1 29 | # Absolute minimum dependencies 30 | - python: 2.7 31 | env: 32 | # Check numpy minimum version in datarray/version.py 33 | - DEPENDS="numpy==1.7.0" 34 | - python: 2.7 35 | env: 36 | - INSTALL_TYPE=sdist 37 | - python: 2.7 38 | env: 39 | - INSTALL_TYPE=wheel 40 | - python: 2.7 41 | env: 42 | - INSTALL_TYPE=requirements 43 | # test against pre-release builds 44 | - python: 3.5 45 | env: 46 | - EXTRA_PIP_FLAGS="--pre" 47 | 48 | before_install: 49 | - virtualenv --python=python venv 50 | - source venv/bin/activate 51 | - pip install -U pip wheel 52 | - pip install -f $MANYLINUX_URL $EXTRA_PIP_FLAGS $DEPENDS 53 | - if [ "${COVERAGE}" == "1" ]; then 54 | pip install coverage; 55 | pip install coveralls; 56 | fi 57 | 58 | install: 59 | - | 60 | if [ "$INSTALL_TYPE" == "setup" ]; then 61 | python setup.py install 62 | elif [ "$INSTALL_TYPE" == "sdist" ]; then 63 | python setup.py egg_info # check egg_info while we're here 64 | python setup.py sdist 65 | pip install -f $MANYLINUX_URL dist/*.tar.gz 66 | elif [ "$INSTALL_TYPE" == "wheel" ]; then 67 | pip install wheel 68 | python setup.py bdist_wheel 69 | pip install -f $MANYLINUX_URL dist/*.whl 70 | elif [ "$INSTALL_TYPE" == "requirements" ]; then 71 | pip install -f 
$MANYLINUX_URL -r requirements.txt 72 | python setup.py install 73 | fi 74 | 75 | script: 76 | - pip install nose 77 | # Change into an innocuous directory and find tests from installation 78 | - mkdir for_testing 79 | - cd for_testing 80 | - if [ "${COVERAGE}" == "1" ]; then 81 | cp ../.coveragerc .; 82 | COVER_ARGS="--with-coverage --cover-package datarray"; 83 | fi 84 | - if [ "${DOCTESTS}" == "1" ]; then 85 | DOCTEST_ARGS="--with-doctest"; 86 | fi 87 | # Run unit tests 88 | - nosetests $COVER_ARGS $DOCTEST_ARGS datarray 89 | # Run example to check for errors 90 | - pip install networkx${NETWORKX_VER_SPEC} 91 | - python ../examples/inference_algs.py 92 | # Run doc doctests 93 | - if [ "${DOC_DOCTEST}" == "1" ]; then 94 | pip install sphinx; 95 | (cd ../doc && make doctest); 96 | fi 97 | 98 | after_success: 99 | - if [ "${COVERAGE}" == "1" ]; then coveralls; fi 100 | -------------------------------------------------------------------------------- /Changelog: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | .. vim:ft=rst 3 | 4 | .. _changelog: 5 | 6 | ################### 7 | Datarray change log 8 | ################### 9 | 10 | The main authors of datarray are: 11 | 12 | * Fernando Perez (FP); 13 | * Matthew Brett (MB); 14 | * Mike Trumpis (MT); 15 | * Jonathan Terhorst (JT); 16 | * Keith Goodman (KG). 17 | 18 | ******** 19 | Releases 20 | ******** 21 | 22 | * 0.1.0 (TBA) 23 | 24 | * Fixed bug in axis sorting leading to unpredictable errors slicing 25 | DataArrays; 26 | * Added 'any' and 'all' as reduction methods, fixing incorrect retention of 27 | axes with these methods; 28 | * Port to Python >= 3.3 in common codebase with Python 2; 29 | * Move from fperez personal github account to BIDS github organization. 30 | 31 | * 0.0.6 (Wednesday November 10 2010) 32 | 33 | * 0.0.5 (Friday October 8 2010) 34 | 35 | * 0.0.4 (Wednesday October 6 2010) 36 | 37 | * 0.0.3 (Thursday July 29 2010) 38 | 39 | * 0.0.2 (Wednesday July 28 2010) 40 | 41 | * 0.0.1 (Tuesday July 27 2010) 42 | 43 | * Initial release 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Note: the full license file is datarray/LICENSE 2 | 3 | This is so that the license can be installed by distutils along with the real 4 | package for end users. 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | 3 | graft datarray 4 | 5 | graft doc 6 | exclude doc/\#* 7 | exclude doc/man/*.1 8 | 9 | # docs subdirs we want to skip 10 | prune doc/build 11 | 12 | global-exclude *~ 13 | global-exclude *.flc 14 | global-exclude *.pyc 15 | global-exclude .dircopy.log 16 | global-exclude .git 17 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://travis-ci.org/BIDS/datarray.svg?branch=master 2 | :target: https://travis-ci.org/BIDS/datarray 3 | 4 | ###################################### 5 | Datarray: Numpy arrays with named axes 6 | ###################################### 7 | 8 | Scientists, engineers, mathematicians and statisticians don't just work with 9 | matrices; they often work with structured data, just like you'd find in a 10 | table. 
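
With plain Numpy, which axis plays which role in such a table lives only in
the programmer's head. A small, made-up illustration of the problem::

    >>> import numpy as np
    >>> sales = np.array([[1, 2, 3], [4, 5, 6]])  # rows: days, columns: stores
    >>> sales.sum(axis=0)  # you must remember that axis 0 means "days"
    array([5, 7, 9])
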
However, functionality for this is missing from Numpy, and there are
11 | efforts to create something to fill the void. This is one of those efforts.
12 | 
13 | .. warning::
14 | 
15 |    This code is currently experimental, and its API *will* change! It is meant
16 |    to be a place for the community to understand and develop the right
17 |    semantics and have a prototype implementation that will ultimately
18 |    (hopefully) be folded back into Numpy.
19 | 
20 | Datarray provides a subclass of Numpy ndarrays that support:
21 | 
22 | - individual dimensions (axes) being labeled with meaningful descriptions
23 | - labeled 'ticks' along each axis
24 | - indexing and slicing by named axis
25 | - indexing on any axis with the tick labels instead of only integers
26 | - reduction operations (like .sum, .mean, etc) supporting named axis arguments
27 |   instead of only integer indices.
28 | 
29 | *********
30 | Prior Art
31 | *********
32 | 
33 | In no particular order:
34 | 
35 | * `xarray <https://github.com/pydata/xarray>`_ - very close in spirit to
36 |   this package, xarray implements named ND array axes and tick labels. It
37 |   integrates with (and depends on) Pandas. If you are doing production work,
38 |   and don't mind the pandas dependency, please use xarray rather than this
39 |   package. Xarray used to be called "xray".
40 | 
41 | * `pandas <http://pandas.pydata.org>`_ is based around a number of
42 |   DataFrame-esque datatypes.
43 | 
44 | * `Tabular `_ implements a
45 |   spreadsheet-inspired datatype, with rows/columns, csv/etc. IO, and fancy
46 |   tabular operations.
47 | 
48 | * `scikits.statsmodels `_ sounded as
49 |   though it had some features we'd like to eventually see implemented on top of
50 |   something such as datarray, and `Skipper `_
51 |   seemed pretty interested in something like this himself.
52 | 
53 | * `scikits.timeseries `_ also has a
54 |   time-series-specific object that's somewhat reminiscent of labeled arrays.
55 | 
56 | * `pydataframe `_ is supposed to be a
57 |   clone of R's data.frame.
58 | 
59 | * `larry `_, or "labeled array," often comes up
60 |   in discussions alongside pandas.
61 | 
62 | * `divisi `_ includes labeled sparse and
63 |   dense arrays.
64 | 
65 | * `pymvpa `_ provides a Dataset class
66 |   encapsulating the data together with length-matched sets of attributes
67 |   for the first two (samples and features) dimensions. Dataset is not a
68 |   subclass of numpy array, to allow other data structures (e.g. sparse
69 |   matrices).
70 | 
71 | * `ptsa `_ subclasses
72 |   ndarray to provide per-dimension attributes, aiming to ease slicing and
73 |   indexing by the values of those attributes.
74 | 
75 | *************
76 | Project Goals
77 | *************
78 | 
79 | 1. Get something akin to this in the numpy core;
80 | 2. Stick to basic functionality such that projects like scikits.statsmodels can
81 |    use it as a base datatype;
82 | 3. Make an interface that allows for simple, pretty manipulation that doesn't
83 |    introduce confusion;
84 | 4. Oh, and make sure that the base numpy array is still accessible.
85 | 
86 | ****
87 | Code
88 | ****
89 | 
90 | You can find our sources and single-click downloads:
91 | 
92 | * `Main repository`_ on Github;
93 | * Documentation_ for the current release;
94 | * Download the `current trunk`_ as a tar/zip file;
95 | * Downloads of all `available releases`_.
96 | 
97 | The latest released version is always available from `pypi
98 | <https://pypi.python.org/pypi/datarray>`_.
99 | 
100 | *******
101 | Support
102 | *******
103 | 
104 | Please put up issues on the `datarray issue tracker
105 | <https://github.com/BIDS/datarray/issues>`_.
106 | 
107 | ..
_main repository: http://github.com/bids/datarray 108 | .. _Documentation: http://bids.github.com/datarray 109 | .. _current trunk: http://github.com/bids/datarray/archives/master 110 | .. _available releases: http://github.com/bids/datarray/releases 111 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # vim ft=yaml 2 | # CI on Windows via appveyor 3 | # Largely from: 4 | # https://github.com/ogrisel/python-appveyor-demo/blob/master/appveyor.yml 5 | 6 | environment: 7 | 8 | matrix: 9 | 10 | - PYTHON: "C:\\Python27" 11 | PYTHON_VERSION: "2.7.x" # currently 2.7.10 12 | PYTHON_ARCH: "32" 13 | 14 | - PYTHON: "C:\\Python27-x64" 15 | PYTHON_VERSION: "2.7.x" # currently 2.7.10 16 | PYTHON_ARCH: "64" 17 | 18 | - PYTHON: "C:\\Python34" 19 | PYTHON_VERSION: "3.4.x" # currently 3.4.3 20 | PYTHON_ARCH: "32" 21 | 22 | - PYTHON: "C:\\Python34-x64" 23 | PYTHON_VERSION: "3.4.x" # currently 3.4.3 24 | PYTHON_ARCH: "64" 25 | 26 | - PYTHON: "C:\\Python35" 27 | PYTHON_VERSION: "3.5.x" # currently 3.5.0 28 | PYTHON_ARCH: "32" 29 | 30 | - PYTHON: "C:\\Python35-x64" 31 | PYTHON_VERSION: "3.5.x" # currently 3.5.0 32 | PYTHON_ARCH: "64" 33 | 34 | install: 35 | - cmd: echo "Using cmd" 36 | # Prepend newly installed Python to the PATH of this build (this cannot be 37 | # done from inside the powershell script as it would require to restart 38 | # the parent CMD process). 39 | - SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH% 40 | # Check that we have the expected version and architecture for Python 41 | - python --version 42 | - python -c "import struct; print(struct.calcsize('P') * 8)" 43 | # Upgrade pip 44 | - python -m pip install --upgrade pip 45 | 46 | build_script: 47 | # Install with dependencies 48 | - pip install nose numpy 49 | - pip install . 50 | 51 | test_script: 52 | # Run the project tests 53 | - mkdir tmp_for_test 54 | - cd tmp_for_test 55 | - nosetests datarray 56 | - cd .. 57 | -------------------------------------------------------------------------------- /datarray/LICENSE: -------------------------------------------------------------------------------- 1 | ======= 2 | License 3 | ======= 4 | 5 | The ``datarray`` package is distributed under a Simplified BSD license. Parts 6 | of NumPy, larry and numpydoc, which all have BSD licenses, are included in 7 | datarray. 8 | 9 | datarray license 10 | ---------------- 11 | 12 | Copyright (c) 2009-2016, NumPy Developers. 13 | All rights reserved. 14 | 15 | Redistribution and use in source and binary forms, with or without 16 | modification, are permitted provided that the following conditions are 17 | met: 18 | 19 | * Redistributions of source code must retain the above copyright 20 | notice, this list of conditions and the following disclaimer. 21 | 22 | * Redistributions in binary form must reproduce the above 23 | copyright notice, this list of conditions and the following 24 | disclaimer in the documentation and/or other materials provided 25 | with the distribution. 26 | 27 | * Neither the name of the NumPy Developers nor the names of any 28 | contributors may be used to endorse or promote products derived 29 | from this software without specific prior written permission. 
30 | 31 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 32 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 33 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 34 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 35 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 36 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 37 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 38 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 39 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 40 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 41 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 | 43 | 44 | la license 45 | ---------- 46 | 47 | Copyright (c) 2008, 2009, 2010, Archipel Asset Management AB. 48 | All rights reserved. 49 | 50 | Redistribution and use in source and binary forms, with or without 51 | modification, are permitted provided that the following conditions are met: 52 | 53 | * Redistributions of source code must retain the above copyright notice, 54 | this list of conditions and the following disclaimer. 55 | 56 | * Redistributions in binary form must reproduce the above copyright 57 | notice, this list of conditions and the following disclaimer in the 58 | documentation and/or other materials provided with the distribution. 59 | 60 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 61 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 62 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 63 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 64 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 65 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 66 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 67 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 68 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 69 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 70 | POSSIBILITY OF SUCH DAMAGE. 71 | 72 | 73 | NumPy license 74 | ------------- 75 | 76 | Copyright (c) 2005-2009, NumPy Developers. 77 | All rights reserved. 78 | 79 | Redistribution and use in source and binary forms, with or without 80 | modification, are permitted provided that the following conditions are 81 | met: 82 | 83 | * Redistributions of source code must retain the above copyright 84 | notice, this list of conditions and the following disclaimer. 85 | 86 | * Redistributions in binary form must reproduce the above 87 | copyright notice, this list of conditions and the following 88 | disclaimer in the documentation and/or other materials provided 89 | with the distribution. 90 | 91 | * Neither the name of the NumPy Developers nor the names of any 92 | contributors may be used to endorse or promote products derived 93 | from this software without specific prior written permission. 94 | 95 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 96 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 97 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 98 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
99 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
100 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
101 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
102 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
103 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
104 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
105 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
106 | 
--------------------------------------------------------------------------------
/datarray/__init__.py:
--------------------------------------------------------------------------------
1 | """Arrays with rich geometric semantics.
2 | """
3 | #-----------------------------------------------------------------------------
4 | # Imports
5 | #-----------------------------------------------------------------------------
6 | # Stdlib
7 | from __future__ import print_function
8 | 
9 | import distutils.version as v
10 | 
11 | # Third-party
12 | import numpy as np
13 | # datarray uses the __array_prepare__ method introduced in numpy 1.4.0
14 | if v.LooseVersion(np.__version__) < v.LooseVersion('1.4'):
15 |     raise ImportError('Numpy version >= 1.4 is required to use datarray')
16 | 
17 | # Our own
18 | try:
19 |     from .testing.testlib import test
20 | except ImportError:
21 |     print("No datarray unit testing available.")
22 | 
23 | from .version import __version__
24 | from .datarray import DataArray
25 | 
--------------------------------------------------------------------------------
/datarray/print_grid.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for pretty-printing tabular data, such as a DataArray, as a grid.
3 | """
4 | import sys
5 | if sys.version_info[0] < 3:
6 |     range = xrange
7 | 
8 | import numpy as np
9 | 
10 | class GridDataFormatter(object):
11 |     """
12 |     A GridDataFormatter takes an ndarray of objects and represents them as
13 |     equal-length strings. It is flexible about what string length to use,
14 |     and can make suggestions about the string length based on the data it
15 |     will be asked to render.
16 | 
17 |     Each GridDataFormatter instance specifies:
18 | 
19 |     - `min_width`, the smallest acceptable width
20 |     - `standard_width`, a reasonable width when putting many items on the
21 |       screen
22 |     - `max_width`, the width it prefers if space is not limited
23 | 
24 |     This top-level class specifies reasonable defaults for a formatter, and
25 |     subclasses refine it for particular data types.
26 |     """
27 |     def __init__(self, data=None):
28 |         self.data = data
29 | 
30 |     def min_width(self):
31 |         return 1
32 | 
33 |     def standard_width(self):
34 |         return min(9, self.max_width())
35 | 
36 |     def max_width(self):
37 |         if self.data is None:
38 |             # no information, so just use all the space we're given
39 |             return 100
40 |         return max([len(str(val)) for val in self.data.flat])
41 | 
42 |     def format(self, value, width=None):
43 |         """
44 |         Formats a given value to a fixed width.
45 |         """
46 |         if width is None: width = self.standard_width()
47 |         return '{0:<{width}}'.format(value, width=width)[:width]
48 | 
49 |     def format_all(self, values, width=None):
50 |         """
51 |         Formats an array of values to a fixed width, returning a string array.
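
        A small illustration (the values here are invented for the example)::

            >>> import numpy as np
            >>> f = GridDataFormatter()
            >>> out = f.format_all(np.array([['a', 'bb'], ['ccc', 'd']]), width=3)
            >>> out.shape
            (2, 2)
            >>> [str(s) for s in out.flat]
            ['a  ', 'bb ', 'ccc', 'd  ']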
52 | """ 53 | if width is None: width = self.standard_width() 54 | out = np.array([self.format(value, width) for value in values.flat]) 55 | return out.reshape(values.shape) 56 | 57 | class FloatFormatter(GridDataFormatter): 58 | """ 59 | Formats floating point numbers either in standard or exponential notation, 60 | whichever fits better and represents the numbers better in the given amount 61 | of space. 62 | """ 63 | def __init__(self, data, sign=False, strip_zeros=True): 64 | GridDataFormatter.__init__(self, data) 65 | flat = data.flatten() 66 | absolute = np.abs(flat.compress((flat != 0) & ~np.isnan(flat) & ~np.isinf(flat))) 67 | if sign: self.sign = '+' 68 | else: self.sign = ' ' 69 | self.strip_zeros = strip_zeros 70 | if len(absolute): 71 | self.max_val = np.max(absolute) 72 | self.min_val = np.min(absolute) 73 | self.leading_digits = max(1, int(np.log10(self.max_val)) + 1) 74 | self.leading_zeros = max(0, int(np.ceil(-np.log10(self.min_val)))) 75 | else: 76 | self.max_val = self.min_val = 0 77 | self.leading_digits = 1 78 | self.leading_zeros = 0 79 | self.large_exponent = (self.leading_digits >= 101) or (self.leading_zeros >= 100) 80 | 81 | def min_width(self): 82 | return min(self._min_width_standard(), self._min_width_exponential()) 83 | 84 | def _min_width_standard(self): 85 | # 1 character for sign 86 | # enough room for all the leading digits 87 | # 1 character for decimal point 88 | # enough room for all the leading zeros 89 | # 1 more digit 90 | return self.leading_digits + self.leading_zeros + 3 91 | 92 | def _min_width_exponential(self): 93 | # enough room for -3.1e+nn or -3.1e+nnn 94 | return self.large_exponent + 8 95 | 96 | def standard_width(self): 97 | return self.min_width() + 2 98 | 99 | def max_width(self): 100 | return min(self.leading_digits + 8, 16) 101 | 102 | def format(self, value, width=None): 103 | if width is None: width = self.standard_width() 104 | if self._use_exponential_format(width): 105 | return self._format_exponential(value, width) 106 | else: 107 | return self._format_standard(value, width) 108 | 109 | def _format_exponential(self, value, width): 110 | precision = max(1, width - 7 - self.large_exponent) 111 | return '{0:<{sign}{width}.{precision}e}'.format(value, 112 | width=width, 113 | sign=self.sign, 114 | precision=precision) 115 | 116 | def _format_standard(self, value, width): 117 | precision = max(1, width - 2 - self.leading_digits) 118 | result = '{0:>{sign}{width}.{precision}f}'.format(value, width=width, 119 | sign=self.sign, 120 | precision=precision) 121 | if self.strip_zeros: 122 | return '{0:<{width}}'.format(result.rstrip('0'), width=width) 123 | else: return result 124 | 125 | def _use_exponential_format(self, width): 126 | """ 127 | The FloatFormatter will use exponential format if the standard format 128 | cannot accurately represent all the numbers in the given width. 129 | 130 | This criterion favors standard format more than NumPy's arrayprint. 131 | """ 132 | return (width < self._min_width_standard()) 133 | 134 | def format_all(self, values, width=None): 135 | """ 136 | Formats an array of values to a fixed width, returning a string array. 
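
        A rough sketch of the width-driven switch between notations (the
        values are chosen arbitrarily)::

            >>> import numpy as np
            >>> f = FloatFormatter(np.array([0.001, 100.0]))
            >>> f._min_width_standard()  # 1 sign + 3 digits + point + 3 zeros + 1
            9
            >>> f._use_exponential_format(8)  # too narrow for standard notation
            True
            >>> f._use_exponential_format(9)
            False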
137 | """ 138 | if width is None: width = self.standard_width() 139 | if self._use_exponential_format(width): 140 | formatter = self._format_exponential 141 | else: 142 | formatter = self._format_standard 143 | 144 | out = np.array([formatter(value, width) for value in values.flat]) 145 | return out.reshape(values.shape) 146 | 147 | class IntFormatter(FloatFormatter): 148 | """ 149 | The IntFormatter tries to just print all the digits of the ints, but falls 150 | back on being an exponential FloatFormatter if there isn't room. 151 | """ 152 | def _min_width_standard(self): 153 | return self.leading_digits + 1 154 | 155 | def standard_width(self): 156 | return self._min_width_standard() 157 | 158 | def _format_standard(self, value, width): 159 | return '{0:>{sign}{width}d}'.format(value, width=width, sign=self.sign) 160 | 161 | class BoolFormatter(GridDataFormatter): 162 | """ 163 | The BoolFormatter prints 'True' and 'False' if there is room, and 164 | otherwise prints 'T' and '-' ('T' and 'F' are too visually similar). 165 | """ 166 | def standard_width(self): 167 | return 5 168 | 169 | def max_width(self): 170 | return 5 171 | 172 | def format(self, value, width=5): 173 | if width < 5: 174 | if value: return 'T' 175 | else: return '-' 176 | else: 177 | if value: return ' True' 178 | else: return 'False' 179 | 180 | class StrFormatter(GridDataFormatter): 181 | """ 182 | A StrFormatter's behavior is almost entirely defined by the default. 183 | When it must truncate strings, it insists on showing at least 3 184 | characters. 185 | """ 186 | def min_width(self): 187 | return min(3, self.max_width()) 188 | 189 | class ComplexFormatter(GridDataFormatter): 190 | """ 191 | A ComplexFormatter uses two FloatFormatters side by side. This can make 192 | its min_width fairly large. 193 | """ 194 | def __init__(self, data): 195 | GridDataFormatter.__init__(self, data) 196 | self.real_format = FloatFormatter(data, strip_zeros=False) 197 | self.imag_format = FloatFormatter(data, strip_zeros=False, 198 | sign=True) 199 | 200 | def min_width(self): 201 | return max(self.real_format.min_width(), 202 | self.imag_format.min_width())*2 + 1 203 | 204 | def standard_width(self): 205 | return max(self.real_format.standard_width(), 206 | self.imag_format.standard_width())*2 + 1 207 | 208 | def max_width(self): 209 | return max(self.real_format.max_width(), 210 | self.imag_format.max_width())*2 211 | 212 | def format(self, value, width=None): 213 | #TODO: optimize 214 | if width is None: width = self.standard_width() 215 | part_width = (width-1)//2 216 | real_part = self.real_format.format(value.real, part_width) 217 | imag_part = self.imag_format.format(value.imag, part_width) 218 | result = '{0}{1}j'.format(real_part, imag_part) 219 | return '{0:<{width}}'.format(result, width=width) 220 | 221 | 222 | # Formatters for numpy dtype kinds 223 | _KIND2FORMAT = dict(b = BoolFormatter, 224 | u = IntFormatter, 225 | i = IntFormatter, 226 | f = FloatFormatter, 227 | c = ComplexFormatter) 228 | 229 | 230 | def get_formatter(arr): 231 | """ 232 | Get a formatter for this array's data type, and prime it on this array. 233 | """ 234 | return _KIND2FORMAT.get(arr.dtype.kind, StrFormatter)(arr) 235 | 236 | 237 | def grid_layout(arr, width=75, height=10): 238 | """ 239 | Given a 2-D non-empty array, turn it into a list of lists of strings to be 240 | joined. 
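    Also returns, as a second value, the sub-array of cells actually shown,
    since the input may be truncated to fit into `width` and `height`.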
241 | 242 | This uses plain lists instead of a string array, because certain 243 | formatting tricks might want to join columns, resulting in a ragged- 244 | shaped array. 245 | """ 246 | # get the maximum possible amount we'd be able to display 247 | array_sample = arr[:height, :width//2] 248 | formatter = get_formatter(arr) 249 | 250 | # first choice: show the whole array at full width 251 | cell_width = formatter.max_width() 252 | columns_shown = arr.shape[1] 253 | column_ellipsis = False 254 | 255 | if (cell_width+1) * columns_shown > width+1: 256 | # second choice: show the whole array at at least standard width 257 | standard_width = formatter.standard_width() 258 | cell_width = (width+1) // (columns_shown) - 1 259 | if cell_width < standard_width: 260 | # third choice: show at least 5 columns at standard width 261 | column_ellipsis = True 262 | cell_width = standard_width 263 | columns_shown = (width-3) // (cell_width+1) 264 | if columns_shown < 5: 265 | # fourth choice: as many columns as possible at minimum width 266 | cell_width = formatter.min_width() 267 | columns_shown = max(1, (width-3) // (cell_width+1)) 268 | cells_shown = arr[:height, :columns_shown] 269 | layout = formatter.format_all(cells_shown, cell_width) 270 | 271 | ungrid = [list(row) for row in layout] 272 | 273 | if column_ellipsis: 274 | ungrid[0].append('...') 275 | 276 | if height < arr.shape[0]: # row ellipsis 277 | ungrid.append(['...']) 278 | 279 | return ungrid, cells_shown 280 | 281 | def labeled_layout(arr, width=75, height=10, row_label_width=9): 282 | """ 283 | Given a 2-D non-empty array that may have labeled axes, rows, or columns, 284 | render the array as strings to be joined and attach the axes in visually 285 | appropriate places. 286 | 287 | Returns a list of lists of strings to be joined. 288 | """ 289 | inner_width, inner_height = width, height 290 | if arr.axes[0].labels: 291 | inner_width = width - row_label_width-1 292 | if arr.axes[1].labels: 293 | inner_height -= 1 294 | row_header = (arr.axes[0].labels and arr.axes[0].name) 295 | col_header = (arr.axes[1].labels and arr.axes[1].name) 296 | if row_header or col_header: 297 | inner_height -= 2 298 | 299 | layout, cells_shown = grid_layout(arr, inner_width, inner_height) 300 | cell_width = len(layout[0][0]) 301 | label_formatter = StrFormatter() 302 | 303 | if arr.axes[1].labels: 304 | # use one character less than available, to make axes more visually 305 | # separate 306 | 307 | col_label_layout = [label_formatter.format(str(name)[:cell_width-1], 308 | cell_width) for name in cells_shown.axes[1].labels] 309 | layout = [col_label_layout] + layout 310 | 311 | if arr.axes[0].labels: 312 | layout = [[' '*row_label_width] + row for row in layout] 313 | labels = cells_shown.axes[0].labels 314 | offset = 0 315 | if arr.axes[1].labels: offset = 1 316 | for r in range(cells_shown.shape[0]): 317 | layout[r+offset][0] = label_formatter.format(str(labels[r]), row_label_width) 318 | 319 | if row_header or col_header: 320 | header0 = [] 321 | header1 = [] 322 | if row_header: 323 | header0.append(label_formatter.format(row_header, row_label_width)) 324 | header1.append('-' * row_label_width) 325 | elif arr.axes[0].labels: 326 | header0.append(' ' * row_label_width) 327 | header1.append(' ' * row_label_width) 328 | if col_header: 329 | # We can use all remaining columns. How wide are they? 
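        # (measured by joining the already-formatted cells of the first row,
        # so the merged column header spans exactly the data columns)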
330 |         offset = 0
331 |         if arr.axes[0].labels: offset = 1
332 |         merged_width = len(' '.join(layout[0][offset:]))
333 |         header0.append(label_formatter.format(col_header, merged_width))
334 |         header1.append('-' * merged_width)
335 |         layout = [header0, header1] + layout
336 | 
337 |     return layout
338 | 
339 | def layout_to_string(layout):
340 |     return '\n'.join([' '.join(row) for row in layout])
341 | 
342 | def array_to_string(arr, width=75, height=10):
343 |     """
344 |     Get a 2-D text representation of a NumPy array.
345 |     """
346 |     assert arr.ndim <= 2
347 |     while arr.ndim < 2:
348 |         arr = arr[np.newaxis, ...]
349 |     return layout_to_string(grid_layout(arr, width, height)[0])  # grid_layout returns (layout, cells_shown)
350 | 
351 | def datarray_to_string(arr, width=75, height=10):
352 |     """
353 |     Get a 2-D text representation of a datarray.
354 |     """
355 |     assert arr.ndim <= 2
356 |     while arr.ndim < 2:
357 |         arr = arr[np.newaxis, ...]
358 |     return layout_to_string(labeled_layout(arr, width, height))
359 | 
360 | 
--------------------------------------------------------------------------------
/datarray/testing/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | from .utils import *
3 | 
--------------------------------------------------------------------------------
/datarray/testing/testlib.py:
--------------------------------------------------------------------------------
1 | """Module defining the main test entry point exposed at the top level.
2 | """
3 | #-----------------------------------------------------------------------------
4 | # Imports
5 | #-----------------------------------------------------------------------------
6 | 
7 | # Stdlib
8 | import sys
9 | 
10 | # Third-party
11 | import nose
12 | import nose.plugins.builtin
13 | from nose.core import TestProgram
14 | 
15 | #-----------------------------------------------------------------------------
16 | # Functions and classes
17 | #-----------------------------------------------------------------------------
18 | 
19 | def test(doctests=True, extra_argv=None, **kw):
20 |     """Run the datarray test suite using nose.
21 | 
22 |     Parameters
23 |     ----------
24 |     doctests : bool, optional (default True)
25 |        If true, also run the doctests in all docstrings.
26 | 
27 |     kw : dict
28 |        Any other keywords are passed directly to nose.TestProgram(), which
29 |        itself is a subclass of unittest.TestProgram().
30 |     """
31 |     # We construct our own argv manually, so we must set argv[0] ourselves
32 |     argv = [ 'nosetests',
33 |              # Name the package to actually test, in this case datarray
34 |              'datarray',
35 | 
36 |              # extra info in tracebacks
37 |              '--detailed-errors',
38 | 
39 |              # We add --exe because of setuptools' imbecility (it blindly does
40 |              # chmod +x on ALL files). Nose does the right thing and it tries
41 |              # to avoid executables, setuptools unfortunately forces our hand
42 |              # here. This has been discussed on the distutils list and the
43 |              # setuptools devs refuse to fix this problem!
44 |              '--exe',
45 |              ]
46 | 
47 |     if doctests:
48 |         argv.append('--with-doctest')
49 | 
50 |     if extra_argv is not None:
51 |         argv.extend(extra_argv)
52 | 
53 |     # Now nose can run
54 |     TestProgram(argv=argv, exit=False, **kw)
55 | 
56 | 
57 | # Tell nose that the test() function itself isn't a test, otherwise we get a
58 | # recursive loop inside nose.
59 | test.__test__ = False 60 | -------------------------------------------------------------------------------- /datarray/testing/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | "Tests of datarray unit test utilities" 2 | 3 | import numpy as np 4 | from numpy.testing import assert_raises 5 | 6 | from datarray.datarray import DataArray 7 | from datarray.testing.utils import assert_datarray_equal 8 | 9 | def test_assert_datarray_equal(): 10 | # Test assert_datarray_equal 11 | 12 | x = DataArray([1, 2]) 13 | y = DataArray([1, 2]) 14 | assert_datarray_equal(x, y, "Should not raise assertion") 15 | y = DataArray([1, 3]) 16 | assert_raises(AssertionError, assert_datarray_equal, x, y) 17 | y = DataArray([1, 2, 3]) 18 | assert_raises(AssertionError, assert_datarray_equal, x, y) 19 | y = DataArray([1, 2], 'a') 20 | assert_raises(AssertionError, assert_datarray_equal, x, y) 21 | y = DataArray([1, 2], [('a', ['a', 'b'])]) 22 | assert_raises(AssertionError, assert_datarray_equal, x, y) 23 | 24 | x = DataArray([1, 2], 'a') 25 | y = DataArray([1, 2], 'a') 26 | assert_datarray_equal(x, y, "Should not raise assertion") 27 | y = DataArray([1, 2], 'b') 28 | assert_raises(AssertionError, assert_datarray_equal, x, y) 29 | y = DataArray([1, 2], [('b', ['a', 'b'])]) 30 | assert_raises(AssertionError, assert_datarray_equal, x, y) 31 | 32 | x = DataArray([1, 2], 'a') 33 | y = DataArray([1, 2], [('a', None)]) 34 | assert_datarray_equal(x, y, "Should not raise assertion") 35 | 36 | x = DataArray([[1, 2], [3, 4]], [('ax1', ['a', 'b']), ('ax2', ['a', 'b'])]) 37 | y = DataArray([[1, 2], [3, 4]], [('ax1', ['a', 'b']), ('ax2', ['a', 'b'])]) 38 | assert_datarray_equal(x, y, "Should not raise assertion") 39 | y = DataArray([[1, 2], [3, 4]], [('ax1', ['X', 'b']), ('ax2', ['a', 'b'])]) 40 | assert_raises(AssertionError, assert_datarray_equal, x, y) 41 | y = DataArray([[1, 2], [3, 4]], [('ax1', ['a', 'b']), ('ax2', None)]) 42 | assert_raises(AssertionError, assert_datarray_equal, x, y) 43 | y = DataArray([[9, 2], [3, 4]], [('ax1', ['a', 'b']), ('ax2', ['a', 'b'])]) 44 | assert_raises(AssertionError, assert_datarray_equal, x, y) 45 | 46 | x = DataArray([1, np.nan]) 47 | y = DataArray([1, np.nan]) 48 | assert_datarray_equal(x, y, "Should not raise assertion") 49 | 50 | x = DataArray([1, 2], 'a') 51 | y = 1 52 | assert_raises(AssertionError, assert_datarray_equal, x, y) 53 | y = np.array([1, 2]) 54 | assert_raises(AssertionError, assert_datarray_equal, x, y) 55 | 56 | x = 1 57 | y = 2 58 | assert_raises(AssertionError, assert_datarray_equal, x, y) 59 | x = np.array([1]) 60 | y = np.array([2]) 61 | assert_raises(AssertionError, assert_datarray_equal, x, y) 62 | -------------------------------------------------------------------------------- /datarray/testing/utils.py: -------------------------------------------------------------------------------- 1 | """datarray unit testing utilities""" 2 | #----------------------------------------------------------------------------- 3 | # Imports 4 | #----------------------------------------------------------------------------- 5 | 6 | # Third-party 7 | import numpy as np 8 | from numpy.testing import assert_, assert_equal, assert_array_equal 9 | 10 | # Our own 11 | from datarray.datarray import DataArray 12 | 13 | __all__ = ['assert_datarray_equal'] 14 | 15 | #----------------------------------------------------------------------------- 16 | # Functions and classes 17 | 
#-----------------------------------------------------------------------------
18 | 
19 | def assert_datarray_equal(x, y, err_msg='', verbose=True):
20 |     """
21 |     Raise an AssertionError if two datarrays are not equal.
22 | 
23 |     Given two datarrays, assert that the shapes are equal, axes are equal, and
24 |     all elements of the datarrays are equal. Given two scalars assert equality.
25 |     In contrast to the standard usage in numpy, NaNs are compared like numbers;
26 |     no assertion is raised if both objects have NaNs in the same positions.
27 | 
28 |     The usual caution for verifying equality with floating point numbers is
29 |     advised.
30 | 
31 |     Parameters
32 |     ----------
33 |     x : {datarray, scalar}
34 |         If you are testing a datarray method, for example, then this is the
35 |         datarray (or scalar) returned by the method.
36 |     y : {datarray, scalar}
37 |         This datarray represents the expected result. If `x` is not equal to
38 |         `y`, then an AssertionError is raised.
39 |     err_msg : str
40 |         If `x` is not equal to `y`, then the string `err_msg` will be added to
41 |         the top of the AssertionError message.
42 |     verbose : bool
43 |         If True, the conflicting values are appended to the error message.
44 | 
45 |     Returns
46 |     -------
47 |     None
48 | 
49 |     Raises
50 |     ------
51 |     AssertionError
52 |         If actual and desired datarrays are not equal.
53 | 
54 |     Examples
55 |     --------
56 |     If the two datarrays are equal then None is returned:
57 | 
58 |     >>> from datarray.testing import assert_datarray_equal
59 |     >>> from datarray.datarray import DataArray
60 |     >>> x = DataArray([1, 2])
61 |     >>> y = DataArray([1, 2])
62 |     >>> assert_datarray_equal(x, y)
63 | 
64 |     If the two datarrays are not equal then an AssertionError is raised:
65 | 
66 |     >>> x = DataArray([1, 2], ('time',))
67 |     >>> y = DataArray([1, 2], ('distance',))
68 |     >>> assert_datarray_equal(x, y)
69 |     Traceback (most recent call last):
70 |       File "<stdin>", line 1, in <module>
71 |       File "datarray/testing/utils.py", line 133, in assert_datarray_equal
72 |         raise AssertionError, err_msg
73 |     AssertionError:
74 | 
75 |     ----------
76 |     AXIS NAMES
77 |     ----------
78 | 
79 |     Items are not equal:
80 |     item=0
81 | 
82 |     ACTUAL: 'time'
83 |     DESIRED: 'distance'
84 | 
85 |     """
86 |     # Initialize
87 |     fail = []
88 | 
89 |     # Function to make section headings
90 |     def heading(text):
91 |         line = '-' * len(text)
92 |         return '\n\n' + line + '\n' + text + '\n' + line + '\n'
93 | 
94 |     # The assert depends on the type of x and y
95 |     if np.isscalar(x) and np.isscalar(y):
96 | 
97 |         # Both x and y are scalars
98 |         try:
99 |             assert_equal(x, y)
100 |         except AssertionError as err:
101 |             fail.append(heading('SCALARS') + str(err))
102 | 
103 |     elif (type(x) is np.ndarray) and (type(y) is np.ndarray):
104 | 
105 |         # Both x and y are plain ndarrays
106 |         try:
107 |             assert_array_equal(x, y)
108 |         except AssertionError as err:
109 |             fail.append(heading('ARRAYS') + str(err))
110 | 
111 |     elif (type(x) == DataArray) + (type(y) == DataArray) == 1:
112 | 
113 |         # Only one of x and y is a datarray; test failed
114 |         try:
115 |             assert_equal(type(x), type(y))
116 |         except AssertionError as err:
117 |             fail.append(heading('TYPE') + str(err))
118 | 
119 |     else:
120 | 
121 |         # Both x and y are datarrays
122 | 
123 |         # shape
124 |         try:
125 |             assert_equal(x.shape, y.shape)
126 |         except AssertionError as err:
127 |             fail.append(heading('SHAPE') + str(err))
128 | 
129 |         # axis names
130 |         try:
131 |             assert_equal(x.names, y.names)
132 |         except AssertionError as err:
133 |             fail.append(heading('AXIS NAMES') + str(err))
134 | 
135 |         # labels
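        # (labels are compared axis by axis, so a failing comparison names
        # the offending axis in its error heading)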
136 |         for ax in range(x.ndim):
137 |             try:
138 |                 assert_equal(x.axes[ax].labels, y.axes[ax].labels)
139 |             except AssertionError as err:
140 |                 fail.append(heading('LABELS ALONG AXIS = %d' % ax) + str(err))
141 | 
142 |         # axes
143 |         for ax in range(x.ndim):
144 |             try:
145 |                 assert_equal(x.axes[ax], y.axes[ax])
146 |             except AssertionError as err:
147 |                 fail.append(heading('AXIS OBJECT ALONG AXIS = %d' % ax) + str(err))
148 |                 fail.append('x: ' + str(x.axes[ax]))
149 |                 fail.append('y: ' + str(y.axes[ax]))
150 | 
151 |         # data
152 |         try:
153 |             assert_array_equal(x.base, y.base)
154 |         except AssertionError as err:
155 |             fail.append(heading('ARRAY') + str(err))
156 | 
157 |     # Did the test pass?
158 |     if len(fail) > 0:
159 |         # No
160 |         if verbose:
161 |             err_msgs = ''.join(fail)
162 |             err_msgs = err_msgs.replace('\n', '\n\t')
163 |             if len(err_msg):
164 |                 err_msg = heading("TEST: " + err_msg) + err_msgs
165 |             else:
166 |                 err_msg = err_msgs
167 |             raise AssertionError(err_msg)
168 |         else:
169 |             raise AssertionError
170 | 
171 | 
--------------------------------------------------------------------------------
/datarray/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BIDS/datarray/1d53044a838874609b824ad6eeeb1b4e819f417b/datarray/tests/__init__.py
--------------------------------------------------------------------------------
/datarray/tests/test_bugfixes.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from datarray.datarray import Axis, DataArray, NamedAxisError, \
4 |     _pull_axis, _reordered_axes
5 | 
6 | from datarray.testing.utils import assert_datarray_equal
7 | import datarray.print_grid as print_grid
8 | 
9 | import nose.tools as nt
10 | import numpy.testing as npt
11 | 
12 | def test_full_reduction():
13 |     # issue #2
14 |     nt.assert_equal(DataArray([1, 2, 3]).sum(axis=0),6)
15 | 
16 | def test_bug3():
17 |     "Bug 3"
18 |     x = np.array([1,2,3])
19 |     y = DataArray(x, 'x')
20 |     nt.assert_equal( x.sum(), y.sum() )
21 |     nt.assert_equal( x.max(), y.max() )
22 | 
23 | def test_bug5():
24 |     "Bug 5: Support 0d arrays"
25 |     A = DataArray(10)
26 |     # Empty tuples evaluate to false
27 |     nt.assert_false(tuple(A.axes))
28 |     nt.assert_equal(len(A.axes), 0)
29 |     nt.assert_raises(IndexError, lambda: A.axes[0])
30 |     nt.assert_false(A.names)
31 | 
32 | def test_1d_label_indexing():
33 |     # issue #18
34 |     cap_ax_spec = 'capitals', ['washington', 'london', 'berlin', 'paris', 'moscow']
35 |     caps = DataArray(np.arange(5),[cap_ax_spec])
36 |     caps.axes.capitals["washington"]
37 | 
38 | def test_bug22():
39 |     "Bug 22: DataArray not accepting arrays as ticks"
40 |     A = DataArray([1, 2], [('time', ['a', 'b'])])
41 |     B = DataArray([1, 2], [('time', np.array(['a', 'b']))])
42 |     assert_datarray_equal(A, B)
43 | 
44 | def test_bug26():
45 |     "Bug 26: check that axes names are computed on demand."
46 | a = DataArray([1,2,3]) 47 | nt.assert_true(a.axes[0].name is None) 48 | a.axes[0].name = "a" 49 | nt.assert_equal(a.axes[0].name, "a") 50 | 51 | def test_bug34(): 52 | "Bug 34: datetime.date ticks not handled by datarray_to_string" 53 | from datarray.print_grid import datarray_to_string 54 | from datetime import date as D 55 | A = DataArray([[1,2],[3,4]], [('row', ('a', D(2010,1,1))),('col', 'cd')]) 56 | exp_out = """row col 57 | --------- ------------------- 58 | c d 59 | a 1 2 60 | 2010-01-0 3 4""" 61 | nt.assert_equal(datarray_to_string(A), exp_out) 62 | # Output for unsigned integers 63 | B = A.astype(np.uint32) 64 | nt.assert_equal(datarray_to_string(B), exp_out) 65 | 66 | 67 | def test_bug35(): 68 | "Bug 35" 69 | txt_array = DataArray(['a','b'], axes=['dummy']) 70 | #calling datarray_to_string on string arrays used to fail 71 | print_grid.datarray_to_string(txt_array) 72 | #because get_formatter returned the class not an instance 73 | assert isinstance(print_grid.get_formatter(txt_array), 74 | print_grid.StrFormatter) 75 | 76 | def test_bug38(): 77 | "Bug 38: DataArray.__repr__ should parse as a single entity" 78 | # Calling repr() on an ndarray prepends array (instead of np.array) 79 | arys = ( 80 | DataArray(np.random.randint(0, 10000, size=(1,2,3,4,5)), 'abcde'), 81 | DataArray(np.random.randint(0, 10000, size=(3,3,3))), # Try with missing axes 82 | DataArray(np.random.randint(0, 10000, (2,4,5,6)), # Try with ticks 83 | ('a', ('b', ('b1','b2','b3','b4')), 'c', 'd')), 84 | ) 85 | # Load `array` into namespace for `eval` 86 | array = np.array 87 | for A in arys: 88 | assert_datarray_equal(A, eval(repr(A))) 89 | 90 | def test_bug44(): 91 | "Bug 44" 92 | # In instances where axis=None, the operation runs 93 | # on the flattened array. Here it makes sense to return 94 | # the op on the underlying np.ndarray. 
95 | A = [[1,2,3],[4,5,6]] 96 | x = DataArray(A, 'xy').std() 97 | y = np.std(A) 98 | nt.assert_equal( x.sum(), y.sum() ) 99 | 100 | -------------------------------------------------------------------------------- /datarray/tests/test_data_array.py: -------------------------------------------------------------------------------- 1 | '''Tests for DataArray and friend''' 2 | 3 | import sys 4 | PY3 = sys.version_info[0] >= 3 5 | 6 | import numpy as np 7 | 8 | from datarray.datarray import (Axis, DataArray, NamedAxisError, AxesManager, 9 | _pull_axis, _reordered_axes) 10 | 11 | import nose.tools as nt 12 | import numpy.testing as npt 13 | 14 | DA = DataArray(np.random.randn(4, 2, 6), 'xyz') 15 | YZ = AxesManager(DA, (Axis('y', 0, None), Axis('z', 1, None))) 16 | XZ = AxesManager(DA, (Axis('x', 0, None), Axis('z', 1, None))) 17 | XY = AxesManager(DA, (Axis('x', 0, None), Axis('y', 1, None))) 18 | AXES_REMOVED = dict(x=YZ, y=XZ, z=XY) 19 | 20 | 21 | def test_axis_equal(): 22 | ax1 = Axis('aname', 0, None) 23 | ax2 = Axis('aname', 0, None) 24 | nt.assert_equal(ax1, ax2) 25 | # The array to which the axis points does not matter in comparison 26 | ax3 = Axis('aname', 0, np.arange(10)) 27 | nt.assert_equal(ax1, ax3) 28 | # but the index does 29 | ax4 = Axis('aname', 1, None) 30 | nt.assert_not_equal(ax1, ax4) 31 | # so does the name 32 | ax5 = Axis('anothername', 0, None) 33 | nt.assert_not_equal(ax1, ax5) 34 | # and obviously both 35 | nt.assert_not_equal(ax4, ax5) 36 | # Try with labels 37 | ax6 = Axis('same', 0, None, labels=['a', 'b']) 38 | ax7 = Axis('same', 0, None, labels=['a', 'b']) 39 | nt.assert_equal(ax6, ax7) 40 | ax8 = Axis('same', 0, None, labels=['a', 'xx']) 41 | nt.assert_not_equal(ax6, ax8) 42 | 43 | def test_bad_labels1(): 44 | d = np.zeros(5) 45 | # bad labels length 46 | nt.assert_raises(ValueError, DataArray, d, axes=[('a', 'uvw')]) 47 | 48 | def test_bad_labels2(): 49 | d = np.zeros(5) 50 | # uniqueness error 51 | nt.assert_raises(ValueError, DataArray, d, axes=[('a', ['u']*5)]) 52 | 53 | def test_bad_labels3(): 54 | d = np.zeros(5) 55 | # type error 56 | nt.assert_raises(ValueError, DataArray, d, axes=[('a', [1, 1, 1, 1, 1])]) 57 | 58 | def test_basic(): 59 | adata = [2,3] 60 | a = DataArray(adata, 'x', float) 61 | nt.assert_equal(a.names, ('x',)) 62 | nt.assert_equal(a.dtype, np.dtype(float)) 63 | b = DataArray([[1,2],[3,4],[5,6]], 'xy') 64 | nt.assert_equal(b.names, ('x','y')) 65 | # integer slicing 66 | b0 = b.axes.x[0] 67 | npt.assert_equal(b0, [1,2]) 68 | # slice slicing 69 | b1 = b.axes.x[1:] 70 | npt.assert_equal(b1, [[3,4], [5,6]]) 71 | 72 | def test_bad_axes_axes(): 73 | d = np.random.randn(3,2) 74 | nt.assert_raises(NamedAxisError, DataArray, d, axes='xx') 75 | 76 | def test_combination(): 77 | narr = DataArray(np.zeros((1,2,3)), axes=('a','b','c')) 78 | n3 = DataArray(np.ones((1,2,3)), axes=('x','b','c')) 79 | nt.assert_raises(NamedAxisError, np.add, narr, n3) 80 | # addition of scalar 81 | res = narr + 2 82 | nt.assert_true(isinstance(res, DataArray)) 83 | nt.assert_equal(res.axes, narr.axes) 84 | # addition of matching size array, with matching names 85 | res = narr + narr 86 | nt.assert_equal(res.axes, narr.axes) 87 | 88 | def test_label_change(): 89 | a = DataArray([1,2,3]) 90 | nt.assert_equal(a.names, (None,)) 91 | a.axes[0].name = "test" 92 | nt.assert_equal(a.names, ("test",)) 93 | 94 | def test_1d(): 95 | adata = [2,3] 96 | a = DataArray(adata, 'x', int) 97 | # Verify scalar extraction 98 | nt.assert_true(np.isscalar(a.axes.x[0])) 99 | 
nt.assert_equal(np.dtype(a.axes.x[0]), np.dtype(np.int)) 100 | # Verify indexing of axis 101 | nt.assert_equals(a.axes.x.index, 0) 102 | # Iteration checks 103 | for i,val in enumerate(a.axes.x): 104 | nt.assert_equals(val, adata[i]) 105 | nt.assert_true(np.isscalar(val)) 106 | nt.assert_equal(np.dtype(val), np.dtype(np.int)) 107 | 108 | def test_2d(): 109 | b = DataArray([[1,2],[3,4],[5,6]], 'xy') 110 | nt.assert_equals(b.names, ('x', 'y')) 111 | # Check row named slicing 112 | rs = b.axes.x[0] 113 | npt.assert_equal(rs, [1,2]) 114 | nt.assert_equal(rs.names, ('y',)) 115 | nt.assert_equal(tuple(rs.axes), (Axis('y', 0, rs),)) 116 | # Now, check that when slicing a row, we get the right names in the output 117 | nt.assert_equal(b.axes.x[1:].names, ('x','y')) 118 | # Check column named slicing 119 | cs = b.axes.y[1] 120 | npt.assert_equal(cs, [2, 4, 6]) 121 | nt.assert_equal(cs.names, ('x',)) 122 | nt.assert_equal(tuple(cs.axes), (Axis('x', 0, cs),)) 123 | # What happens if we do normal slicing? 124 | rs = b[0] 125 | npt.assert_equal(rs, [1, 2]) 126 | nt.assert_equal(rs.names, ('y',)) 127 | nt.assert_equal(tuple(rs.axes), (Axis('y', 0, rs),)) 128 | 129 | def test__pull_axis(): 130 | a = Axis('x', 0, None) 131 | b = Axis('y', 1, None) 132 | c = Axis('z', 2, None) 133 | t_pos = Axis('y', 1, None) 134 | t_neg = Axis('x', 5, None) 135 | axes = [a, b, c] 136 | nt.assert_true(t_pos in axes) 137 | nt.assert_false(t_neg in axes) 138 | nt.assert_equal(axes, _pull_axis(axes, t_neg)) 139 | nt.assert_equal(axes[:-1], _pull_axis(axes, c)) 140 | new_axes = [a, Axis('z', 1, None)] 141 | nt.assert_equal(new_axes, _pull_axis(axes, t_pos)) 142 | 143 | def test__reordered_axes(): 144 | a = Axis('x', 0, None) 145 | b = Axis('y', 1, None) 146 | c = Axis('z', 2, None) 147 | res = _reordered_axes([a,b,c], (1,2,0)) 148 | names_inds = [(ax.name, ax.index) for ax in res] 149 | nt.assert_equal(set(names_inds), set([('y',0),('z',1),('x',2)])) 150 | 151 | def test_axis_set_name(): 152 | a = DataArray(np.arange(20).reshape(2,5,2), 'xyz') 153 | a.axes[0].set_name('u') 154 | nt.assert_equal(a.axes[0].name, 'u', 'name change failed') 155 | nt.assert_equal(a.axes.u, a.axes[0], 'name remapping failed') 156 | nt.assert_equal(a.axes.u.index, 0, 'name remapping failed') 157 | 158 | def test_array_set_name(): 159 | a = DataArray(np.arange(20).reshape(2,5,2), 'xyz') 160 | a.set_name(0, 'u') 161 | nt.assert_equal(a.axes[0].name, 'u', 'name change failed') 162 | nt.assert_equal(a.axes.u, a.axes[0], 'name remapping failed') 163 | nt.assert_equal(a.axes.u.index, 0, 'name remapping failed') 164 | 165 | def test_axis_make_slice(): 166 | p_arr = np.random.randn(2,4,5) 167 | ax_spec = 'capitals', ['washington', 'london', 'berlin', 'paris', 'moscow'] 168 | d_arr = DataArray(p_arr, [None, None, ax_spec]) 169 | a = d_arr.axes.capitals 170 | sl = a.make_slice( slice('london', 'moscow') ) 171 | should_be = ( slice(None), slice(None), slice(1,4) ) 172 | nt.assert_equal(should_be, sl, 'slicing tuple from labels not correct') 173 | sl = a.make_slice( slice(1,4) ) 174 | nt.assert_equal(should_be, sl, 'slicing tuple from idx not correct') 175 | 176 | # also test with the slicing syntax 177 | def test_labels_slicing(): 178 | p_arr = np.random.randn(2,4,5) 179 | ax_spec = 'capitals', ['washington', 'london', 'berlin', 'paris', 'moscow'] 180 | d_arr = DataArray(p_arr, [None, None, ax_spec]) 181 | a = d_arr.axes.capitals 182 | sub_arr = d_arr.axes.capitals['washington'::2] 183 | nt.assert_equal(sub_arr.axes.capitals.labels, 184 | a.labels[0::2]) 185 | 
nt.assert_true((sub_arr == d_arr[:,:,0::2]).all()) 186 | 187 | # -- Tests for reshaping ----------------------------------------------------- 188 | 189 | def test_flatten_and_ravel(): 190 | "Test the functionality of ravel() and flatten() methods" 191 | d = DataArray(np.arange(20).reshape(4,5), 'xy') 192 | df = d.flatten() 193 | nt.assert_true(type(df) is np.ndarray, 'Type error in flatten') 194 | nt.assert_true(df.shape == (20,), 'Wrong shape in flatten') 195 | df[:4] = 0 196 | nt.assert_false((d[0,:4] == 0).all(), 'Copy not made in flatten') 197 | 198 | dr = d.ravel() 199 | nt.assert_true(type(dr) is np.ndarray, 'Type error in ravel') 200 | nt.assert_true(dr.shape == (20,), 'Wrong shape in ravel') 201 | dr[:4] = 0 202 | nt.assert_true((d[0,:4] == 0).all(), 'View not made in ravel') 203 | 204 | def test_squeeze(): 205 | "Test squeeze method" 206 | d = DataArray(np.random.randn(3,2,9), 'xyz') 207 | d2 = d[None,:,None,:,:,None] 208 | nt.assert_true(d2.shape == (1,3,1,2,9,1), 'newaxis slicing failed') 209 | d3 = d.squeeze() 210 | nt.assert_true(d3.shape == d.shape, 211 | 'squeezing length-1 dimensions failed') 212 | nt.assert_true(d3.names == d.names, 'Axes got lost in squeeze') 213 | 214 | def test_reshape(): 215 | d = DataArray(np.random.randn(3,4,5), 'xyz') 216 | new_shape = (1,3,1,4,5) 217 | # Test padding the shape 218 | d2 = d.reshape(new_shape) 219 | new_labels = (None, 'x', None, 'y', 'z') 220 | nt.assert_true(d2.names == new_labels, 221 | 'Array with inserted dimensions has wrong labels') 222 | nt.assert_true(d2.shape == new_shape, 'New shape wrong') 223 | 224 | # Test trimming the shape 225 | d3 = d2.reshape(d.shape) 226 | nt.assert_true(d3.names == d.names, 227 | 'Array with removed dimensions has wrong labels') 228 | nt.assert_true(d3.shape == d.shape, 'New shape wrong') 229 | 230 | # Test a combo of padding and trimming 231 | d4 = d2.reshape(3,4,1,5,1) 232 | new_labels = ('x', 'y', None, 'z', None) 233 | nt.assert_true( 234 | d4.names == new_labels, 235 | 'Array with inserted and removed dimensions has wrong labels') 236 | nt.assert_true(d4.shape == (3, 4, 1, 5, 1), 'New shape wrong') 237 | 238 | def test_reshape_corners(): 239 | "Test some corner cases for reshape" 240 | d = DataArray(np.random.randn(3,4,5), 'xyz') 241 | d2 = d.reshape(-1) 242 | nt.assert_true(d2.shape == (60,), 'Flattened shape wrong') 243 | nt.assert_true(type(d2) is np.ndarray, 'Flattened type wrong') 244 | 245 | d2 = d.reshape(60) 246 | nt.assert_true(d2.shape == (60,), 'Flattened shape wrong') 247 | nt.assert_true(type(d2) is np.ndarray, 'Flattened type wrong') 248 | 249 | def test_axis_as_index(): 250 | narr = DataArray(np.array([[1, 2, 3], [4, 5, 6]]), axes=('a', 'b')) 251 | npt.assert_array_equal(np.sum(narr, axis=narr.axes.a), [5, 7, 9]) 252 | 253 | # -- Tests for redefined methods --------------------------------------------- 254 | 255 | def test_transpose(): 256 | b = DataArray([[1,2],[3,4],[5,6]], 'xy') 257 | bt = b.T 258 | c = DataArray([ [1,3,5], [2,4,6] ], 'yx') 259 | nt.assert_true(bt.axes.x.index == 1 and bt.axes.y.index == 0) 260 | nt.assert_true(bt.shape == (2,3)) 261 | nt.assert_true((bt==c).all()) 262 | 263 | def test_swapaxes(): 264 | n_arr = np.random.randn(2,4,3) 265 | a = DataArray(n_arr, 'xyz') 266 | b = a.swapaxes('x', 'z') 267 | c = DataArray(n_arr.transpose(2,1,0), 'zyx') 268 | nt.assert_true((c==b).all(), 'data not equal in swapaxes test') 269 | for ax1, ax2 in zip(b.axes, c.axes): 270 | nt.assert_true(ax1==ax2, 'axes not equal in swapaxes test') 271 | 272 | # -- Tests for wrapped 
ndarray methods --------------------------------------- 273 | 274 | reductions = ['mean', 'var', 'std', 'min', 275 | 'max', 'sum', 'prod', 'ptp', 'any', 'all', 276 | 'argmax', 'argmin'] 277 | accumulations = ['cumprod', 'cumsum'] 278 | 279 | methods = reductions + accumulations 280 | 281 | def check_data_axes(d_arr, op, axis, exp_axes, *args, **kwargs): 282 | """ Check data and axes correct after operation `op` 283 | """ 284 | from datarray.datarray import _names_to_numbers 285 | super_opr = getattr(np.ndarray, op) 286 | axis_idx = _names_to_numbers(d_arr.axes, [axis])[0] 287 | d1 = super_opr(np.asarray(d_arr), axis_idx, *args, **kwargs) 288 | opr = getattr(d_arr, op) 289 | d_arr_out = opr(axis, *args, **kwargs) 290 | nt.assert_equal(d_arr_out.axes, exp_axes) 291 | d2 = np.asarray(d_arr_out) 292 | npt.assert_equal(d1.shape, d2.shape) 293 | npt.assert_array_equal(d1, d2) 294 | 295 | 296 | def test_wrapped_ops_data(): 297 | a = DataArray(np.random.randn(4,2,6), 'xyz') 298 | for m in methods: 299 | check_data_axes(a, m, 'x', YZ if m in reductions else DA.axes) 300 | check_data_axes(a, m, 'y', XZ if m in reductions else DA.axes) 301 | check_data_axes(a, m, 'z', XY if m in reductions else DA.axes) 302 | 303 | 304 | def test_reductions_keepdims(): 305 | names = 'xyz' 306 | a = np.arange(24).reshape((2, 3, 4)) 307 | da = DataArray(a, names) 308 | for idx, name in enumerate(names): 309 | axes_removed = AXES_REMOVED[name] 310 | # Test keepdims as kwarg 311 | for method in reductions: 312 | check_data_axes(da, method, name, axes_removed) 313 | if method not in ('ptp', 'argmin', 'argmax'): 314 | # Reductions taking keepdims argument 315 | check_data_axes(da, method, name, DA.axes, keepdims=True) 316 | # Test the individual functions with positional args 317 | dt = np.dtype(float) 318 | out = np.mean(da, axis=name) 319 | kd_out = DataArray(np.mean(a, axis=idx, keepdims=True), names) 320 | # Functions with signature axis, dtype, out, keepdims 321 | for method in ('mean', 'sum', 'prod', 'all', 'any'): 322 | check_data_axes(da, method, name, axes_removed, dt, out) 323 | check_data_axes(da, method, name, DA.axes, dt, kd_out, True) 324 | # Signature axis, out, dtype, ddof, keepdims 325 | for method in ('var', 'std'): 326 | check_data_axes(da, method, name, axes_removed, dt, out, 0) 327 | check_data_axes(da, method, name, DA.axes, dt, kd_out, 0, True) 328 | # Signature axis, out, keepdims 329 | for method in ('min', 'max'): 330 | check_data_axes(da, method, name, axes_removed, out) 331 | check_data_axes(da, method, name, DA.axes, kd_out, True) 332 | # Test reductions not using keepdims 333 | out_int = out.astype(np.intp) # argmin/max have integer output 334 | for method in ('argmin', 'argmax'): 335 | check_data_axes(da, method, name, axes_removed, out_int) 336 | check_data_axes(da, 'ptp', name, axes_removed, out) 337 | 338 | 339 | # -- Tests for slicing with "newaxis" ---------------------------------------- 340 | def test_newaxis_slicing(): 341 | b = DataArray([[1,2],[3,4],[5,6]], 'xy') 342 | b2 = b[np.newaxis] 343 | nt.assert_true(b2.shape == (1,) + b.shape) 344 | nt.assert_true(b2.axes[0].name == None) 345 | 346 | b2 = b[:,np.newaxis] 347 | nt.assert_true(b2.shape == (3,1,2)) 348 | nt.assert_true((b2[:,0,:]==b).all()) 349 | 350 | # -- Testing broadcasting features ------------------------------------------- 351 | def test_broadcast(): 352 | b = DataArray([[1,2],[3,4],[5,6]], 'xy') 353 | a = DataArray([1,0], 'y') 354 | # both of these should work 355 | c = b + a 356 | nt.assert_true(c.names == ('x', 'y'), 
'simple broadcast failed') 357 | c = a + b 358 | nt.assert_true(c.names == ('x', 'y'), 359 | 'backwards simple broadcast failed') 360 | 361 | a = DataArray([1, 1, 1], 'x') 362 | # this should work too 363 | c = a[:,np.newaxis] + b 364 | nt.assert_true(c.names == ('x', 'y'), 'forward broadcast1 failed') 365 | c = b + a[:,np.newaxis] 366 | nt.assert_true(c.names == ('x', 'y'), 'forward broadcast2 failed') 367 | 368 | b = DataArray(np.random.randn(3,2,4), ['x', None, 'y']) 369 | a = DataArray(np.random.randn(2,4), [None, 'y']) 370 | # this should work 371 | c = b + a 372 | nt.assert_true(c.names == ('x', None, 'y'), 373 | 'broadcast with unlabeled dimensions failed') 374 | # and this 375 | a = DataArray(np.random.randn(2,1), [None, 'y']) 376 | c = b + a 377 | nt.assert_true( 378 | c.names == ('x', None, 'y'), 379 | 'broadcast with matched name, but singleton dimension failed') 380 | # check that labeled Axis names the resulting Axis 381 | b = DataArray(np.random.randn(3,2,4), ['x', 'z', 'y']) 382 | a = DataArray(np.random.randn(2,4), [None, 'y']) 383 | # this should work 384 | c = b + a 385 | nt.assert_true(c.names == ('x', 'z', 'y'), 386 | 'broadcast with unlabeled dimensions failed') 387 | 388 | 389 | # -- Testing slicing failures ------------------------------------------------ 390 | @nt.raises(NamedAxisError) 391 | def test_broadcast_fails1(): 392 | a = DataArray( np.random.randn(5,6), 'yz' ) 393 | b = DataArray( np.random.randn(5,6), 'xz' ) 394 | c = a + b 395 | 396 | @nt.raises(ValueError) 397 | def test_broadcast_fails2(): 398 | a = DataArray( np.random.randn(2,5,6), 'xy' ) # last axis is unlabeled 399 | b = DataArray( np.random.randn(2,6,6), 'xy' ) 400 | # this should fail simply because the dimensions are not matched 401 | c = a + b 402 | 403 | @nt.raises(IndexError) 404 | def test_indexing_fails(): 405 | "Ensure slicing non-existent dimension fails" 406 | a = DataArray( np.random.randn(2,5,6), 'xy' ) 407 | a[:2,:1,:2,:5] 408 | 409 | @nt.raises(IndexError) 410 | def test_ambiguous_ellipsis_fails(): 411 | a = DataArray( np.random.randn(2,5,6), 'xy' ) 412 | a[...,0,...] 413 | 414 | def test_ellipsis_slicing(): 415 | a = DataArray( np.random.randn(2,5,6), 'xy' ) 416 | nt.assert_true((a[...,0] == a[:,:,0]).all(), 417 | 'slicing with ellipsis failed') 418 | nt.assert_true((a[0,...] == a[0]).all(), 419 | 'slicing with ellipsis failed') 420 | nt.assert_true((a[0,...,0] == a[0,:,0]).all(), 421 | 'slicing with ellipsis failed') 422 | 423 | def test_shifty_axes(): 424 | arr = np.random.randn(2,5,6) 425 | a = DataArray( arr, 'xy' ) 426 | # slicing out the "x" Axis triggered the unlabeled axis to change 427 | # name from "_2" to "_1".. 
make sure that this change is mapped 428 | b = a[0,:2] 429 | nt.assert_true((b == arr[0,:2]).all(), 'shifty axes strike again!') 430 | 431 | # -- Testing utility functions ----------------------------------------------- 432 | from datarray.datarray import _expand_ellipsis, _make_singleton_axes 433 | 434 | def test_ellipsis_expansion(): 435 | slicing = ( slice(2), Ellipsis, 2 ) 436 | fixed = _expand_ellipsis(slicing, 4) 437 | should_be = ( slice(2), slice(None), slice(None), 2 ) 438 | nt.assert_true(fixed==should_be, 'wrong slicer1') 439 | fixed = _expand_ellipsis(slicing, 2) 440 | should_be = ( slice(2), 2 ) 441 | nt.assert_true(fixed==should_be, 'wrong slicer2') 442 | 443 | def test_singleton_axis_prep(): 444 | b = DataArray( np.random.randn(5,6), 'xz' ) 445 | slicing = ( None, ) 446 | shape, axes, key = _make_singleton_axes(b, slicing) 447 | 448 | key_should_be = (slice(None), ) # should be trimmed 449 | shape_should_be = (1,5,6) 450 | ax_should_be = [ Axis(l, i, b) for i, l in enumerate((None, 'x', 'z')) ] 451 | 452 | nt.assert_true(key_should_be==key, 'key translated poorly') 453 | nt.assert_true(shape_should_be==shape, 'shape computed poorly') 454 | nt.assert_true(all([a1==a2 for a1,a2 in zip(ax_should_be, axes)]), 455 | 'axes computed poorly') 456 | 457 | def test_singleton_axis_prep2(): 458 | # a little more complicated 459 | b = DataArray( np.random.randn(5,6), 'xz' ) 460 | slicing = ( 0, None ) 461 | shape, axes, key = _make_singleton_axes(b, slicing) 462 | 463 | key_should_be = (0, ) # should be trimmed 464 | shape_should_be = (5,1,6) 465 | ax_should_be = [ Axis(l, i, b) for i, l in enumerate(('x', None, 'z')) ] 466 | 467 | nt.assert_true(key_should_be==key, 'key translated poorly') 468 | nt.assert_true(shape_should_be==shape, 'shape computed poorly') 469 | nt.assert_true(all([a1==a2 for a1,a2 in zip(ax_should_be, axes)]), 470 | 'axes computed poorly') 471 | 472 | # -- Test binary operations -------------------------------------------------- 473 | 474 | def test_label_mismatch(): 475 | dar1 = DataArray([1, 2], [('time', ['A1', 'B1'])]) 476 | dar2 = DataArray([1, 2], [('time', ['A2', 'B2'])]) 477 | nt.assert_raises(NamedAxisError, dar1.__add__, dar2) 478 | nt.assert_raises(NamedAxisError, dar1.__sub__, dar2) 479 | nt.assert_raises(NamedAxisError, dar1.__mul__, dar2) 480 | nt.assert_raises(NamedAxisError, dar1.__floordiv__, dar2) 481 | nt.assert_raises(NamedAxisError, dar1.__truediv__, dar2) 482 | if not PY3: 483 | nt.assert_raises(NamedAxisError, dar1.__div__, dar2) 484 | 485 | # -- Test DataArray.axes 486 | class TestAxesManager(object): 487 | def setUp(self): 488 | self.axes_spec = ('date', ('stocks', ('aapl', 'ibm', 'goog', 'msft')), 'metric') 489 | self.A = DataArray(np.random.randn(200, 4, 10), axes=self.axes_spec) 490 | 491 | def test_axes_name_collision(self): 492 | "Test .axes object for attribute collisions with axis names" 493 | A = DataArray(np.arange(6).reshape([1,2,3]), 494 | ('_arr', '_axes', '_namemap')) 495 | nt.assert_true(A.axes[0] is A.axes('_arr') is A.axes._arr) 496 | nt.assert_true(A.axes[1] is A.axes('_axes') is A.axes._axes) 497 | nt.assert_true(A.axes[2] is A.axes('_namemap') is A.axes._namemap) 498 | 499 | # Try to invoke some methods that use these attributes internally 500 | B = A[np.newaxis, ...] 
501 | nt.assert_equal(B.shape, (1,1,2,3)) 502 | nt.assert_true(np.all(A + A == 2*A)) 503 | 504 | def test_axes_numeric_access(self): 505 | for i,spec in enumerate(self.axes_spec): 506 | try: 507 | name,labels = spec 508 | except ValueError: 509 | name,labels = spec,None 510 | nt.assert_true(self.A.axes[i] == Axis(name=name, index=i, 511 | parent_arr=self.A, labels=labels)) 512 | 513 | def test_axes_attribute_access(self): 514 | for spec in self.axes_spec: 515 | try: 516 | name,labels = spec 517 | except ValueError: 518 | name,labels = spec,None 519 | nt.assert_true(getattr(self.A.axes, name) is self.A.axes(name)) 520 | 521 | def test_equality(self): 522 | B = DataArray(np.random.randn(200, 4, 10), axes=self.axes_spec) 523 | nt.assert_true(self.A.axes == B.axes) 524 | # What if axes differ by labels only? 525 | D = DataArray(np.random.randn(200, 4, 10), axes=('date', 'stocks', 'metric')) 526 | nt.assert_false(self.A.axes == D.axes) 527 | -------------------------------------------------------------------------------- /datarray/tests/test_print.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from datarray.datarray import DataArray 3 | from datarray.print_grid import datarray_to_string 4 | 5 | def test_2d_datarray_to_string(): 6 | grid_string = """ 7 | country year 8 | --------- ------------------------------------------------- 9 | 1994 1998 2002 2006 2010 10 | Netherlan 0. 0.142857 0.285714 0.428571 0.571429 11 | Uruguay 0.714286 0.857143 1. 1.142857 1.285714 12 | Germany 1.428571 1.571429 1.714286 1.857143 2. 13 | Spain 2.142857 2.285714 2.428571 2.571429 2.714286 14 | """.strip() 15 | 16 | test_array = np.arange(20).reshape((4, 5)) / 7.0 17 | row_spec = 'country', ['Netherlands', 'Uruguay', 'Germany', 'Spain'] 18 | col_spec = 'year', list(map(str, [1994, 1998, 2002, 2006, 2010])) 19 | 20 | d_arr = DataArray(test_array, [row_spec, col_spec]) 21 | assert datarray_to_string(d_arr) == grid_string 22 | 23 | 24 | def test_1d_datarray_to_string(): 25 | grid_string = """ 26 | country 27 | --------------------------------------- 28 | Netherla Uruguay Germany Spain 29 | 0. 0.714286 1.428571 2.142857 30 | """.strip() 31 | 32 | test_array = np.arange(20).reshape((4, 5)) / 7.0 33 | row_spec = 'country', ['Netherlands', 'Uruguay', 'Germany', 'Spain'] 34 | col_spec = 'year', list(map(str, [1994, 1998, 2002, 2006, 2010])) 35 | 36 | d_arr = DataArray(test_array, [row_spec, col_spec]) 37 | assert datarray_to_string(d_arr.axes.year['1994']) == grid_string 38 | 39 | -------------------------------------------------------------------------------- /datarray/version.py: -------------------------------------------------------------------------------- 1 | """datarray version information""" 2 | 3 | # Format expected by setup.py and doc/source/conf.py: string of form 4 | # "X.Y.Zextra" 5 | _version_major = 0 6 | _version_minor = 2 7 | _version_micro = 0 8 | _version_extra = 'dev' # development 9 | # _version_extra = '' # release 10 | __version__ = "%s.%s.%s%s" % (_version_major, 11 | _version_minor, 12 | _version_micro, 13 | _version_extra) 14 | 15 | 16 | CLASSIFIERS = ["Development Status :: 3 - Alpha", 17 | "Environment :: Console", 18 | "Intended Audience :: Science/Research", 19 | "License :: OSI Approved :: BSD License", 20 | "Operating System :: OS Independent", 21 | "Programming Language :: Python", 22 | "Topic :: Scientific/Engineering"] 23 | 24 | description = "NumPy arrays with named axes and named indices." 
25 | 26 | # Note: this long_description is actually a copy/paste from the top-level 27 | # README.rst, so that it shows up nicely on PyPI. So please remember to edit 28 | # it only in one place and sync it correctly. I (MB) edit both in vim windows 29 | # and use vim diff mode to push the changes from one to the other. 30 | long_description = """ 31 | ###################################### 32 | Datarray: Numpy arrays with named axes 33 | ###################################### 34 | 35 | Scientists, engineers, mathematicians and statisticians don't just work with 36 | matrices; they often work with structured data, just like you'd find in a 37 | table. However, functionality for this is missing from Numpy, and there are 38 | efforts to create something to fill the void. This is one of those efforts. 39 | 40 | .. warning:: 41 | 42 | This code is currently experimental, and its API *will* change! It is meant 43 | to be a place for the community to understand and develop the right 44 | semantics and have a prototype implementation that will ultimately 45 | (hopefully) be folded back into Numpy. 46 | 47 | Datarray provides a subclass of Numpy ndarrays that supports: 48 | 49 | - individual dimensions (axes) being labeled with meaningful descriptions 50 | - labeled 'ticks' along each axis 51 | - indexing and slicing by named axis 52 | - indexing on any axis with the tick labels instead of only integers 53 | - reduction operations (like .sum, .mean, etc) support named axis arguments 54 | instead of only integer indices. 55 | 56 | ********* 57 | Prior Art 58 | ********* 59 | 60 | In no particular order: 61 | 62 | * `xarray `_ - very close in spirit to 63 | this package, xarray implements named ND array axes and tick labels. It 64 | integrates with (and depends on) Pandas. If you are doing production work, 65 | and don't mind the pandas dependency, please use xarray rather than this 66 | package. Xarray used to be called "xray". 67 | 68 | * `pandas `_ is based around a number of 69 | DataFrame-esque datatypes. 70 | 71 | * `Tabular `_ implements a 72 | spreadsheet-inspired datatype, with rows/columns, csv/etc. IO, and fancy 73 | tabular operations. 74 | 75 | * `scikits.statsmodels `_ sounded as 76 | though it had some features we'd like to eventually see implemented on top of 77 | something such as datarray, and `Skipper `_ 78 | seemed pretty interested in something like this himself. 79 | 80 | * `scikits.timeseries `_ also has a 81 | time-series-specific object that's somewhat reminiscent of labeled arrays. 82 | 86 | * `pydataframe `_ is supposed to be a 87 | clone of R's data.frame. 88 | 89 | * `larry `_, or "labeled array," often comes up 90 | in discussions alongside pandas. 91 | 92 | * `divisi `_ includes labeled sparse and 93 | dense arrays. 94 | 95 | * `pymvpa `_ provides a Dataset class 96 | encapsulating the data together with length-matched sets of attributes 97 | for the first two dimensions (samples and features). Dataset is not a 98 | subclass of numpy array, to allow other data structures (e.g. sparse 99 | matrices). 100 | 101 | * `ptsa `_ subclasses 102 | ndarray to provide per-dimension attributes, aiming to ease slicing/indexing 103 | based on the values of the axis attributes. 104 | 105 | ************* 106 | Project Goals 107 | ************* 108 | 109 | 1. Get something akin to this in the numpy core; 110 | 2.
Stick to basic functionality such that projects like scikits.statsmodels can 111 | use it as a base datatype; 112 | 3. Make an interface that allows for simple, pretty manipulation that doesn't 113 | introduce confusion; 114 | 4. Oh, and make sure that the base numpy array is still accessible. 115 | 116 | **** 117 | Code 118 | **** 119 | 120 | You can find our sources and single-click downloads: 121 | 122 | * `Main repository`_ on Github; 123 | * Documentation_ for the current release; 124 | * Download the `current trunk`_ as a tar/zip file; 125 | * Downloads of all `available releases`_. 126 | 127 | The latest released version is always available from `pypi 128 | `_. 129 | 130 | ******* 131 | Support 132 | ******* 133 | 134 | Please put up issues on the `datarray issue tracker 135 | `_. 136 | 137 | .. _main repository: http://github.com/bids/datarray 138 | .. _Documentation: http://bids.github.com/datarray 139 | .. _current trunk: http://github.com/bids/datarray/archives/master 140 | .. _available releases: http://github.com/bids/datarray/releases 141 | """ 142 | 143 | 144 | NAME = 'datarray' 145 | MAINTAINER = "Numpy Developers" 146 | MAINTAINER_EMAIL = "numpy-discussion@scipy.org" 147 | DESCRIPTION = description 148 | LONG_DESCRIPTION = long_description 149 | URL = "http://github.com/bids/datarray" 150 | DOWNLOAD_URL = "http://github.com/bids/datarray/archives/master" 151 | LICENSE = "Simplified BSD" 152 | CLASSIFIERS = CLASSIFIERS 153 | AUTHOR = "Datarray developers" 154 | AUTHOR_EMAIL = "numpy-discussion@scipy.org" 155 | PLATFORMS = "OS Independent" 156 | MAJOR = _version_major 157 | MINOR = _version_minor 158 | MICRO = _version_micro 159 | ISRELEASED = False 160 | VERSION = __version__ 161 | PACKAGES = ["datarray", "datarray/tests", "datarray/testing"] 162 | PACKAGE_DATA = {'datarray': ['LICENSE']} 163 | REQUIRES = ["numpy (>=1.7)"] 164 | INSTALL_REQUIRES = ["numpy>=1.7"] 165 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 
11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | 15 | .PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest all 16 | 17 | all: html 18 | 19 | help: 20 | @echo "Please use \`make <target>' where <target> is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " gh-pages to make the docs in Github-pages form" 23 | @echo " dirhtml to make HTML files named index.html in directories" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 29 | @echo " changes to make an overview of all changed/added/deprecated items" 30 | @echo " linkcheck to check all external links for integrity" 31 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 32 | 33 | clean: 34 | -rm -rf $(BUILDDIR)/* source/generated/* 35 | 36 | apidocs: 37 | sphinx-apidoc -f -o source/generated ../datarray ../datarray/tests 38 | 39 | html: apidocs 40 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 41 | @echo 42 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 43 | 44 | dirhtml: 45 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 48 | 49 | pickle: 50 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 51 | @echo 52 | @echo "Build finished; now you can process the pickle files." 53 | 54 | json: 55 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 56 | @echo 57 | @echo "Build finished; now you can process the JSON files." 58 | 59 | htmlhelp: 60 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 61 | @echo 62 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 63 | ".hhp project file in $(BUILDDIR)/htmlhelp." 64 | 65 | qthelp: 66 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 67 | @echo 68 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 69 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 70 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/DataArrayDocs.qhcp" 71 | @echo "To view the help file:" 72 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/DataArrayDocs.qhc" 73 | 74 | latex: 75 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 76 | @echo 77 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 78 | @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ 79 | "run these through (pdf)latex." 80 | 81 | changes: 82 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 83 | @echo 84 | @echo "The overview file is in $(BUILDDIR)/changes." 85 | 86 | linkcheck: 87 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 88 | @echo 89 | @echo "Link check complete; look for any errors in the above output " \ 90 | "or in $(BUILDDIR)/linkcheck/output.txt." 91 | 92 | doctest: 93 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 94 | @echo "Testing of doctests in the sources finished, look at the " \ 95 | "results in $(BUILDDIR)/doctest/output.txt."
96 | 97 | github: html 98 | touch $(BUILDDIR)/html/.nojekyll 99 | ghp-import $(BUILDDIR)/html/ 100 | git push -u origin gh-pages 101 | @echo 102 | @echo "Published to Github" 103 | -------------------------------------------------------------------------------- /doc/README.txt: -------------------------------------------------------------------------------- 1 | ====================== 2 | datarray Documentation 3 | ====================== 4 | 5 | This is the top level build directory for the datarray documentation. All 6 | of the documentation is written using Sphinx_, a Python documentation 7 | system built on top of reST_. In order to build the documentation, 8 | you must have Sphinx v1.0 or greater installed. 9 | 10 | This directory contains: 11 | 12 | * Makefile - the build script to build the HTML or PDF docs. Type 13 | ``make help`` for a list of options. 14 | 15 | * source - the directory containing the reST source 16 | 17 | * source/links_names.inc - reST document with hyperlink targets for common 18 | links used throughout the documentation 19 | 20 | * source/conf.py - the sphinx configuration. 21 | 22 | * source/_static - used by the sphinx build system. 23 | 24 | * source/_templates - used by the sphinx build system. 25 | 26 | Building the documentation 27 | -------------------------- 28 | 29 | You should first install the documentation dependencies. From this directory:: 30 | 31 | pip install -r doc-requirements.txt 32 | 33 | Then:: 34 | 35 | make html 36 | 37 | .. Since this README.txt is not processed by Sphinx during the 38 | .. documentation build, I've included the links directly so it is at 39 | .. least a valid reST doc. 40 | 41 | .. _Sphinx: http://sphinx.pocoo.org/ 42 | .. _reST: http://docutils.sourceforge.net/rst.html 43 | .. _numpy: http://www.scipy.org/NumPy 44 | -------------------------------------------------------------------------------- /doc/devel/make_release.rst: -------------------------------------------------------------------------------- 1 | .. _release-guide: 2 | 3 | ********************************** 4 | Guide to making a datarray release 5 | ********************************** 6 | 7 | A guide for developers making a datarray release. 8 | 9 | .. _release-checklist: 10 | 11 | Release checklist 12 | ================= 13 | 14 | * Review the open list of `datarray issues`_. Check whether there are 15 | outstanding issues that can be closed, and whether there are any issues that 16 | should delay the release. Label them ! 17 | 18 | * Review and update the release notes. Review and update the :file:`Changelog` 19 | file. Get a partial list of contributors with something like:: 20 | 21 | git shortlog -ns 0.6.0.. 22 | 23 | where ``0.6.0`` was the last release tag name. 24 | 25 | Then manually go over ``git shortlog 0.6.0..`` to make sure the release 26 | notes are as complete as possible and that every contributor was recognized; 27 | 28 | * Use the opportunity to update the ``.mailmap`` file if there are any 29 | duplicate authors listed from ``git shortlog -ns``; 30 | 31 | * Add any new authors to the ``AUTHORS`` file. 
Add any new entries to the 32 | ``THANKS`` file; 33 | 34 | * Check the copyright years in ``doc/source/conf.py`` and 35 | ``datarray/LICENSE``; 36 | 37 | * Check that the ``README.rst`` text is the same as the text in the 38 | ``long_description`` field in ``version.py``; 39 | 40 | * If you have travis-ci_ building set up you might want to push the code in its 41 | current state to a branch that will build, e.g:: 42 | 43 | git branch -D pre-release-test # in case branch already exists 44 | git co -b pre-release-test 45 | 46 | * Clean:: 47 | 48 | git clean -fxd 49 | 50 | * Make sure all tests pass on your local machine (from the datarray root 51 | directory):: 52 | 53 | nosetests --with-doctest datarray 54 | 55 | Do this on a Python 2 and Python 3 setup. 56 | 57 | * Consider running the same tests after installing into a virtualenv, to test 58 | that installing works correctly:: 59 | 60 | mkvirtualenv datarray-test 61 | pip install nose wheel 62 | git clean -fxd 63 | python setup.py install 64 | mkdir for_test 65 | cd for_test 66 | nosetests --with-doctest datarray 67 | 68 | * Check the documentation doctests:: 69 | 70 | cd doc 71 | make doctest 72 | cd .. 73 | 74 | * The release should now be ready. 75 | 76 | Doing the release 77 | ================= 78 | 79 | * Edit :file:`datarray/version.py` to set ``_version_*`` strings to the 80 | version you want. Make ``_version_extra`` be the empty string for the 81 | release; 82 | 83 | * Check you are getting the version / package name that you want by doing:: 84 | 85 | git clean -fxd 86 | python setup.py sdist --formats=gztar,zip 87 | python setup.py bdist_wheel 88 | 89 | and checking the output filenames in ``dist/``; 90 | 91 | * Make a signed tag for the release with tag of form ``0.6.0``:: 92 | 93 | git tag -s -m 'Fifth public release' 0.6.0 94 | 95 | * Once everything looks good, upload the source release to PyPi, using `twine 96 | `_:: 97 | 98 | twine upload dist/datarray* 99 | 100 | * Remember you'll need your ``~/.pypirc`` file set up right for this to work. 101 | See `setuptools intro`_. The file should look something like this:: 102 | 103 | [distutils] 104 | index-servers = 105 | pypi 106 | 107 | [pypi] 108 | username:your.pypi.username 109 | password:your-password 110 | 111 | [server-login] 112 | username:your.pypi.username 113 | password:your-password 114 | 115 | * Check how everything looks on pypi - the description, the packages. 116 | 117 | * Push the tag with ``git push origin 0.6.0`` 118 | 119 | * Push the documentation up to github with:: 120 | 121 | cd doc 122 | make github 123 | 124 | * Edit ``datarray/version.py`` to set to the next upcoming version. Set 125 | ``_version_extra`` to ``dev``. Commit and push. 126 | 127 | * Announce to the mailing lists. 128 | 129 | .. datarray code stuff 130 | .. _datarray github: http://github.com/bids/datarray 131 | .. _datarray pypi: http://pypi.python.org/pypi/datarray 132 | .. _datarray issues: http://github.com/bids/datarray/issues 133 | .. 
_datarray travis-ci: https://travis-ci.org/bids/datarray 134 | -------------------------------------------------------------------------------- /doc/doc-requirements.txt: -------------------------------------------------------------------------------- 1 | # Requirements for building docs 2 | # Use with: 3 | # pip install -r doc-requirements.txt 4 | 5 | -r ../requirements.txt 6 | sphinx>=1.3 7 | ghp-import 8 | -------------------------------------------------------------------------------- /doc/source/basic_data_array.rst: -------------------------------------------------------------------------------- 1 | .. testsetup:: 2 | 3 | import numpy as np 4 | from datarray import DataArray 5 | 6 | ============ 7 | DataArrays 8 | ============ 9 | 10 | .. _init_ufuncs: 11 | 12 | 13 | Basic DataArray Creation And Mixing 14 | =================================== 15 | 16 | DataArrays are constructed with array-like sequences and axis names: 17 | 18 | .. doctest:: 19 | 20 | >>> narr = DataArray(np.zeros((1,2,3)), axes=('a', 'b', 'c')) 21 | >>> narr.names 22 | ('a', 'b', 'c') 23 | >>> narr.axes.a 24 | Axis(name='a', index=0, labels=None) 25 | >>> narr.axes.b 26 | Axis(name='b', index=1, labels=None) 27 | >>> narr.axes.c 28 | Axis(name='c', index=2, labels=None) 29 | >>> narr.shape 30 | (1, 2, 3) 31 | 32 | Not all axes must necessarily be explicitly named, since None is a valid axis 33 | name: 34 | 35 | .. doctest:: 36 | 37 | >>> narr2 = DataArray(np.zeros((1,2,3)), axes=('a', None, 'b' )) 38 | >>> narr2.names 39 | ('a', None, 'b') 40 | 41 | If no name is given for an axis, None is implicitly assumed. So trailing axes 42 | without names will be named as None: 43 | 44 | .. doctest:: 45 | 46 | >>> narr2 = DataArray(np.zeros((1,2,3,2)), axes=('a','b' )) 47 | >>> narr2.names 48 | ('a', 'b', None, None) 49 | 50 | Combining named and unnamed arrays: 51 | 52 | .. doctest:: 53 | 54 | >>> narr = DataArray(np.zeros((1,2,3)), axes='abc') 55 | >>> res = narr + 5 # OK 56 | >>> res = narr + np.zeros((1,2,3)) # OK 57 | >>> n2 = DataArray(np.ones((1,2,3)), axes=('a','b','c')) 58 | >>> res = narr + n2 # OK 59 | 60 | >>> n3 = DataArray(np.ones((1,2,3)), axes=('x','b','c')) 61 | 62 | >>> res = narr + n3 63 | Traceback (most recent call last): 64 | ... 65 | NamedAxisError: Axis names are incompatible for a binary operation: ('a', 'b', 'c'), ('x', 'b', 'c') 66 | 67 | 68 | Now, what about matching names, but different indices for the names? 69 | 70 | .. doctest:: 71 | 72 | >>> n4 = DataArray(np.ones((2,1,3)), axes=('b','a','c')) 73 | >>> res = narr + n4 # is this OK? 74 | Traceback (most recent call last): 75 | ... 76 | NamedAxisError: Axis names are incompatible for a binary operation: ('a', 'b', 'c'), ('b', 'a', 'c') 77 | 78 | The names and the positions have to be the same, and the above example should 79 | raise an error. At least for now we will raise an error, and review later. 80 | 81 | With "labels" 82 | ------------- 83 | 84 | Constructing a DataArray such that an Axis has labels, for example: 85 | 86 | .. doctest:: 87 | 88 | >>> cap_ax_spec = 'capitals', ['washington', 'london', 'berlin', 'paris', 'moscow'] 89 | >>> time_ax_spec = 'time', ['0015', '0615', '1215', '1815'] 90 | >>> time_caps = DataArray(np.arange(4*5).reshape(4,5), [time_ax_spec, cap_ax_spec]) 91 | >>> time_caps.axes 92 | (Axis(name='time', index=0, labels=['0015', '0615', '1215', '1815']), Axis(name='capitals', index=1, labels=['washington', 'london', 'berlin', 'paris', 'moscow'])) 93 | 94 | ..
_slicing: 95 | 96 | Slicing 97 | ======= 98 | 99 | A DataArray with simple named axes can be sliced many ways. 100 | 101 | Per Axis: 102 | 103 | .. doctest:: 104 | 105 | >>> narr = DataArray(np.zeros((1,2,3)), axes=('a','b','c')) 106 | >>> narr.axes.a 107 | Axis(name='a', index=0, labels=None) 108 | >>> narr.axes.a[0] 109 | DataArray(array([[ 0., 0., 0.], 110 | [ 0., 0., 0.]]), 111 | ('b', 'c')) 112 | >>> narr.axes.a[0].axes 113 | (Axis(name='b', index=0, labels=None), Axis(name='c', index=1, labels=None)) 114 | 115 | By normal "numpy" slicing: 116 | 117 | .. doctest:: 118 | 119 | >>> narr[0].shape 120 | (2, 3) 121 | >>> narr[0].axes 122 | (Axis(name='b', index=0, labels=None), Axis(name='c', index=1, labels=None)) 123 | >>> narr.axes.a[0].axes == narr[0,:].axes 124 | True 125 | 126 | Also, slicing with ``newaxis`` is implemented: 127 | 128 | .. doctest:: 129 | 130 | >>> arr = np.arange(24).reshape((3,2,4)) 131 | >>> b = DataArray(arr, ['x', 'y', 'z']) 132 | >>> b[:,:,np.newaxis].shape 133 | (3, 2, 1, 4) 134 | >>> b[:,:,np.newaxis].names 135 | ('x', 'y', None, 'z') 136 | 137 | I can also slice with ``newaxis`` at each Axis. The effect of this is always 138 | to insert an unnamed Axis with length-1 at the original index of the named 139 | Axis: 140 | 141 | .. doctest:: 142 | 143 | >>> b.axes 144 | (Axis(name='x', index=0, labels=None), Axis(name='y', index=1, labels=None), Axis(name='z', index=2, labels=None)) 145 | >>> b.axes.y[np.newaxis].names 146 | ('x', None, 'y', 'z') 147 | >>> b.axes.y[np.newaxis].shape 148 | (3, 1, 2, 4) 149 | 150 | Slicing and labels 151 | ------------------ 152 | 153 | It is also possible to use labels in any of the slicing syntax above: 154 | 155 | .. doctest:: 156 | 157 | >>> time_caps #doctest: +NORMALIZE_WHITESPACE 158 | DataArray(array([[ 0, 1, 2, 3, 4], 159 | [ 5, 6, 7, 8, 9], 160 | [10, 11, 12, 13, 14], 161 | [15, 16, 17, 18, 19]]), 162 | (('time', ('0015', '0615', '1215', '1815')), ('capitals', ('washington', 'london', 'berlin', 'paris', 'moscow')))) 163 | >>> time_caps.axes.capitals['berlin'::-1] #doctest: +NORMALIZE_WHITESPACE 164 | DataArray(array([[ 2, 1, 0], 165 | [ 7, 6, 5], 166 | [12, 11, 10], 167 | [17, 16, 15]]), 168 | (('time', ('0015', '0615', '1215', '1815')), ('capitals', ('berlin', 'london', 'washington')))) 169 | >>> time_caps.axes.time['0015':'1815'] #doctest: +NORMALIZE_WHITESPACE 170 | DataArray(array([[ 0, 1, 2, 3, 4], 171 | [ 5, 6, 7, 8, 9], 172 | [10, 11, 12, 13, 14]]), 173 | (('time', ('0015', '0615', '1215')), ('capitals', ('washington', 'london', 'berlin', 'paris', 'moscow')))) 174 | >>> time_caps[:, 'london':3] #doctest: +NORMALIZE_WHITESPACE 175 | DataArray(array([[ 1, 2], 176 | [ 6, 7], 177 | [11, 12], 178 | [16, 17]]), 179 | (('time', ('0015', '0615', '1215', '1815')), ('capitals', ('london', 'berlin')))) 180 | 181 | The .start and .stop attributes of the slice object can be either None, an 182 | integer index, or a valid label. They may even be mixed. *The .step attribute, 183 | however, must be None or a nonzero integer.* 184 | 185 | **Historical note: previously integer labels clobbered indices.** For example:: 186 | 187 | >>> centered_data = DataArray(np.random.randn(6), [ ('c_idx', range(-3,3)) ]) 188 | >>> centered_data.axes.c_idx.make_slice( slice(0, 6, None) ) 189 | (slice(3, 6, None),) 190 | 191 | .. note:: 192 | 193 | The code above doesn't currently (as of Nov/2010) run, because integer 194 | labels haven't been implemented. See ticket gh-40.
195 | 196 | make_slice() first tries to look up the key parameters as labels, and then sees 197 | if the key parameters can be used as simple indices. Thus 0 is found as index 198 | 3, and 6 is passed through as index 6. 199 | 200 | Possible resolution 1 201 | ~~~~~~~~~~~~~~~~~~~~~ 202 | 203 | "larry" would make this distinction:: 204 | 205 | >>> centered_data.axes.c_idx[ [0]:[2] ] 206 | >>> < returns underlying array from [3:5] > 207 | >>> centered_data.axes.c_idx[ 0:2 ] 208 | >>> < returns underlying array from [0:2] > 209 | 210 | And I believe mixing of labels and integers is valid also. 211 | 212 | Possible resolution 2 (the winner) 213 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 214 | 215 | Do not allow integer labels -- cast to float perhaps 216 | 217 | **Note**: this will be the solution. When validating labels on an Axis, ensure 218 | that none of them ``isinstance(t, int)`` 219 | 220 | 221 | Possible resolution 3 222 | ~~~~~~~~~~~~~~~~~~~~~ 223 | 224 | Restrict access to tick-based slicing to another special slicing object. 225 | 226 | .. _broadcasting: 227 | 228 | Broadcasting 229 | ============ 230 | 231 | What about broadcasting between two named arrays, where the broadcasting 232 | adds an axis? All ordinary NumPy rules for shape compatibility apply. 233 | Additionally, DataArray imposes axis name consistency rules. 234 | 235 | The broadcasted DataArray below, "a", takes on dummy dimensions that are taken 236 | to be compatible with the larger DataArray: 237 | 238 | .. doctest:: 239 | 240 | >>> b = DataArray(np.ones((3,3)), axes=('x','y')) 241 | >>> a = DataArray(np.ones((3,)), axes=('y',)) 242 | >>> res = 2*b - a 243 | >>> res # doctest: +NORMALIZE_WHITESPACE 244 | DataArray(array([[ 1., 1., 1.], 245 | [ 1., 1., 1.], 246 | [ 1., 1., 1.]]), 247 | ('x', 'y')) 248 | 249 | When there are unnamed dimensions, they also must be consistently oriented 250 | across arrays when broadcasting: 251 | 252 | .. doctest:: 253 | 254 | >>> b = DataArray(np.arange(24).reshape(3,2,4), ['x', None, 'y']) 255 | >>> a = DataArray(np.arange(8).reshape(2,4), [None, 'y']) 256 | >>> res = a + b 257 | >>> res 258 | DataArray(array([[[ 0, 2, 4, 6], 259 | [ 8, 10, 12, 14]], 260 | 261 | [[ 8, 10, 12, 14], 262 | [16, 18, 20, 22]], 263 | 264 | [[16, 18, 20, 22], 265 | [24, 26, 28, 30]]]), 266 | ('x', None, 'y')) 267 | 268 | We already know that if the dimension names don't match, this won't be allowed 269 | (even though the shapes are correct): 270 | 271 | .. doctest:: 272 | 273 | >>> b = DataArray(np.ones((3,3)), axes=('x','y')) 274 | >>> a = DataArray(np.ones((3,)), axes=('x',)) 275 | >>> res = 4*b - a 276 | Traceback (most recent call last): 277 | ... 278 | NamedAxisError: Axis names are incompatible for a binary operation: ('x', 'y'), ('x',) 279 | 280 | But a numpy idiom for padding dimensions helps us in this case: 281 | 282 | .. doctest:: 283 | 284 | >>> res = 2*b - a[:,None] 285 | >>> res # doctest: +NORMALIZE_WHITESPACE 286 | DataArray(array([[ 1., 1., 1.], 287 | [ 1., 1., 1.], 288 | [ 1., 1., 1.]]), 289 | ('x', 'y')) 290 | 291 | In other words, this scenario is also a legal combination: 292 | 293 | ..
doctest:: 294 | 295 | >>> a2 = a[:,None] 296 | >>> a2.names 297 | ('x', None) 298 | >>> b + a2 # doctest: +NORMALIZE_WHITESPACE 299 | DataArray(array([[ 2., 2., 2.], 300 | [ 2., 2., 2.], 301 | [ 2., 2., 2.]]), 302 | ('x', 'y')) 303 | 304 | The rule for dimension compatibility is that any two axes match if one of the following is true 305 | 306 | * their (name, length) pairs are equal 307 | * their dimensions are broadcast-compatible, and their axes are equal 308 | * their dimensions are broadcast-compatible, and their axes are 309 | non-conflicting (ie, one or both are None) 310 | 311 | **Question** -- what about this situation: 312 | 313 | .. doctest:: 314 | 315 | >>> b = DataArray(np.ones((3,3)), axes=('x','y')) 316 | >>> a = DataArray(np.ones((3,1)), axes=('x','y')) 317 | >>> a+b # doctest: +NORMALIZE_WHITESPACE 318 | DataArray(array([[ 2., 2., 2.], 319 | [ 2., 2., 2.], 320 | [ 2., 2., 2.]]), 321 | ('x', 'y')) 322 | 323 | The broadcasting rules currently allow this combination. I'm inclined to allow 324 | it. Even though the axes are different lengths in ``a`` and ``b``, and 325 | therefore *might* be considered different logical axes, there is no actual 326 | information collision from ``a.axes.y``. 327 | 328 | .. _iteration: 329 | 330 | Iteration 331 | ========= 332 | 333 | seems to work: 334 | 335 | .. doctest:: 336 | 337 | >>> for foo in time_caps: 338 | ... print foo 339 | ... print foo.axes 340 | ... 341 | DataArray([0 1 2 3 4], 342 | (('capitals', ('washington', 'london', 'berlin', 'paris', 'moscow')),)) 343 | (Axis(name='capitals', index=0, labels=['washington', 'london', 'berlin', 'paris', 'moscow']),) 344 | DataArray([5 6 7 8 9], 345 | (('capitals', ('washington', 'london', 'berlin', 'paris', 'moscow')),)) 346 | (Axis(name='capitals', index=0, labels=['washington', 'london', 'berlin', 'paris', 'moscow']),) 347 | DataArray([10 11 12 13 14], 348 | (('capitals', ('washington', 'london', 'berlin', 'paris', 'moscow')),)) 349 | (Axis(name='capitals', index=0, labels=['washington', 'london', 'berlin', 'paris', 'moscow']),) 350 | DataArray([15 16 17 18 19], 351 | (('capitals', ('washington', 'london', 'berlin', 'paris', 'moscow')),)) 352 | (Axis(name='capitals', index=0, labels=['washington', 'london', 'berlin', 'paris', 'moscow']),) 353 | 354 | >>> for foo in time_caps.T: 355 | ... print foo 356 | ... print foo.axes 357 | ... 358 | DataArray([ 0 5 10 15], 359 | (('time', ('0015', '0615', '1215', '1815')),)) 360 | (Axis(name='time', index=0, labels=['0015', '0615', '1215', '1815']),) 361 | DataArray([ 1 6 11 16], 362 | (('time', ('0015', '0615', '1215', '1815')),)) 363 | (Axis(name='time', index=0, labels=['0015', '0615', '1215', '1815']),) 364 | DataArray([ 2 7 12 17], 365 | (('time', ('0015', '0615', '1215', '1815')),)) 366 | (Axis(name='time', index=0, labels=['0015', '0615', '1215', '1815']),) 367 | DataArray([ 3 8 13 18], 368 | (('time', ('0015', '0615', '1215', '1815')),)) 369 | (Axis(name='time', index=0, labels=['0015', '0615', '1215', '1815']),) 370 | DataArray([ 4 9 14 19], 371 | (('time', ('0015', '0615', '1215', '1815')),)) 372 | (Axis(name='time', index=0, labels=['0015', '0615', '1215', '1815']),) 373 | 374 | Or even more conveniently: 375 | 376 | .. doctest:: 377 | 378 | >>> for foo in time_caps.axes.capitals: 379 | ... print foo 380 | ... 
381 | DataArray([ 0 5 10 15], 382 | (('time', ('0015', '0615', '1215', '1815')),)) 383 | DataArray([ 1 6 11 16], 384 | (('time', ('0015', '0615', '1215', '1815')),)) 385 | DataArray([ 2 7 12 17], 386 | (('time', ('0015', '0615', '1215', '1815')),)) 387 | DataArray([ 3 8 13 18], 388 | (('time', ('0015', '0615', '1215', '1815')),)) 389 | DataArray([ 4 9 14 19], 390 | (('time', ('0015', '0615', '1215', '1815')),)) 391 | 392 | .. _transposition: 393 | 394 | Transposition of Axes 395 | ===================== 396 | 397 | Transposition of a DataArray preserves the dimension names, and updates the 398 | corresponding indices: 399 | 400 | .. doctest:: 401 | 402 | >>> b = DataArray(np.zeros((3, 2, 4)), axes=['x', None, 'y']) 403 | >>> b.shape 404 | (3, 2, 4) 405 | >>> b.axes 406 | (Axis(name='x', index=0, labels=None), Axis(name=None, index=1, labels=None), Axis(name='y', index=2, labels=None)) 407 | >>> b.T.shape 408 | (4, 2, 3) 409 | >>> b.T.axes 410 | (Axis(name='y', index=0, labels=None), Axis(name=None, index=1, labels=None), Axis(name='x', index=2, labels=None)) 411 | 412 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # DataArray Docs documentation build configuration file, created by 4 | # sphinx-quickstart on Fri May 28 11:07:18 2010. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # sys.path.append(os.path.abspath('.')) 20 | 21 | # If your documentation needs a minimal Sphinx version, state it here. 22 | needs_sphinx = '1.3' 23 | 24 | # We load the release info into a dict by explicit execution 25 | # Use exec on contents for Python 3 compatibility 26 | rel = {} 27 | ver_file = os.path.join('..', '..', 'datarray', 'version.py') 28 | with open(ver_file, 'rt') as fobj: 29 | exec(fobj.read(), rel) 30 | 31 | # -- General configuration ----------------------------------------------------- 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be extensions 34 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 35 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.mathjax', 'sphinx.ext.doctest', 36 | 'sphinx.ext.napoleon', 37 | # Only uncomment intersphinx if we really start using it, and in 38 | # that case it should probably be conditionally added only for 39 | # release builds, because it makes network lookups on every build 40 | # and can make the process annoyingly slow. 41 | #'sphinx.ext.intersphinx', 42 | ] 43 | 44 | # Add any paths that contain templates here, relative to this directory. 45 | templates_path = ['_templates'] 46 | 47 | # The suffix of source filenames. 48 | source_suffix = '.rst' 49 | 50 | # The encoding of source files. 51 | #source_encoding = 'utf-8' 52 | 53 | # The master toctree document. 54 | master_doc = 'index' 55 | 56 | # General information about the project. 
57 | project = u'DataArray Docs' 58 | copyright = u'2010-2016, %(MAINTAINER)s <%(AUTHOR_EMAIL)s>' % rel 59 | 60 | # The version info for the project you're documenting, acts as replacement for 61 | # |version| and |release|, also used in various other places throughout the 62 | # built documents. 63 | # 64 | # The short X.Y version. 65 | version = rel['__version__'] 66 | # The full version, including alpha/beta/rc tags. 67 | release = version 68 | 69 | # The language for content autogenerated by Sphinx. Refer to documentation 70 | # for a list of supported languages. 71 | #language = None 72 | 73 | # There are two options for replacing |today|: either, you set today to some 74 | # non-false value, then it is used: 75 | #today = '' 76 | # Else, today_fmt is used as the format for a strftime call. 77 | #today_fmt = '%B %d, %Y' 78 | 79 | # List of documents that shouldn't be included in the build. 80 | #unused_docs = [] 81 | 82 | # List of directories, relative to source directory, that shouldn't be searched 83 | # for source files. 84 | exclude_trees = [] 85 | 86 | # The reST default role (used for this markup: `text`) to use for all documents. 87 | #default_role = None 88 | 89 | # If true, '()' will be appended to :func: etc. cross-reference text. 90 | #add_function_parentheses = True 91 | 92 | # If true, the current module name will be prepended to all description 93 | # unit titles (such as .. function::). 94 | #add_module_names = True 95 | 96 | # If true, sectionauthor and moduleauthor directives will be shown in the 97 | # output. They are ignored by default. 98 | #show_authors = False 99 | 100 | # The name of the Pygments (syntax highlighting) style to use. 101 | pygments_style = 'sphinx' 102 | 103 | # A list of ignored prefixes for module index sorting. 104 | #modindex_common_prefix = [] 105 | 106 | 107 | # -- Options for HTML output --------------------------------------------------- 108 | 109 | # The theme to use for HTML and HTML Help pages. 110 | html_theme = 'alabaster' 111 | 112 | # Theme options are theme-specific and customize the look and feel of a theme 113 | # further. For a list of options available for each theme, see the 114 | # documentation. 115 | #html_theme_options = {} 116 | 117 | # Add any paths that contain custom themes here, relative to this directory. 118 | #html_theme_path = [] 119 | 120 | # The name for this set of Sphinx documents. If None, it defaults to 121 | # " v documentation". 122 | #html_title = None 123 | 124 | # A shorter title for the navigation bar. Default is the same as html_title. 125 | #html_short_title = None 126 | 127 | # The name of an image file (relative to this directory) to place at the top 128 | # of the sidebar. 129 | #html_logo = None 130 | 131 | # The name of an image file (within the static path) to use as favicon of the 132 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 133 | # pixels large. 134 | #html_favicon = None 135 | 136 | # Add any paths that contain custom static files (such as style sheets) here, 137 | # relative to this directory. They are copied after the builtin static files, 138 | # so a file named "default.css" will overwrite the builtin "default.css". 139 | #html_static_path = ['_static'] 140 | html_static_path = [] 141 | 142 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 143 | # using the given strftime format. 
144 | #html_last_updated_fmt = '%b %d, %Y' 145 | 146 | # If true, SmartyPants will be used to convert quotes and dashes to 147 | # typographically correct entities. 148 | #html_use_smartypants = True 149 | 150 | # Custom sidebar templates, maps document names to template names. 151 | #html_sidebars = {} 152 | 153 | # Additional templates that should be rendered to pages, maps page names to 154 | # template names. 155 | #html_additional_pages = {} 156 | 157 | # If false, no module index is generated. 158 | #html_use_modindex = True 159 | 160 | # If false, no index is generated. 161 | #html_use_index = True 162 | 163 | # If true, the index is split into individual pages for each letter. 164 | #html_split_index = False 165 | 166 | # If true, links to the reST sources are added to the pages. 167 | #html_show_sourcelink = True 168 | 169 | # If true, an OpenSearch description file will be output, and all pages will 170 | # contain a <link> tag referring to it. The value of this option must be the 171 | # base URL from which the finished HTML is served. 172 | #html_use_opensearch = '' 173 | 174 | # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). 175 | #html_file_suffix = '' 176 | 177 | # Output file base name for HTML help builder. 178 | htmlhelp_basename = 'DataArrayDocsdoc' 179 | 180 | 181 | # -- Options for LaTeX output -------------------------------------------------- 182 | 183 | # The paper size ('letter' or 'a4'). 184 | #latex_paper_size = 'letter' 185 | 186 | # The font size ('10pt', '11pt' or '12pt'). 187 | #latex_font_size = '10pt' 188 | 189 | # Grouping the document tree into LaTeX files. List of tuples 190 | # (source start file, target name, title, author, documentclass [howto/manual]). 191 | latex_documents = [ 192 | ('index', 'DataArrayDocs.tex', u'DataArray Docs Documentation', 193 | u'Mike Trumpis, Fernando Pérez, Kilian Koepseel', 'manual'), 194 | ] 195 | 196 | # The name of an image file (relative to this directory) to place at the top of 197 | # the title page. 198 | #latex_logo = None 199 | 200 | # For "manual" documents, if this is true, then toplevel headings are parts, 201 | # not chapters. 202 | #latex_use_parts = False 203 | 204 | # Additional stuff for the LaTeX preamble. 205 | #latex_preamble = '' 206 | 207 | # Documents to append as an appendix to all manuals. 208 | #latex_appendices = [] 209 | 210 | # If false, no module index is generated. 211 | #latex_use_modindex = True 212 | 213 | 214 | # Example configuration for intersphinx: refer to the Python standard library. 215 | intersphinx_mapping = {'http://docs.python.org/': None} 216 | -------------------------------------------------------------------------------- /doc/source/design/array_axes.svg: -------------------------------------------------------------------------------- (SVG figure source omitted: nothing readable beyond the image/svg+xml markup survived extraction) -------------------------------------------------------------------------------- /doc/source/design/design.rst: -------------------------------------------------------------------------------- 1 | ============================== 2 | DataArray: some design notes 3 | ============================== 4 | 5 | A DataArray is a subclass of the basic Numpy ndarray object that provides an 6 | explicit mechanism for attaching information to the *axes* of the underlying 7 | numpy array.
This is achieved by attaching an Axis object to each dimension of 8 | the array; an Axis object has an optional *name* as well as optional *labels* 9 | (think of them as tick labels in a figure). 10 | 11 | With Axis objects attached to an array, it becomes possible to manipulate the 12 | array by named axis, to slice an axis by named label, etc. These features 13 | complement the rich semantics that numpy has for the *contents* of an array, 14 | encapsulated in its dtype machinery for structured/record arrays. 15 | 16 | Arrays with named / labeled axes 17 | ================================ 18 | 19 | ndarrays extended to have an explicit "hypercross" of axes, each with 20 | names (possibly defaulted). 21 | 22 | * for methods in which an "axis" is denoted, an axis name may be used 23 | 24 | * indexing/slicing along a named axis returns that slicing, at that axis, 25 | along with slice(None) slicing along all other axes 26 | 27 | * for all arithmetic/binary-op matters under which dimension numbers and 28 | lengths must match, the hypercrosses must also be consistent 29 | 30 | * broadcasting will "inherit" labels from the super-hyper-cross 31 | (see np.broadcast) 32 | 33 | * padding dimensions will insert "dummy" dimensions, eg:: 34 | 35 | a = datarray( np.random.randn(10,10), ('time', 'temp') ) 36 | a[:,None,:].axes --> ('time', None, 'temp') 37 | 38 | * axes may be transposed 39 | 40 | Arrays with named axes, whose named axes have ticks 41 | =================================================== 42 | 43 | each named axis has tick labels 44 | 45 | * numpy, fancy and slice-like indexing on each axis:: 46 | 47 | x.named_axis[...] 48 | --> does any kind of numpy indexing on the axis 49 | x.named_axis.at( *args ) 50 | --> returns essentially "fancy" indexing along the axis, at valid ticks in args 51 | x.named_axis.t_slice( start, stop, [step]) 52 | --> where arguments are valid ticks, performs a slicing-like operation along the axis 53 | 54 | * mixed indexing on the array:: 55 | 56 | x.at( *args ) 57 | --> len(args) <= x.ndim -- for each indexing spec in args, perform that indexing 58 | on the enumerated axes 59 | x.t_slice( *args ) 60 | --> same as above, but perform t_slice slicing on the enumerated axes 61 | 62 | (my thoughts on) What Is The DataArray? 63 | ======================================= 64 | 65 | * 1st and foremost, **an ndarray**, in N dimensions, with any dtype 66 | * has means to locate data more descriptively (i.e., with custom names 67 | for dimensions/axes, and custom names for indices along any axis) 68 | 69 | :: 70 | 71 | >>> darr = DataArray(np.random.randn(2,3,4), ('ex', 'why', 'zee')) 72 | >>> darr.sum(axis='ex') 73 | DataArray([[-0.39052695, -2.07493873, 1.19664474, 0.36681094], 74 | [-1.04287781, 0.5767191 , -0.35425298, 1.10468356], 75 | [ 0.08331866, -0.36532857, 0.12905265, -1.94559672]]) 76 | ('why', 'zee') 77 | >>> for subarr in darr.axis.why: 78 | ... print subarr.shape, subarr.labels 79 | ... 80 | (2, 4) ('ex', 'zee') 81 | (2, 4) ('ex', 'zee') 82 | (2, 4) ('ex', 'zee') 83 | 84 | * An axis "label" can always stand in for an axis number; an index 85 | "tick" can (in some TBD sense) stand in for an integer index 86 | * it is, if anything, **more restrictive** in operations; for example 87 | 88 | :: 89 | 90 | >>> ndarr_ones = np.ones((10,10,10)) 91 | >>> ndarr_twos = np.ones((10,10,10))*2 92 | >>> ndarr_3s = ndarr_ones + ndarr_twos # OK!
93 | >>> darr_abc = DataArray(ndarr_ones, ('a', 'b', 'c')) 94 | >>> darr_bac = DataArray(ndarr_twos, ('b', 'a', 'c')) 95 | >>> darr_wtf = darr_abc + darr_bac # BAD! frames are rotated 96 | 97 | (and my very own thoughts on) What The DataArray Is Not 98 | ======================================================= 99 | 100 | Unions And Intersections 101 | ------------------------ 102 | 103 | DataArray may broadcast with certain union rules for adapting 104 | metadata, but it does not do any data union/intersection rule for 105 | operations. For example, the result of adding an array with axes ('a', 'c') with an 106 | array with axis 'c' takes on information from the "superset" of 107 | axes. This is analogous to ndarray taking on shape information from 108 | the superset of shapes. 109 | 110 | :: 111 | 112 | >>> darr_abc[:,0,:] 113 | DataArray([[ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], 114 | ... 115 | [ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]) 116 | ('a', 'c') 117 | >>> darr_bac[0,0] 118 | DataArray([ 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.]) 119 | ('c',) 120 | >>> darr_abc[:,0,:] + darr_bac[0,0] 121 | DataArray([[ 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.], 122 | ... 123 | [ 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.]]) 124 | ('a', 'c') 125 | 126 | But it will not fill or trim any dimension to fit the shape of a 127 | fellow operand's array (it seems this violation is simply caught at the C-level of an ndarray):: 128 | 129 | >>> darr_abc[:,0,:] + darr_bac[0,0,:5] 130 | ------------------------------------------------------------ 131 | Traceback (most recent call last): 132 | File "<stdin>", line 1, in <module> 133 | ValueError: shape mismatch: objects cannot be broadcast to a single shape 134 | 135 | For me, this looks like the **domain of utility functions** (or 136 | possibly utility methods that yield new DataArrays). 137 | 138 | Namespace 139 | --------- 140 | 141 | It would be good practice to keep all the dynamically generated 142 | DataArray attributes (e.g., Axis labels) removed from the top-level 143 | array attribute list. This is what we currently have as "axis". 144 | 145 | It might(?) be a good idea to put all future special purpose methods 146 | under that object too. 147 | 148 | 149 | Lessons Learned 150 | =============== 151 | 152 | "Smart" Indexing 153 | ---------------- 154 | 155 | The smart indexing implemented by Larry is very full featured. I believe the 156 | design of using lists to separate labels from integers in mixed indexing is a 157 | good choice (and necessary). However, I think it illustrates the potential 158 | confusion created by mixed indexing and is a good argument for discouraging/not 159 | allowing it. 160 | 161 | "Smart" Arithmetic 162 | ------------------ 163 | 164 | * Larry makes attempts to align its arrays when performing arithmetic, so as to 165 | operate on identical coordinates. 166 | * It also might introduce intersections between arrays. 167 | * It does not broadcast 168 | 169 | Ideas 170 | ===== 171 | 172 | Axis Slicing 173 | ------------ 174 | 175 | Use Case: chained axis slicing 176 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 177 | 178 | slicing on an axis returns a new DataArray:: 179 | 180 | arr = DataArray(np.random.randn(10,10), labels=('time', 'freq')) 181 | arr.axis.time[:5] --> new DataArray with (time, freq) axes 182 | 183 | However, slicing on the special slicing object "aix" returns a new Special 184 | Tuple (stuple).
185 | 186 | Stuple: 187 | 188 | * is len-N, for ND arrays 189 | * only one entry is (potentially) not ``slice(None)`` 190 | * has knowledge of its own index 191 | * has knowledge of other axes (static or dynamically generated attributes) 192 | * can be composed with other stuples in a special way (??) -- 193 | 194 | :: 195 | 196 | s1 --> ( slice(0,4), slice(None) ) 197 | s2 --> ( slice(None), slice(3,10) ) 198 | s1 s2 --> ( slice(0,4), slice(3,10) ) 199 | 200 | * can be given a "parent" stuple when constructed, into which the new stuple 201 | merges its own slicing in ``__getitem__`` 202 | 203 | Constructor prototype:: 204 | 205 | def __init__(self, *args, parent=None, index=None, name=None) ?? 206 | 207 | To chain slicing, the syntax would be like this:: 208 | 209 | arr.aix.time[:4].freq[3:8] 210 | --OR-- 211 | arr[ arr.aix.time[:4].freq[3:8] ] 212 | 213 | Chaining an axis on itself **will not** be implemented yet (possibly ever):: 214 | 215 | arr.aix.time[:4].time[:2] --> raise error 216 | 217 | 218 | ============================================ 219 | The May 2011 DataArray summit at Enthought 220 | ============================================ 221 | 222 | How to handle datarray indexing 223 | =============================== 224 | 225 | This document is a summary of the syntax and semantics that were agreed upon at 226 | the Data Array summit held at Enthought in May 2011. 227 | 228 | The DataArray object will have a .axes attribute which exhibits the following 229 | behaviour:: 230 | 231 | >>> a = DataArray( ..., axes=('date', ('stocks', ('aapl', 'ibm', 'goog', 'msft')), 'metric')) 232 | 233 | # get the axis object 234 | >>> a.axes.stocks 235 | 236 | # the same as a[:,0:2,:] 237 | >>> a.axes.stocks['aapl':'goog'] 238 | 239 | # get the nth axis object (particularly if not named) 240 | >>> a.axes[n] 241 | 242 | # get an "axes indexer" object for the indicated objects. 243 | >>> a.axes('stocks', 'date') 244 | 245 | This indexer object returns something that is meant to be indexed with as many 246 | dimensions as it was passed arguments, but that will, upon indexing, return 247 | arrays with dimensions ordered just like the original underlying array. 248 | 249 | The information is all available at the point where you are constructing 250 | the slicer, so you don't need to go rummaging around the code to find the 251 | correct order of the axes from where the array was originally defined. It also 252 | potentially permits you to use underlying arrays with different axis orders in 253 | the same code unambiguously. 254 | 255 | There was also the thought that, with numerical arguments, this would fill a 256 | hole in the current numpy API for arbitrary re-ordering of axes in a view for 257 | slicing (essentially a super-generalized transpose-ish sort of thing). 258 | 259 | The result of the slicing operation retains the original ordering, but the 260 | slices provided to a.axes()[] need to match the order of the arguments to 261 | a.axes. So in other words, when you do:: 262 | 263 | >>> tslicer = a.axes('t') 264 | 265 | then:: 266 | 267 | >>> tslicer['a':'z'] 268 | 269 | returns an array with axes x, y, z, t in that order, but sliced as:: 270 | 271 | a[:,:,:,'a':'z'] 272 | 273 | When you have:: 274 | 275 | xyslicer = a.axes('x', 'y') 276 | yxslicer = a.axes('y', 'x') 277 | 278 | then I would expect to do:: 279 | 280 | xyslicer[x1:x2, y1:y2] 281 | 282 | but:: 283 | 284 | yxslicer[y1:y2, x1:x2] 285 | 286 | However, these are two equivalent ways of writing ``a[x1:x2, y1:y2, :, :]``.
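For illustration only, here is a minimal sketch of the remapping such an indexer has to perform. It is written against a plain numpy ndarray with a hypothetical ``AxesIndexer`` helper, not the API agreed at the summit, just to make the reordering concrete::

    import numpy as np

    class AxesIndexer(object):
        # Remap slices given in axis-name order back to the parent
        # array's native axis order; untouched axes get slice(None).
        def __init__(self, arr, names, order):
            self.arr = arr      # the underlying ndarray
            self.names = names  # native axis-name order
            self.order = order  # the order the caller will slice in

        def __getitem__(self, key):
            if not isinstance(key, tuple):
                key = (key,)
            full = [slice(None)] * self.arr.ndim
            for name, k in zip(self.order, key):
                full[self.names.index(name)] = k
            return self.arr[tuple(full)]

    a = np.arange(24).reshape(2, 3, 4)
    yx = AxesIndexer(a, ['x', 'y', 'z'], ['y', 'x'])
    # yx[1:3, 0] slices 'y' then 'x', but the result is a[0, 1:3, :],
    # i.e. dimensions come back in the array's original order.
    assert (yx[1:3, 0] == a[0, 1:3, :]).all()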
287 | If explicit transposition of the returned data is desired, it can be done
288 | with::
289 | 
290 |     >>> a.transpose('stocks','date').axes('stocks','date')[...]
291 | 
292 |     # Now, actually do the slicing: equivalent to a[100, 0:2, :]
293 |     >>> a.axes('stocks', 'date')['aapl':'goog',100]
294 | 
295 |     # can supply an axis number as well
296 |     >>> a.axes(1, 'date')['aapl':'goog',100:200]
297 | 
298 | In addition, axes can have the notion of an index mapper which allows indexing and
299 | slicing by labels or values other than strings and integers. To use these, you
300 | have to supply a keyword argument to the axes call::
301 | 
302 |     # add a datetime.date -> index map
303 |     >>> date_mapper = DictMapper(...)
304 |     >>> a = DataArray( ..., axes=(('date', date_mapper), ... ))
305 | 
306 |     # do mapped indexing XXX - this might not have been the final decision
307 |     >>> a.axes('stocks', 'date', mapped=True)['aapl':'goog', datetime.date(2011, 1, 1):datetime.date(2011, 5, 14)]
308 | 
309 | 
310 | The exact semantics of mapping are yet to be determined, but the thought is that
311 | there would be standard mappers to do things like interpolation and mapped integer
312 | indexing.
313 | 
314 | Other notes
315 | -----------
316 | 
317 | * Axis names can only be strings that are valid Python identifiers.
318 | * Labels can only be strings, and must be unique.
319 | * All other indexing cases are handled by mapping (however that will work).
320 | * Axes can have arbitrary aliases which do not have to be unique.
321 | * An axis can have an associated array of the same length as the set of labels
322 |   for additional data storage.
323 | 
--------------------------------------------------------------------------------
/doc/source/design/index.rst:
--------------------------------------------------------------------------------
1 | ======
2 | Design
3 | ======
4 | 
5 | Contents:
6 | 
7 | .. toctree::
8 |    :maxdepth: 1
9 | 
10 |    design.rst
11 |    issues.rst
12 | 
13 | 
14 | Indices and tables
15 | ==================
16 | 
17 | * :ref:`genindex`
18 | * :ref:`modindex`
19 | * :ref:`search`
20 | 
--------------------------------------------------------------------------------
/doc/source/design/issues.rst:
--------------------------------------------------------------------------------
1 | ======================================
2 |  Issues, open questions and todo list
3 | ======================================
4 | 
5 | Questions and issues about the datarray prototype.
6 | 
7 | .. contents::
8 | 
9 | 
10 | Labels
11 | ======
12 | 
13 | Labels are a relatively new addition to datarrays. The names of a datarray
14 | identify the axes of the array. The labels of a datarray identify the elements
15 | along an axis. Both names and labels are optional.
16 | 
17 | Axis._label_dict is not updated when labels are changed
18 | -------------------------------------------------------
19 | 
20 | Example::
21 | 
22 |     >> dar = DataArray([1, 2], [('time', ['A', 'B'])])
23 |     >> dar.axis.time._label_dict
24 |     {'A': 0, 'B': 1}
25 |     >> dar.axis.time.labels[0] = 'X'
26 |     >> dar.axis.time.labels
27 |     ['X', 'B']
28 |     >> dar.axis.time._label_dict
29 |     {'A': 0, 'B': 1}
30 | 
31 | Possible solutions:
32 | 
33 | #. Don't allow labels to be changed
34 | #. Only allow labels to be changed through a method that also updates _label_dict
35 | #. Don't store _label_dict, create on the fly as needed
36 | 
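A toy sketch of option #3, with a pared-down stand-in for Axis (not the real
class), showing that a computed mapping can never go stale::

    class Axis(object):
        def __init__(self, name, index, labels=None):
            self.name = name
            self.index = index
            self.labels = labels

        @property
        def _label_dict(self):
            # Rebuilt on every access instead of stored at construction.
            return dict(zip(self.labels, range(len(self.labels))))

    ax = Axis('time', 0, ['A', 'B'])
    assert ax._label_dict == {'A': 0, 'B': 1}
    ax.labels[0] = 'X'                          # mutate the labels...
    assert ax._label_dict == {'X': 0, 'B': 1}   # ...the mapping follows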
37 | pandas, I believe, makes the labels immutable (#1). larry allows the labels to
38 | be changed and calculates the mapping dict on the fly (#3).
39 | 
40 | 
41 | Can I have labels without axis names?
42 | -------------------------------------
43 | 
44 | I'd like to use labels without names. At the moment that is not possible::
45 | 
46 |     >>> DataArray([1, 2], [(None, ('a', 'b'))])
47 | 
48 |     ValueError: labels only supported when Axis has a name
49 | 
50 | Well, it is possible::
51 | 
52 |     >>> dar = DataArray([1, 2], [('tmp', ('a', 'b'))])
53 |     >>> dar.set_name(0, None)
54 |     >>> dar.axes
55 |     (Axis(name=None, index=0, labels=('a', 'b')),)
56 | 
57 | 
58 | Add a labels input parameter?
59 | -----------------------------
60 | 
61 | What do you think of adding a ``labels`` parameter to DataArray?
62 | 
63 | Current behavior::
64 | 
65 |     >>> dar = DataArray([[1, 2], [3, 4]], (('row', ['A','B']), ('col', ['C', 'D'])))
66 |     >>> dar.axes
67 |     (Axis(name='row', index=0, labels=['A', 'B']),
68 |      Axis(name='col', index=1, labels=['C', 'D']))
69 | 
70 | Proposed labels as separate input parameter::
71 | 
72 |     >>> DataArray([[1, 2], [3, 4]], names=('row', 'col'), labels=[['A', 'B'], ['C', 'D']])
73 | 
74 | I think this would make it easier for new users to construct a DataArray with
75 | labels just from looking at the DataArray signature. It would match the
76 | signature of Axis. My use case is to use labels only and not named axes (at
77 | first), so::
78 | 
79 |     >>> DataArray([[1, 2], [3, 4]], labels=[['A', 'B'], ['C', 'D']])
80 | 
81 | instead of the current::
82 | 
83 |     >>> DataArray([[1, 2], [3, 4]], ((None, ['A','B']), (None, ['C', 'D'])))
84 | 
85 | It might also cause fewer typos (parentheses matching) at the command line.
86 | 
87 | Having separate names and labels input parameters would also leave the option
88 | open to allow any hashable object, like a tuple, to be used as a name.
89 | Currently tuples have a special meaning, the (names, labels) tuple.
90 | 
91 | Create Axis._label_dict when needed?
92 | ------------------------------------
93 | 
94 | How about creating Axis._label_dict on the fly when needed (but not saving it)?
95 | 
96 | **Pros**
97 | 
98 | - Faster datarray creation (it does look like you get _label_dict for free
99 |   since you need to check that the labels are unique anyway, but set()
100 |   is faster)
101 | - Faster datarray copy
102 | - Use less memory
103 | - Easier to archive
104 | - Simplify Axis
105 | - Prevent user from doing ``dar.axes[0]._label_dict['a'] = 10``
106 | - Catches (on calls to ``make_slice`` and ``keep``) user mischief like
107 |   dar.axes[0].labels = ('a', 'a')
108 | - No need to update Axis._label_dict when user changes labels
109 | 
110 | **Cons**
111 | 
112 | - Slower ``make_slice``
113 | - Slower ``keep``
114 | 
115 | 
116 | Axis, axes
117 | ==========
118 | 
119 | Datarrays were created from the need to name the axes of a numpy array.
120 | 
121 | datarray1 + datarray2 = which axes?
122 | -----------------------------------
123 | 
124 | Which axes are returned by binary operations?
125 | 
126 | Make two datarrays::
127 | 
128 |     >> dar1 = DataArray([1, 2], [('time', ['A1', 'B1'])])
129 |     >> dar2 = DataArray([1, 2], [('time', ['A2', 'B2'])])
130 | 
131 | ``dar1`` on the left-hand side::
132 | 
133 |     >> dar12 = dar1 + dar2
134 |     >> dar12.axes
135 |     (Axis(name='time', index=0, labels=['A1', 'B1']),)
136 | 
137 | ``dar1`` on the right-hand side::
138 | 
139 |     >> dar21 = dar2 + dar1
140 |     >> dar21.axes
141 |     (Axis(name='time', index=0, labels=['A2', 'B2']),)
142 | 
143 | So a binary operation returns the axes from the left-hand side? No. It seems the
144 | leftmost non-None axes are used::
145 | 
146 |     >> dar3 = DataArray([1, 2])
147 |     >> dar31 = dar3 + dar1
148 |     >> dar31.axes
149 |     (Axis(name='time', index=0, labels=['A1', 'B1']),)
150 | 
151 | So a binary operation may return parts of both axes::
152 | 
153 |     >> dar1 = DataArray([[1, 2], [3, 4]], [None, ('col', ['A', 'B'])])
154 |     >> dar2 = DataArray([[1, 2], [3, 4]], [('row', ['a', 'b']), None])
155 |     >> dar12 = dar1 + dar2
156 |     >> dar12.axes
157 | 
158 |     (Axis(name='row', index=0, labels=['a', 'b']),
159 |      Axis(name='col', index=1, labels=['A', 'B']))
160 | 
161 | Is that the intended behavior?
162 | 
163 | Why does Axis.__eq__ require the index to be equal?
164 | ---------------------------------------------------
165 | 
166 | Example::
167 | 
168 |     >> dar1 = DataArray([[1, 2], [3, 4]], [('row', ['r0', 'r1']), ('col', ['c0', 'c1'])])
169 |     >> dar2 = DataArray([[1, 2], [3, 4]], [('col', ['c0', 'c1']), ('row', ['r0', 'r1'])])
170 |     >> dar1.axes[0] == dar2.axes[1]
171 |     False
172 | 
173 | Axis, axis, axes
174 | ----------------
175 | 
176 | The functions, classes, and methods that take care of axes are:
177 | 
178 | - Axis (class)
179 | - DataArray.axis (meth)
180 | - DataArray.axes (meth)
181 | - _reordered_axes (func)
182 | - _expand_ellipsis (func)
183 | - _make_singleton_axes (func)
184 | 
185 | I find having both DataArray.axis and DataArray.axes confusing at first. I
186 | wonder if it would simplify things if there were only:
187 | 
188 | - Axes (class)
189 | - DataArray.axes (instance of Axes)
190 | 
191 | That would consolidate everything in the Axes class. For example, in
192 | DataArray.__getitem__ this::
193 | 
194 |     if isinstance(key, tuple):
195 |         old_shape = self.shape
196 |         old_axes = self.axes
197 |         new_shape, new_axes, key = _make_singleton_axes(self, key)
198 |         # Will undo this later
199 |         self.shape = new_shape
200 |         _set_axes(self, new_axes)
201 |         # data is accessed recursively, starting with
202 |         # the full array
203 |         arr = self
204 | 
205 |         # We must copy the names of the axes
206 |         # before looping through the elements of key,
207 |         # as the index of a given axis may change.
208 |         names = [a.name for a in self.axes]
209 | 
210 |         # If an Axis gets sliced out entirely, then any following
211 |         # unnamed Axis in the array will spontaneously change name.
212 |         # So anticipate the name change here.
213 |         reduction = 0
214 |         adjustments = []
215 |         for k in key:
216 |             adjustments.append(reduction)
217 |             if not isinstance(k, slice):
218 |                 # reduce the idx # on the remaining default names
219 |                 reduction -= 1
220 | 
221 |         names = [n if a.name else '_%d'%(a.index+r)
222 |                  for n, a, r in zip(names, self.axes, adjustments)]
223 | 
224 |         for slice_or_int, name in zip(key, names):
225 |             arr = arr.axis[name][slice_or_int]
226 | 
227 |         # restore old shape and axes
228 |         self.shape = old_shape
229 |         _set_axes(self, old_axes)
230 | 
231 | could be replaced with::
232 | 
233 |     if isinstance(key, tuple):
234 |         self.axes = self.axes[key]
235 | 
236 | So it would pull out the axes logic from DataArray and place it in Axes.
237 | 
238 | Should DataArray.axes be a list instead of a tuple?
239 | ---------------------------------------------------
240 | 
241 | Why not make DataArray.axes a list instead of a tuple? Then the user could move
242 | an axis from one datarray to another, pop an Axis, etc.
243 | 
244 | 
245 | Can axis names be anything besides None or str?
246 | -----------------------------------------------
247 | 
248 | from http://projects.scipy.org/numpy/wiki/NdarrayWithNamedAxes: "Axis names
249 | (the name of a dimension) must be valid Python identifiers." I don't know
250 | what that means.
251 | 
252 | It would be nice if axis names could be anything hashable like str,
253 | datetime.date(), int, tuple.
254 | 
255 | But names must be strings to do indexing like this::
256 | 
257 |     >>> dar = DataArray([[1, 2], [3, 4]], (('row', ['A','B']), ('col', ['C', 'D'])))
258 |     >>> dar.axis.row['A']
259 |     DataArray([1, 2])
260 |     ('col',)
261 | 
262 | One way to make it work would be to rewrite the above as::
263 | 
264 |     >>> dar.axis['row']['A']
265 |     DataArray([1, 2])
266 |     ('col',)
267 | 
268 | which would also make it easier to loop through the axes by name::
269 | 
270 |     >>> for axisname in ['row', 'col']:
271 |     ....:     dar.axis[axisname][idx]
272 |     ....:     ...
273 | 
274 | 
275 | Performance
276 | ===========
277 | 
278 | Performance is not the primary concern during the prototype phase of datarray.
279 | But some attention to performance issues will help guide the development of
280 | datarrays.
281 | 
282 | How long does it take to create a datarray?
283 | -------------------------------------------
284 | 
285 | Set up data::
286 | 
287 |     >> import numpy as np
288 |     >> N = 100
289 |     >> arr = np.random.rand(N, N)
290 |     >> idx1 = map(str, range(N))
291 |     >> idx2 = map(str, range(N))
292 | 
293 | Time the creation of a datarray::
294 | 
295 |     >> from datarray import DataArray
296 |     >> import datarray
297 |     >> names = [('row', idx1), ('col', idx2)]
298 |     >> timeit datarray.DataArray(arr, names)
299 |     1000 loops, best of 3: 160 us per loop
300 | 
301 | Time the creation of a pandas DataMatrix. A DataMatrix is also a subclass
302 | of numpy's ndarray, but it has been optimized, so it should be a proxy for how
303 | fast a datarray can become::
304 | 
305 |     >> import pandas
306 |     >> timeit pandas.DataMatrix(arr, idx1, idx2)
307 |     10000 loops, best of 3: 50.7 us per loop
308 | 
309 | larry is not a subclass of numpy's ndarray; I think that is one reason it is
310 | faster to create::
311 | 
312 |     >> import la
313 |     >> name = [idx1, idx2]
314 |     >> timeit la.larry(arr, name)
315 |     100000 loops, best of 3: 13.5 us per loop
316 |     >> timeit la.larry(arr, name, integrity=False)
317 |     1000000 loops, best of 3: 1.25 us per loop
318 | 
319 | Also, both datarray and DataMatrix make a mapping dictionary when the data
320 | object is created---that takes time. larry makes a mapping dictionary on the
321 | fly, when needed.
322 | 
323 | Why is the time to create a datarray important? Because even an operation as
324 | simple as ``dar1 + dar2`` creates a datarray.
325 | 
326 | Direct access to array?
327 | -----------------------
328 | 
329 | Names and labels add overhead. Sometimes, after aligning my datarrays, I would
330 | like to work directly with the numpy arrays. Is there a way to do that with
331 | datarrays?
332 | 
333 | For example, with a named array, larry_, the underlying numpy array is always
334 | accessible as the attribute ``x``::
335 | 
336 |     >>> import la
337 |     >>> lar = la.larry([1, 2, 3])
338 |     >>> lar.x
339 |     array([1, 2, 3])
340 |     >>> lar.x = myfunc(lar.x)
341 | 
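Worth noting (a hedged aside, not a settled answer): because DataArray is an
ndarray subclass, ``np.asarray`` should hand back a plain-ndarray *view*
rather than a copy, so writes propagate::

    import numpy as np
    from datarray import DataArray

    dar = DataArray([1, 2, 3], [('row', ['r0', 'r1', 'r2'])])
    raw = np.asarray(dar)          # or dar.view(np.ndarray)
    raw[0] = 9                     # write through the plain view...
    assert dar[0] == 9             # ...and the datarray sees it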
342 | .. _larry: http://github.com/kwgoodman/la
343 | 
344 | This might be one solution (base)::
345 | 
346 |     >> from datarray import DataArray
347 |     >> x = DataArray([[1,2],[3,4]], [('row', ['r1', 'r2']), ('col', ['c1', 'c2'])])
348 |     >> timeit x + x
349 |     10000 loops, best of 3: 61.4 us per loop
350 |     >> timeit x.base + x.base
351 |     100000 loops, best of 3: 2.16 us per loop
352 | 
353 | and::
354 | 
355 |     >> x = DataArray([1, 2])
356 |     >> x.base[0] = 9
357 |     >> x
358 | 
359 |     DataArray([9, 2])
360 |     (None,)
361 | 
362 | But base is not guaranteed to be a view. What's another solution? Could create
363 | an attribute at init time, but that slows down init.
364 | 
365 | 
366 | Alignment
367 | =========
368 | 
369 | Datarray may not handle alignment directly. But some users of datarrays would
370 | like an easy way to align datarrays.
371 | 
372 | Support for alignment?
373 | ----------------------
374 | 
375 | Will datarray provide any support for those who want binary operations between
376 | two datarrays to join names or labels using various join methods?
377 | 
378 | A use case from larry_:
379 | 
380 | By default, binary operations between two larrys use an inner join of the
381 | names (the intersection of the names)::
382 | 
383 |     >>> lar1 = larry([1, 2])
384 |     >>> lar2 = larry([1, 2, 3])
385 |     >>> lar1 + lar2
386 |     name_0
387 |         0
388 |         1
389 |     x
390 |     array([2, 4])
391 | 
392 | The sum of two larrys using an outer join (union of the names)::
393 | 
394 |     >>> la.add(lar1, lar2, join='outer')
395 |     name_0
396 |         0
397 |         1
398 |         2
399 |     x
400 |     array([  2.,   4.,  NaN])
401 | 
402 | The available join methods are inner, outer, left, right, and list. If the
403 | join method is specified as a list then the first element in the list is the
404 | join method for axis=0, the second element is the join method for axis=1, and
405 | so on.
406 | 
407 | How can datarrays be aligned?
408 | -----------------------------
409 | 
410 | What's an outer join (or inner, left, right) along an axis of two datarrays if
411 | one datarray has labels and the other doesn't?
412 | 
413 | Background:
414 | 
415 | It is often useful to align two datarrays before performing binary operations
416 | such as +, -, \*, /. Two datarrays are aligned when both datarrays have the same
417 | names and labels along all axes.
418 | 
419 | Aligned::
420 | 
421 |     >> dar1 = DataArray([1, 2])
422 |     >> dar2 = DataArray([3, 4])
423 |     >> dar1.axes == dar2.axes
424 |     True
425 | 
426 | Unaligned::
427 | 
428 |     >> dar1 = DataArray([1, 2], names=("time",))
429 |     >> dar2 = DataArray([3, 4], names=("distance",))
430 |     >> dar1.axes == dar2.axes
431 |     False
432 | 
433 | Unaligned, but compares as aligned, since Axis.__eq__ doesn't (yet) check for
434 | equality of labels::
435 | 
436 |     >> dar1 = DataArray([1, 2], names=[("time", ['A', 'B'])])
437 |     >> dar2 = DataArray([1, 2], names=[("time", ['A', 'different'])])
438 |     >> dar1.axes == dar2.axes
439 |     True
440 | 
441 | Let's say we make an add function with user control of the join method::
442 | 
443 |     >>> add(dar1, dar2, join='outer')
444 | 
445 | Since datarray allows empty axis names (None) and labels (None), what does an
446 | outer join mean if dar1 has labels but dar2 doesn't::
447 | 
448 |     >>> dar1 = DataArray([1, 2], names=[("time", ['A', 'B'])])
449 |     >>> dar2 = DataArray([1, 2], names=[("time",)])
450 | 
451 | What would the following return?
452 | ::
453 | 
454 |     >>> add(dar1, dar2, join='outer')
455 | 
456 | larry requires all axes to have labels; if none are given, the labels default
457 | to range(n).
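For concreteness, a toy sketch of what an outer-join ``add`` could do for two
1-D labeled operands (``outer_add`` is hypothetical; it mirrors the larry
example above, leaving NaN where only one operand has the label)::

    import numpy as np

    def outer_add(x, xlabels, y, ylabels):
        # The union of the labels defines the result axis.
        union = sorted(set(xlabels) | set(ylabels))
        out = np.empty(len(union))
        out.fill(np.nan)                    # missing positions stay NaN
        for i, lab in enumerate(union):
            if lab in xlabels and lab in ylabels:
                out[i] = x[xlabels.index(lab)] + y[ylabels.index(lab)]
        return union, out

    labels, summed = outer_add(np.array([1., 2.]), [0, 1],
                               np.array([1., 2., 3.]), [0, 1, 2])
    # labels -> [0, 1, 2]; summed -> array([  2.,   4.,  nan])

How such a function should treat a label-less operand is exactly the open
question above.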
458 | 
459 | datarray.reshape
460 | ----------------
461 | 
462 | Reshape operations scramble names and labels. Some numpy functions and
463 | array methods use reshape. Should reshape convert a datarray to an array?
464 | 
465 | Looks like datarray will need unit tests for every numpy function and array
466 | method.
467 | 
468 | 
469 | Misc
470 | ====
471 | 
472 | Miscellaneous observations on datarrays.
473 | 
474 | How do I save a datarray in HDF5 using h5py?
475 | --------------------------------------------
476 | 
477 | h5py, which stores data in HDF5 format, can only
478 | save numpy arrays.
479 | 
480 | What are the parts of a datarray that need to be saved? And can they be stored
481 | as numpy arrays?
482 | 
483 | A datarray can be broken down into the following components:
484 | 
485 | - data (store directly as numpy array)
486 | - names (store as object array since it contains None and str, and convert
487 |   back on load?)
488 | - labels (each axis stored as numpy array with axis number stored as HDF5
489 |   Dataset attribute, but then labels along any one axis must be homogeneous
490 |   in dtype)
491 | - Dictionary of label index mappings (ignore, recreate on load)
492 | 
493 | (I need to write a function that saves an Axis object to HDF5.)
494 | 
495 | If I don't save Axis._label_dict, would I have to worry about a user changing
496 | the mapping?
497 | ::
498 | 
499 |     >>> dar.axes[0]
500 |     Axis(name='one', index=0, labels=('a', 'b'))
501 |     >>> dar.axes[0]._label_dict
502 |     {'a': 0, 'b': 1}
503 |     >>> dar.axes[0]._label_dict['a'] = 10
504 |     >>> dar.axes[0]._label_dict
505 |     {'a': 10, 'b': 1}
506 | 
507 | 
508 | Can names and labels be changed?
509 | --------------------------------
510 | 
511 | Labels can be changed::
512 | 
513 |     >>> dar = DataArray([1, 2], [('row', ['A','B'])])
514 |     >>> dar.axes
515 |     (Axis(name='row', index=0, labels=['A', 'B']),)
516 |     >>> dar.axes[0].labels[0] = 'CHANGED'
517 |     >>> dar.axes
518 |     (Axis(name='row', index=0, labels=['CHANGED', 'B']),)
519 | 
520 | But Axis._label_dict is not updated when the user changes labels.
521 | 
522 | And so can names::
523 | 
524 |     >>> dar.set_name(0, 'new name')
525 |     >>> dar
526 |     DataArray([1, 2])
527 |     ('new name',)
528 | 
529 | Fancy Indexing
530 | --------------
531 | 
532 | It's not implemented at all yet.
533 | 
534 | .. _name_updates:
535 | 
536 | Changing Names on DataArrays
537 | ============================
538 | 
539 | Tricky Attributes
540 | -----------------
541 | 
542 | * .names -- currently a mutable list of Axis.name attributes
543 | * .axes -- currently a mutable list of Axis objects
544 | * .axis -- a key-to-attribute dictionary
545 | 
546 | Need an event-ful way to change an Axis's label, such that all the above
547 | attributes are updated.
548 | 
549 | **Proposed solution**:
550 | 
551 | 1. use a set_label() method. This will consequently update the parent array's
552 |    (names, axes, axis) attributes.
553 | 2. make the mutable lists into *tuples* to deny write access.
554 | 3. make the KeyStruct ``.axis`` have write-once access.
555 | 
556 | .. _todo:
557 | 
558 | ToDo
559 | ====
560 | 
561 | * Support DataArray instances with mixed axes: simple ones with no values
562 |   and 'fancy' ones with data in them. Syntax?
563 | 
564 |   ``a = DataArray.from_names(data, axes=['a','b','c'])``
565 | 
566 |   ``b = DataArray(data, axes=[('a',['1','2','3']), ('b',['one','two']), ('c',['red','black'])])``
567 | 
568 |   ``c = DataArray(data, axes=[('a',['1','2','3']), ('b',None), ('c',['red','black'])])``
569 | 
570 | * Can a, b, and c be combined in binary operations, given the different tick
571 |   combinations?
572 | * How to handle complicated reshaping (not flattening, or padding/trimming with
573 |   1s)
574 | * Units support (Darren's)
575 | * Jagged arrays? Kilian's suggestion. Drop the base array altogether, and
576 |   access data via the .axis objects alone.
577 | * "Enum dtype", could be useful for event selection.
578 | * "Ordered factors"? Something R supports.
579 | * How many axis classes?
580 | 
581 | * Allowing non-string axis names?
582 | 
583 |   - At least they must be hashable...
584 |   - Serialization?
585 | 
586 | 
587 | * Allowing multiple names per axis?
588 | 
589 | 
590 | * Rob Speer's proposal for purely top-level, 'magical' attributes?
591 | 
592 | 
593 | * Finish the semantics of .lix indexing, especially with regard to what it
594 |   should do when integer labels are present.
595 | 
596 | * What should a.axis.x[object] do: .lix-style indexing or pure numpy indexing?
597 | 
598 | Indexing semantics possibilities
599 | --------------------------------
600 | 
601 | 1. .lix: Integers are always labels. a.lix[3:10] means labels 3 and 10 MUST exist.
602 | 
603 | 2. .nix: Integers are never treated as labels.
604 | 
605 | 3. .awful_ix: 1, then 2.
606 | 
607 | 
608 | Axis API
609 | --------
610 | If a is an axis from an array: a = x.axis.a
611 | 
612 | - a.at(key): return the slice at that key, with one less dimension than x
613 | - a.keep(keys): join slices for given keys, dims=dims(x)
614 | - a.drop(keys): like keep, but the opposite
615 | 
616 | a[i] valid cases:
617 | 
618 | - i: integer => normal numpy scalar indexing, one less dim than x
619 | - i: slice: numpy view slicing. same dims as x, must recover the labels
620 | - i: list/array: numpy fancy indexing, as long as the index list is 1d only.
621 | 
--------------------------------------------------------------------------------
/doc/source/generated/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | 
--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
1 | =======================================================
2 | Welcome to the documentation for the Datarray prototype
3 | =======================================================
4 | 
5 | Datarray is a prototype implementation of a numpy ndarray subclass with named
6 | axes and optionally labeled ticks on said axes.
7 | 
8 | The datarray code is being collaboratively developed and hosted at:
9 | 
10 | http://github.com/BIDS/datarray
11 | 
12 | The documentation for various releases and a current build can be found here:
13 | 
14 | http://bids.github.com/datarray
15 | 
16 | Contents:
17 | 
18 | ..
toctree:: 19 | :maxdepth: 1 20 | 21 | basic_data_array 22 | ndarray_methods 23 | printing 24 | design/index 25 | other_projects/index 26 | license 27 | API docs 28 | 29 | Indices and tables 30 | ================== 31 | 32 | * :ref:`genindex` 33 | * :ref:`modindex` 34 | * :ref:`search` 35 | -------------------------------------------------------------------------------- /doc/source/license.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../datarray/LICENSE 2 | 3 | Other Licenses 4 | -------------- 5 | 6 | Throughout the writing of datarray we have relied heavily on Pandas, as well as 7 | using numpydoc. These are the relevant licenses: 8 | 9 | :doc:`Pandas ` 10 | 11 | :doc:`Numpydoc ` 12 | 13 | .. toctree:: 14 | :hidden: 15 | 16 | licenses/numpydoc_license 17 | licenses/pandas_license 18 | -------------------------------------------------------------------------------- /doc/source/licenses/numpydoc_license.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | Numpydoc License 3 | ================== 4 | 5 | The files: 6 | 7 | - numpydoc.py 8 | - autosummary.py 9 | - autosummary_generate.py 10 | - docscrape.py 11 | - docscrape_sphinx.py 12 | - phantom_import.py 13 | 14 | have the following license:: 15 | 16 | Copyright (C) 2008 Stefan van der Walt , Pauli Virtanen 17 | 18 | Redistribution and use in source and binary forms, with or without 19 | modification, are permitted provided that the following conditions are 20 | met: 21 | 22 | 1. Redistributions of source code must retain the above copyright 23 | notice, this list of conditions and the following disclaimer. 24 | 2. Redistributions in binary form must reproduce the above copyright 25 | notice, this list of conditions and the following disclaimer in 26 | the documentation and/or other materials provided with the 27 | distribution. 28 | 29 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 30 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 31 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 32 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 33 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 34 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 35 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 37 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 38 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 | POSSIBILITY OF SUCH DAMAGE. 40 | 41 | ------------------------------------------------------------------------------- 42 | The files 43 | - compiler_unparse.py 44 | - comment_eater.py 45 | - traitsdoc.py 46 | have the following license: 47 | 48 | This software is OSI Certified Open Source Software. 49 | OSI Certified is a certification mark of the Open Source Initiative. 50 | 51 | Copyright (c) 2006, Enthought, Inc. 52 | All rights reserved. 53 | 54 | Redistribution and use in source and binary forms, with or without 55 | modification, are permitted provided that the following conditions are met: 56 | 57 | * Redistributions of source code must retain the above copyright notice, this 58 | list of conditions and the following disclaimer. 
59 | * Redistributions in binary form must reproduce the above copyright notice, 60 | this list of conditions and the following disclaimer in the documentation 61 | and/or other materials provided with the distribution. 62 | * Neither the name of Enthought, Inc. nor the names of its contributors may 63 | be used to endorse or promote products derived from this software without 64 | specific prior written permission. 65 | 66 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 67 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 68 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 69 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 70 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 71 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 72 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 73 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 74 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 75 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 76 | 77 | 78 | ------------------------------------------------------------------------------- 79 | The files 80 | - only_directives.py 81 | - plot_directive.py 82 | originate from Matplotlib (http://matplotlib.sf.net/) which has 83 | the following license: 84 | 85 | Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved. 86 | 87 | 1. This LICENSE AGREEMENT is between John D. Hunter (“JDH”), and the Individual or Organization (“Licensee”) accessing and otherwise using matplotlib software in source or binary form and its associated documentation. 88 | 89 | 2. Subject to the terms and conditions of this License Agreement, JDH hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use matplotlib 0.98.3 alone or in any derivative version, provided, however, that JDH’s License Agreement and JDH’s notice of copyright, i.e., “Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved” are retained in matplotlib 0.98.3 alone or in any derivative version prepared by Licensee. 90 | 91 | 3. In the event Licensee prepares a derivative work that is based on or incorporates matplotlib 0.98.3 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to matplotlib 0.98.3. 92 | 93 | 4. JDH is making matplotlib 0.98.3 available to Licensee on an “AS IS” basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB 0.98.3 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 94 | 95 | 5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB 0.98.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING MATPLOTLIB 0.98.3, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 96 | 97 | 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 98 | 99 | 7. 
Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between JDH and Licensee. This License Agreement does not grant permission to use JDH trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 100 | 101 | 8. By copying, installing or otherwise using matplotlib 0.98.3, Licensee agrees to be bound by the terms and conditions of this License Agreement. 102 | 103 | -------------------------------------------------------------------------------- /doc/source/licenses/pandas_license.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | The Pandas License 3 | ==================== 4 | 5 | :: 6 | 7 | Copyright (c) 2008-2009 AQR Capital Management, LLC 8 | All rights reserved. 9 | 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions are 12 | met: 13 | 14 | * Redistributions of source code must retain the above copyright 15 | notice, this list of conditions and the following disclaimer. 16 | 17 | * Redistributions in binary form must reproduce the above 18 | copyright notice, this list of conditions and the following 19 | disclaimer in the documentation and/or other materials provided 20 | with the distribution. 21 | 22 | * Neither the name of the copyright holder nor the names of any 23 | contributors may be used to endorse or promote products derived 24 | from this software without specific prior written permission. 25 | 26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 27 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 30 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 31 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 32 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 33 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 34 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 36 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37 | -------------------------------------------------------------------------------- /doc/source/ndarray_methods.rst: -------------------------------------------------------------------------------- 1 | .. testsetup:: 2 | 3 | import numpy as np 4 | 5 | ======= 6 | Methods 7 | ======= 8 | 9 | Here is a list of the ``array`` methods: 10 | 11 | .. 
we got the method names with 12 | 13 | >>> a = np.random.randn(3,4) 14 | >>> filter(lambda x: type(getattr(a,x))==type(a.min), dir(a)) 15 | 16 | * '__array__', 17 | * :ref:`'__array_prepare__',` 18 | * :ref:`'__array_wrap__',` 19 | * '__copy__', 20 | * '__deepcopy__', 21 | * :ref:`'__new__',` 22 | * '__reduce__', 23 | * '__reduce_ex__', 24 | * '__setstate__', 25 | * 'all', 26 | * 'any', 27 | * :ref:`'argmax', ` 28 | * :ref:`'argmin', ` 29 | * :ref:`'argsort',` 30 | * 'astype', 31 | * 'byteswap', 32 | * :ref:`'choose',` 33 | * 'clip', 34 | * 'compress', 35 | * 'conj', 36 | * 'conjugate', 37 | * 'copy', 38 | * :ref:`'cumprod',` 39 | * :ref:`'cumsum',` 40 | * :ref:`'diagonal',` 41 | * 'dump', 42 | * 'dumps', 43 | * 'fill', 44 | * :ref:`'flatten',` 45 | * 'getfield', 46 | * 'item', 47 | * 'itemset', 48 | * :ref:`'max',` 49 | * :ref:`'mean',` 50 | * :ref:`'min',` 51 | * 'newbyteorder', 52 | * 'nonzero', 53 | * :ref:`'prod',` 54 | * :ref:`'ptp',` 55 | * 'put', 56 | * :ref:`'ravel',` 57 | * :ref:`'repeat',` 58 | * :ref:`'reshape',` 59 | * :ref:`'resize',` 60 | * 'round', 61 | * :ref:`'searchsorted',` 62 | * 'setfield', 63 | * 'setflags', 64 | * :ref:`'sort',` 65 | * :ref:`'squeeze',` 66 | * :ref:`'std',` 67 | * :ref:`'sum',` 68 | * :ref:`'swapaxes',` 69 | * :ref:`'take',` 70 | * 'tofile', 71 | * 'tolist', 72 | * 'tostring', 73 | * 'trace', 74 | * :ref:`'transpose',` 75 | * :ref:`'var',` 76 | * 'view'] 77 | 78 | .. _sorting_methods: 79 | 80 | Sorting 81 | ------- 82 | 83 | sort() and argsort() 84 | 85 | These methods default to sorting the flattened array (returning an 86 | ndarray). If given an axis keyword, then it is possible to preserve 87 | the axes meta-data *only if* there are no ticks on the sorted 88 | Axis. Otherwise, an ndarray is returned. 89 | 90 | .. _explicitly_redef: 91 | 92 | Explicitly overloaded 93 | --------------------- 94 | 95 | These methods do not fit into a simple pattern, and are explicitly overloaded 96 | in the DataArray class definition. 97 | 98 | .. _wrapped_reduction: 99 | 100 | Regular reductions (eg, min) 101 | ---------------------------- 102 | 103 | These methods are wrapped in a generic runner that pays attention to which axis 104 | is being trimmed out (if only one), and then sets the remaining axes on the 105 | resulting array. It also allows for the translation of Axis-name to Axis-index. 106 | 107 | .. _wrapped_reduction_special: 108 | 109 | Special reductions (eg, argmin) 110 | ------------------------------- 111 | 112 | These methods are currently wrapped as a generic reduction. 113 | 114 | These methods return an index, or an array of indices into the array in 115 | question. That significantly changes the model of the array in question. Should 116 | the return type here NOT be DataArray? 117 | 118 | .. _incomplete_reductions: 119 | 120 | Accumulations 121 | ------------- 122 | 123 | These methods are wrapped in a generic accumulator. 124 | 125 | These methods have the property of taking an "axis" keyword argument, and yet 126 | not eliminating that axis. They also default to working on the flattened array 127 | if the axis parameter is left unspecified. 128 | 129 | .. _wtf_methods: 130 | 131 | Not-applicable methods 132 | ---------------------- 133 | 134 | Possibly N/A methods? 135 | 136 | .. _reshaping_methods: 137 | 138 | Reshapes 139 | -------- 140 | 141 | Reshaping is prickly.. I've already implemented certain slicing 142 | mechanisms that can insert unlabeled axes with length-1. This seems 143 | legitimate. 
Also squeezing out length-1 seems legitimate (**even if 144 | the Axis is labeled?**). 145 | 146 | The reshaping currently only trims or pads the array shape with 1s, or 147 | flattens the array entirely (returning an ndarray). 148 | 149 | -------------------------------------------------------------------------------- /doc/source/other_projects/index.rst: -------------------------------------------------------------------------------- 1 | ================ 2 | Other projects 3 | ================ 4 | 5 | The following are closely related projects that have heavily influenced the 6 | design of datarray. Both Larry and Pandas target slightly higher level 7 | problems than datarray, but the intended outcome is for datarray to provide 8 | a base object on which projects like these can more easily build their 9 | domain-specific tools with a common foundation. 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | 14 | larry_overview.rst 15 | pandas_overview.rst 16 | -------------------------------------------------------------------------------- /doc/source/other_projects/larry_overview.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | Larray (aka Larry) 3 | ==================== 4 | 5 | Overview 6 | ^^^^^^^^ 7 | 8 | Larray offers the notion of "ticks", but the axes themselves are not named. The 9 | model seems to be something like *data with coordinates* 10 | 11 | Importantly, 12 | 13 | * Pure Python implementation 14 | * Is **not** an ndarray 15 | 16 | * therefore, lots of redefined functionality 17 | * also lots of presumed intention of data (shuffling labels, group means, ...) 18 | * not lightweight 19 | 20 | * Does **not** offer named axes 21 | * **Only one (class of) dtype!!** 22 | * Can do n-D 23 | * Good mixed indexing 24 | 25 | 26 | Construction 27 | ************ 28 | 29 | Larrays can be constructed from an array-like object and tick names for each 30 | axis. Alternatively, Larrays can be constructed from a number of 31 | data-with-coordinates representations. 32 | 33 | 34 | Here's how to create a larry using **fromtuples** (note the cast to float, and 35 | the filled-in NaN):: 36 | 37 | >>> data = [('a', 'a', 1), ('a', 'b', 2), ('b', 'a', 3)] 38 | >>> larry.fromtuples(data) 39 | label_0 40 | a 41 | b 42 | label_1 43 | a 44 | b 45 | x 46 | array([[ 1., 2.], 47 | [ 3., NaN]]) 48 | 49 | Here are examples of **fromdict** and **fromlist**:: 50 | 51 | >>> data = {('a', 'c'): 1, ('a', 'd'): 2, ('b', 'c'): 3, ('b', 'd'): 4} 52 | >>> larry.fromdict(data) 53 | label_0 54 | a 55 | b 56 | label_1 57 | c 58 | d 59 | x 60 | array([[ 1., 2.], 61 | [ 3., 4.]]) 62 | 63 | >>> data = [[1, 2, 3, 4], [('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd')]] 64 | >>> larry.fromlist(data) 65 | label_0 66 | a 67 | b 68 | label_1 69 | c 70 | d 71 | x 72 | array([[ 1., 2.], 73 | [ 3., 4.]]) 74 | 75 | Indexing 76 | ******** 77 | 78 | Indexing using the bracket syntax arr[ ] seems to return you exactly 79 | what numpy would slice out of the underlying array. All slicing works, with the 80 | exception of "fancy" indexing, and ellipsis indexing, and the use of 81 | **np.newaxis**. 82 | 83 | There is also a smart slicer riding along with the larrays that can slice with 84 | label information. It seems to nicely blend labels and regular integer slicing. 
85 | To disambiguate possible integer labels and integer indexing, labels always
86 | must be enclosed in a list::
87 | 
88 |     >>> arr = la.larry(np.arange(6).reshape(2,3), [ ['u', 'v'], [2,5,3], ])
89 |     >>> arr
90 |     label_0
91 |         u
92 |         v
93 |     label_1
94 |         2
95 |         5
96 |         3
97 |     x
98 |     array([[0, 1, 2],
99 |            [3, 4, 5]])
100 |     >>> arr.lix[['u']]
101 |     label_0
102 |         2
103 |         5
104 |         3
105 |     x
106 |     array([0, 1, 2])
107 |     >>> arr.lix[['u'],2:5]
108 |     2
109 |     >>> arr.lix[['u'],[2]:[5]]
110 |     0
111 |     >>> arr.lix[['u'],[2]:[3]]
112 |     label_0
113 |         2
114 |         5
115 |     x
116 |     array([0, 1])
117 | 
118 | 
119 | Binary Operations (arithmetic)
120 | ******************************
121 | 
122 | Binary operations are not, in general, numpy-thonic.
123 | 
124 | Alignment
125 | ---------
126 | 
127 | Larray seems to want to only make binary operations on data with identical
128 | coordinates. Furthermore, it will re-align the data if necessary. Therefore,
129 | this example is OK::
130 | 
131 |     >>> y1 = larry([1, 2], [['a', 'z']])
132 |     >>> y2 = larry([1, 2], [['z', 'a']])
133 | 
134 | What is ``y1 + y2``?
135 | ::
136 | 
137 |     >>> y1 + y2
138 |     label_0
139 |         a
140 |         z
141 |     x
142 |     array([3, 3])
143 | 
144 | But this fails::
145 | 
146 |     >>> z1 = larry([1, 2], [['a', 'b']])
147 |     >>> z2 = larry([3, 4], [['c', 'd']])
148 | 
149 |     >>> z1 + z2
150 |     Traceback (most recent call last):
151 |       File "<stdin>", line 1, in <module>
152 |       File "la/la/deflarry.py", line 494, in __add__
153 |         x, y, label = self.__align(other)
154 |       File "la/la/deflarry.py", line 731, in __align
155 |         raise IndexError, 'A dimension has no matching labels'
156 |     IndexError: A dimension has no matching labels
157 | 
158 | Intersections and Broadcasting
159 | ------------------------------
160 | 
161 | Binary ops can introduce an implicit intersection operation, for example (this
162 | would be illegal code in numpy)::
163 | 
164 |     >>> arr = la.larry(np.arange(6).reshape(2,3), [ ['u', 'v'], ['x','y','z']])
165 |     >>> arr2 = la.larry(np.arange(9).reshape(3,3), [ ['u', 'v', 'w'], ['x', 'y', 'z']] )
166 |     >>> arr2 + arr
167 |     label_0
168 |         u
169 |         v
170 |     label_1
171 |         x
172 |         y
173 |         z
174 |     x
175 |     array([[ 0,  2,  4],
176 |            [ 6,  8, 10]])
177 | 
178 | 
179 | According to the matched-coordinates operation rule, broadcasting does not happen::
180 | 
181 |     >>> arr3 = la.larry([4,5,6], [['x','y','z']])
182 |     >>> arr3 + arr
183 |     ------------------------------------------------------------
184 |     Traceback (most recent call last):
185 |       File "<stdin>", line 1, in <module>
186 |       File "/Users/mike/usr/lib/python2.5/site-packages/la/deflarry.py", line 583, in __add__
187 |         x, y, label = self.__align(other)
188 |       File "/Users/mike/usr/lib/python2.5/site-packages/la/deflarry.py", line 820, in __align
189 |         raise IndexError, msg
190 |     IndexError: Binary operation on two larrys with different dimension
191 | 
--------------------------------------------------------------------------------
/doc/source/other_projects/pandas_overview.rst:
--------------------------------------------------------------------------------
1 | ========
2 | Pandas
3 | ========
4 | 
5 | Overview
6 | ^^^^^^^^
7 | 
8 | Pandas provides timeseries and stack-of-timeseries objects. They
9 | seem heavily geared towards financial data. Despite the fact of
10 | **being** an ndarray, Pandas objects seem to be a specialized
11 | alternative to ndarrays rather than an augmentation of them.
12 | 
13 | Features
14 | 
15 | * **Is** an ndarray
16 | * axes are not named
17 | * Is dict-like, with respect to its indices (ticks)
18 | * If ticks are indices, semantics of indexing are ambiguous
19 | * Separate objects for 1D and 2D, no support for n>2
20 | 
21 | 
22 | Indexing
23 | ********
24 | 
25 | Point-indexing syntax can use ticks or integer indices. Range indexing
26 | only works with integers, but uses the same syntax.
27 | 
28 | Semantic Ambiguity
29 | ------------------
30 | 
31 | Integer tick values interfere with integer indexing, for example::
32 | 
33 |     >>> t = pandas.Series.fromValue(1.0, range(5,0,-1), 'i')
34 |     >>> t[:] = np.random.randint(100, size=5)
35 |     >>> t
36 |     5    23
37 |     4    62
38 |     3    66
39 |     2    91
40 |     1    91
41 |     >>> t[2] = 0
42 |     >>> t
43 |     5    23
44 |     4    62
45 |     3    66
46 |     2    0
47 |     1    91
48 | 
49 | 
50 | 
51 | Binary Operations
52 | *****************
53 | 
54 | Alignment
55 | ---------
56 | 
57 | If data is partially aligned, missing data is filled with NaNs. This
58 | introduces a union with respect to the "range" of the data. This also
59 | will **cast** the data to floating point.::
60 | 
61 |     >>> t.dtype
62 |     dtype('int32')
63 |     >>> t - t[:3]
64 |     5    0.0
65 |     4    0.0
66 |     3    0.0
67 |     2    NaN
68 |     1    NaN
69 | 
--------------------------------------------------------------------------------
/doc/source/printing.rst:
--------------------------------------------------------------------------------
1 | Printing Datarrays
2 | ==================
3 | 
4 | One of the most important ways to understand what's going on in a labeled
5 | array is to be able to see a pretty text representation of it. In Divisi2 I
6 | stole the __str__ method from PySparse to accomplish this, but NumPy arrays are
7 | more varied than PySparse (where everything is two-dimensional and made of
8 | floats).
9 | 
10 | Can we build on NumPy's str?
11 | ----------------------------
12 | 
13 | NumPy has provided somewhat-pretty text representations for a long time, but
14 | the code in numpy.core.arrayprint is
15 | 
16 | - difficult to extend
17 | - undocumented
18 | - kind of spaghetti, frankly
19 | - largely untouched for the last 13 years!
20 | 
21 | Its output can be aesthetically suboptimal in some cases. When printing large
22 | arrays of floats, for example, it will wrap every line like this::
23 | 
24 |     [[  0.00000000e+00   1.00000000e-04   2.00000000e-04 ...,   4.70000000e-03
25 |         4.80000000e-03   4.90000000e-03]
26 |      [  5.00000000e-03   5.10000000e-03   5.20000000e-03 ...,   9.70000000e-03
27 |         9.80000000e-03   9.90000000e-03]
28 |      [  1.00000000e-02   1.01000000e-02   1.02000000e-02 ...,   1.47000000e-02
29 |         1.48000000e-02   1.49000000e-02]
30 |      ...,
31 |      [  3.35000000e-01   3.35100000e-01   3.35200000e-01 ...,   3.39700000e-01
32 |         3.39800000e-01   3.39900000e-01]
33 |      [  3.40000000e-01   3.40100000e-01   3.40200000e-01 ...,   3.44700000e-01
34 |         3.44800000e-01   3.44900000e-01]
35 |      [  3.45000000e-01   3.45100000e-01   3.45200000e-01 ...,   3.49700000e-01
36 |         3.49800000e-01   3.49900000e-01]]
37 | 
38 | The user can understand what that means, but it'll be hard to stick labels on.
39 | 
40 | My conclusion is that it will be better to build this representation from the
41 | ground up.
42 | 
43 | The 2D pretty-printer
44 | ---------------------
45 | Screens are 2D, so everything is a variant of the 2D case. What we need is a
46 | class designed for printing strings in a grid. This class will then:
47 | 
48 | - Find a formatter for the dtype of the matrix (the "cell formatter").
49 | - Make an array (a string array? might as well) of equal-width string
50 |   representations
51 | - Attach row and column labels as the first row and column of the array
52 | - Join together everything into a correctly-aligned, multi-line string
53 | 
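A toy sketch of those four steps with a fixed cell width (no formatter
negotiation yet; ``format_grid`` is illustrative only)::

    import numpy as np

    def format_grid(arr, row_labels, col_labels, width=8):
        # One fixed-width string per cell; labels become the first
        # row and column; everything is joined with single spaces.
        cells = [[('%g' % v).rjust(width) for v in row] for row in arr]
        lines = [' '.join([' ' * width] + [c.rjust(width) for c in col_labels])]
        for label, row in zip(row_labels, cells):
            lines.append(' '.join([label.rjust(width)] + row))
        return '\n'.join(lines)

    print(format_grid(np.arange(6).reshape(2, 3) / 10.0,
                      ['row0', 'row1'], ['c0', 'c1', 'c2']))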
54 | The width of each cell is a negotiation between the grid formatter and the cell
55 | formatter:
56 | 
57 | - Cell: I can print these floats in 5 to 15 characters. More characters is
58 |   better, of course.
59 | - Grid: I'll give you 7.
60 | - Cell: Stingy bastard.
61 | 
62 | Maybe this could be accomplished with "small", "medium", and "large" options
63 | for each formatter, allowing us to reuse arrayprint formatters:
64 | 
65 | - float: large = high precision, medium = lower precision, small = lower
66 |   precision and suppress_small
67 | - int: large = max number of digits, medium/small = exponential notation
68 | - str: large = maximum length, medium = truncate
69 | - bool: large = ' True'/'False', medium/small = 'T'/'-' (to be visually
70 |   distinct)
71 | 
72 | Brackets are _not_ printed (it's too hard to work them in with the labels).
73 | 
74 | The 1D pretty-printer
75 | ---------------------
76 | It's the 2D printer with only one row.
77 | 
78 | The 3D pretty-printer
79 | ---------------------
80 | When people work with n-dimensional labeled data and n>2, what they often do
81 | is flatten it out into 2 dimensions. The rows are single data points, and the
82 | columns are all the indices followed by the value. Show a few of these from the
83 | beginning of the matrix, dots, and a few of these from the end of the matrix.
84 | 
85 | Then put all those back into the grid-maker.
86 | 
87 | If there are more than 30 or so dimensions, we are sad.
88 | 
--------------------------------------------------------------------------------
/examples/inference_algs.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, print_function
2 | 
3 | import sys
4 | if sys.version_info[0] < 3:  # Use range iterator for Python 2
5 |     range = xrange
6 | from functools import reduce
7 | 
8 | import operator
9 | from itertools import combinations
10 | 
11 | import networkx as nx
12 | import numpy as np
13 | 
14 | from datarray import DataArray
15 | 
16 | from numpy.testing import assert_almost_equal
17 | 
18 | 
19 | def test_pearl_network():
20 |     """ From Russell and Norvig, "Artificial Intelligence, A Modern Approach,"
21 |     Section 15.1, originally from Pearl.
22 | 
23 |     "Consider the following situation. You have a new burglar alarm installed
24 |     at home. It is fairly reliable at detecting a burglary, but also responds
25 |     on occasion to minor earthquakes. You also have two neighbors, John and
26 |     Mary, who have promised to call you at work when they hear the alarm. John
27 |     always calls when he hears the alarm, but sometimes confuses the telephone
28 |     ringing with the alarm and calls then, too. Mary on the other hand, likes
29 |     rather loud music and sometimes misses the alarm altogether. Given the
30 |     evidence of who has or has not called, we would like to estimate the
31 |     probability of a burglary.
32 | 
33 |     Burglary         Earthquake
34 | 
35 |        \                /
36 |       _\|             |/_
37 | 
38 |             Alarm
39 | 
40 |        /                \
41 |      |/_                _\|
42 | 
43 |     Johncalls         Marycalls
44 | 
45 |     This test function uses four different algorithms to calculate
46 | 
47 |     P(burglary | johncalls = 1, marycalls = 1)
48 | 
49 |     In increasing order of sophistication:
50 |     1. Simple (calculate joint distribution and marginalize)
51 |     2.
Elimination (strategically marginalize over one variable at a time) 52 | 3. Sum-product algorithm on factor graph 53 | 4. Junction tree algorithm 54 | """ 55 | burglary = DataArray([.999,.001], axes=["burglary"]) 56 | earthquake = DataArray([.998,.002], axes=["earthquake"]) 57 | alarm = DataArray([ [[.05,.95], [.06,.94]], 58 | [[.71,.29], [.999,.001]] ], 59 | ["burglary","earthquake","alarm"]) 60 | 61 | johncalls = DataArray([[.10,.90],[.95,.05]],["alarm","johncalls"]) 62 | marycalls = DataArray([[.30,.70],[.01,.99]],["alarm","marycalls"]) 63 | 64 | cpts = [burglary, earthquake, alarm, johncalls, marycalls] 65 | 66 | evidence = {"johncalls":0, "marycalls":0} 67 | 68 | margs1,lik1 = calc_marginals_simple(cpts,evidence) 69 | p_burglary,lik2 = digraph_eliminate(cpts,evidence,["burglary"]) 70 | margs3, lik3 = calc_marginals_sumproduct(cpts, evidence, 'burglary') 71 | 72 | # TODO: This version is disabled until I can dig up the reference to figure 73 | # out how it works. -jt 74 | # margs4,lik4 = calc_marginals_jtree(cpts,evidence) 75 | 76 | # Check that all four calculations give the same p(burglary) and 77 | # likelihood, up to numerical error 78 | for (marg,lik) in \ 79 | [(p_burglary, lik2), (margs3["burglary"], lik3)]: # , (margs4["burglary"],lik4)]: 80 | assert_almost_equal(marg,margs1["burglary"]) 81 | assert_almost_equal(lik,lik1) 82 | 83 | print("p(burglary) = %s" % margs1["burglary"].__array__()) 84 | print("likelihood of observations = %.3f" % lik1) 85 | 86 | ####### DataArray utilities ################ 87 | 88 | def match_shape(x,yshape,axes): 89 | """ 90 | Creates a view v on x with the same number of dimensions as y. 91 | The axes of x are copied into the axes of v specified by the axes argument. 92 | 93 | Example 94 | --------- 95 | >>> x = np.arange(3) 96 | >>> match_shape(x,(2,3,2),(1,)) 97 | array([[[0, 0], 98 | [1, 1], 99 | [2, 2]], 100 | 101 | [[0, 0], 102 | [1, 1], 103 | [2, 2]]]) 104 | 105 | """ 106 | if isinstance(axes,int): axes = [axes] 107 | assert len(x.shape) == len(axes) 108 | assert all(xsize == yshape[yax] for xsize,yax in zip(x.shape,axes)) 109 | strides = np.zeros(len(yshape), dtype=np.intp) 110 | for yax,xstride in zip(axes,x.strides): 111 | strides[yax] = xstride 112 | return np.ndarray.__new__(np.ndarray, strides=strides, shape=yshape, buffer=x, dtype=x.dtype) 113 | 114 | def multiply_potentials(*DAs): 115 | """ 116 | Multiply DataArrays in the way that we multiply functions, 117 | e.g. h(i,j,k,l) = f(i,j,k) g(k,l) 118 | 119 | Parameters 120 | ------------- 121 | DA1,DA2,... 
: DataArrays with variable names as axis labels 122 | 123 | Returns 124 | --------- 125 | product 126 | 127 | example 128 | --------- 129 | >>> f_of_a = DataArray([1, 2],"a") 130 | >>> g_of_b = DataArray([1,-1],"b") 131 | >>> multiply_potentials(f_of_a, g_of_b) 132 | DataArray([[ 1, -1], 133 | [ 2, -2]]) 134 | ('a', 'b') 135 | >>> multiply_potentials(f_of_a, f_of_a) 136 | DataArray([1, 4]) 137 | ('a',) 138 | 139 | 140 | """ 141 | if len(DAs) == 0: return 1 142 | 143 | full_names, full_shape = [],[] 144 | for axis,size in zip(_sum(list(DA.axes) for DA in DAs), _sum(DA.shape for DA in DAs)): 145 | if axis.name not in full_names: 146 | full_names.append(axis.name) 147 | full_shape.append(size) 148 | 149 | return DataArray( 150 | _prod(match_shape(DA.copy(), full_shape, 151 | [full_names.index(axis.name) for axis in DA.axes]) for DA in DAs), 152 | axes=full_names) 153 | 154 | def sum_over_axes(DA, axis_names): 155 | Out = DA 156 | for axname in axis_names: 157 | Out = Out.sum(axis=axname) 158 | return Out 159 | 160 | def set_slices(DA,**axes2inds): 161 | """ 162 | return a copy of DataArray DA, where several slices are taken along named axes, 163 | specified by keys ax1=ind1, ax2=ind2, etc. 164 | """ 165 | Out = DA 166 | for (ax,ind) in axes2inds.items(): 167 | Out = Out.axis[ax][ind:(ind+1)] 168 | return Out 169 | 170 | def sum_over_other_axes(DA, kept_axis_name): 171 | "sum all axes of DataArray DA except for ax" 172 | return sum_over_axes(DA, 173 | [axname for axname in DA.names if axname != kept_axis_name]) 174 | 175 | def _sum(seq): return reduce(operator.add, seq) 176 | def _prod(seq): return reduce(operator.mul, seq) 177 | 178 | ####### Simple marginalization ############# 179 | 180 | def calc_marginals_simple(cpts,evidence): 181 | """ 182 | Calculate the marginal probabilities the simple simple way. Calculate joint 183 | distribution of all variables and then marginalize. This algorithm becomes 184 | inefficient when there are a lot of variables, and the joint distribution 185 | becomes high-dimensional. 186 | 187 | Parameters 188 | ----------- 189 | cpts : a list of DataArray. Gives conditional probability of variable with axis=-1 190 | evidence : a dictionary of variable -> value 191 | 192 | Returns 193 | -------- 194 | marginals : dictionary of variable -> prob_table 195 | likelihood : likelihood of observations in the model 196 | """ 197 | joint_dist = multiply_potentials(*cpts) 198 | joint_dist = joint_dist.axes.johncalls[evidence['johncalls']].axes.marycalls[evidence['marycalls']] 199 | return (dict((ax.name, normalize(sum_over_other_axes(joint_dist, ax.name))) 200 | for ax in joint_dist.axes), 201 | joint_dist.sum()) 202 | 203 | 204 | ############# Elimination ############# 205 | 206 | def digraph_eliminate(cpts,evidence,query_list): 207 | """ 208 | Use elimination algorithm to find joint distribution over variables in 209 | query_list, given evidence. 
210 | 211 | Parameters 212 | ------------ 213 | cpts : a list of DataArray with variable names for axis names 214 | evidence : a dictionary of observed variables (strings) -> values 215 | query_list : a list of variables (strings) 216 | 217 | Returns 218 | -------- 219 | marginals : dictionary of variable -> prob_table 220 | likelihood : likelihood of observations in the model 221 | """ 222 | 223 | # find the directed graphical model 224 | DG = cpts2digraph(cpts) 225 | # use postorder (leaves to root) from depth-first search as elimination order 226 | rvs = nx.dfs_postorder_nodes(DG) 227 | 228 | # modify elimination list so query nodes are at the end 229 | rvs_elim = [rv for rv in rvs if rv not in query_list] + query_list 230 | for rv in rvs_elim: 231 | # find potentials that reference that node 232 | pots_here = [cpt for cpt in cpts if rv in cpt.names] 233 | # remove them from cpts 234 | cpts = [cpt for cpt in cpts if rv not in cpt.names] 235 | # Find joint probability distribution of this variable and the ones coupled to it 236 | product_pot = multiply_potentials(*pots_here) 237 | # if node is in query set, we don't sum over it 238 | if rv not in query_list: 239 | # if node is in evidence set, take slice 240 | if rv in evidence: product_pot = product_pot.axes(rv)[evidence[rv]] 241 | # otherwise, sum over it 242 | else: product_pot = product_pot.sum(axis=rv) 243 | 244 | # add resulting product potential to cpts 245 | cpts.append(product_pot) 246 | 247 | assert len(cpts) == 1 248 | unnormed_prob = cpts[0] 249 | likelihood = unnormed_prob.sum() 250 | return unnormed_prob/likelihood, likelihood 251 | 252 | def cpts2digraph(cpts): 253 | """ 254 | Each cpt has axes a_1,a_2,...a_k and represents p(a_k | a_1,...a_{k-1}). 255 | Use cpts to construct directed graph corresponding to these conditional 256 | probability dists. 257 | """ 258 | G = nx.DiGraph() 259 | for cpt in cpts: 260 | names = [ax.name for ax in cpt.axes] 261 | target = names[-1] 262 | G.add_edges_from((source, target) for source in names[:-1]) 263 | return G 264 | 265 | ############# Sum-product ############# 266 | 267 | def calc_marginals_sumproduct(cpts, evidence, target_node): 268 | """ 269 | Construct the factor graph. Then use the sum-product algorithm to calculate 270 | marginals for all variables. 271 | 272 | Parameters 273 | ------------ 274 | cpts : a list of DataArray with variable names for axis labels 275 | evidence : a dictionary of observed variables (strings) -> values 276 | target_node : str 277 | Target node from which to calculate likelihood 278 | 279 | Returns 280 | -------- 281 | marginals : dictionary of variable -> prob_table 282 | likelihood : likelihood of observations in the model 283 | """ 284 | 285 | # In this implementation, we use evidence by using an evidence potential, 286 | # which equals 1 at the observed value and zero everywhere else. 287 | # Alternatively, we could take slices of cpts. This is the strategy used in 288 | # the junction tree algorithm below. 
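    # For example (illustrative numbers; see one_hot below): with evidence
    # {'johncalls': 0} on a two-state variable, the evidence potential is
    # DataArray([1., 0.], ['johncalls']), which zeroes out the probability
    # mass on the unobserved value when multiplied in.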
289 | 
290 |     G,names2tables = make_factor_graph(cpts,evidence)
291 |     messages = {}
292 |     # (source,target) for edges in directed spanning tree resulting from depth
293 |     # first search
294 |     message_pairs = dfs_edges(G)
295 | 
296 |     # message passing inward from leaves (actually we don't need to send
297 |     # messages up from some leaves because cpt is normalized)
298 |     for (parent,child) in message_pairs:
299 |         m = make_message(child,parent,G,messages,names2tables)
300 |         messages[(child,parent)] = m
301 | 
302 |     # message passing outward from root
303 |     for (parent,child) in reversed(message_pairs):
304 |         m = make_message(parent,child,G,messages,names2tables)
305 |         messages[(parent,child)] = m
306 | 
307 |     # calculate marginals
308 |     marginals = {}
309 |     potentials = {}
310 |     for node in G.nodes():
311 |         potential = multiply_potentials(*[messages[(src,node)] for src in G.neighbors(node)])
312 |         marginals[node] = normalize(potential)
313 |         potentials[node] = potential
314 | 
315 |     return marginals, potentials[target_node].sum()
316 | 
317 | def make_message(src,targ,G,messages,names2tables):
318 |     """
319 |     Collect messages coming to src from all nodes other than targ and multiply them.
320 |     If targ is a factor node, this product is the message.
321 |     If targ is a variable node, marginalize the product over all variables other than targ.
322 |     """
323 |     # collect messages incoming to src
324 |     incoming_msgs = [messages[(neighb,src)] for neighb in G.neighbors(src) if neighb != targ]
325 |     if isvar2factor(src,targ): return multiply_potentials(names2tables[src],*incoming_msgs)
326 |     return sum_over_other_axes(multiply_potentials(names2tables[src],*incoming_msgs),targ)
327 | 
328 | def isvar2factor(src,targ):
329 |     "True if the message goes from a variable node to a factor node (factor nodes are tuples)."
330 |     return isinstance(targ,tuple)
331 | 
332 | def make_factor_graph(cpts,evidence):
333 |     G = nx.Graph()
334 | 
335 |     names2factors = dict((tuple(cpt.names), cpt) for cpt in cpts)
336 |     G.add_nodes_from(names2factors.keys())
337 |     for (name,factor) in names2factors.items():
338 |         for axname in factor.names:
339 |             G.add_edge(name, axname)
340 | 
341 |     names2factors.update(
342 |         dict((name,
343 |               DataArray(np.ones(size) if name not in evidence
344 |                         else one_hot(size,evidence[name]),[name]))
345 |              for cpt in cpts
346 |              for (name,size) in zip(cpt.names,cpt.shape)))
347 | 
348 |     return G, names2factors
349 | 
350 | def one_hot(size,val):
351 |     "out[val] = 1, out[i] = 0 for i != val"
352 |     out = np.zeros(size)
353 |     out[val] = 1
354 |     return out
355 | 
356 | def dfs_edges(G):
357 |     """
358 |     Return (source,target) pairs for the edges of the directed spanning tree
359 |     produced by depth-first search, with deeper targets listed first.
360 |     """
361 |     DG = nx.dfs_tree(G, source=None)
362 |     return [(src,targ) for targ in nx.dfs_postorder_nodes(DG) for src in DG.predecessors(targ)]
363 | 
364 | 
365 | ############# Junction tree #############
366 | 
367 | ## Applying the junction tree algorithm to a directed graphical model requires several steps:
368 | ## 1. Moralize the directed graph.
369 | ## 2. Add edges to obtain a triangulated graph. It is hard to find the best triangulation
370 | ##    (i.e., the one that adds as few edges as possible), so we use a greedy heuristic.
371 | ## 3. Form a clique tree for the triangulated graph. Assign potentials to cliques.
372 | ## 4. Apply the Hugin algorithm to the clique tree.
373 | 
374 | 
375 | def calc_marginals_jtree(potentials, evidence):
376 |     """
377 |     Use the Hugin algorithm to find marginals and the data likelihood.
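    Hugin propagation makes two sweeps over the junction tree: an inward pass
    that collects evidence from the leaves toward a root, and an outward pass
    that distributes it back, rescaling each clique by the ratio of its new to
    its old separator potential (see hugin() below).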
378 |     """
379 |     JT, names2factors = make_jtree_from_factors(potentials)
380 |     pots = hugin(JT, names2factors, evidence)
381 | 
382 |     # Each random variable appears in many cliques and separators. After propagation,
383 |     # each of these potentials is a joint distribution, so they all give the same marginals.
384 |     rv2marg = {}
385 |     for pot in pots.values():
386 |         for rv in pot.labels:
387 |             if rv not in rv2marg:
388 |                 rv2marg[rv] = normalize(sum_over_other_axes(pot,rv))
389 | 
390 |     return rv2marg, pot.sum()  # any propagated potential sums to the data likelihood
391 | 
392 | def hugin(JT,names2factors,evidence):
393 | 
394 |     # initialize potentials, taking slices to incorporate evidence
395 |     potentials = dict([(name,use_evidence(factor,evidence))
396 |                        for (name,factor) in names2factors.items()])
397 | 
398 |     message_pairs = dfs_edges(JT)
399 |     # iterate over edges of clique tree
400 |     for (pred,succ) in message_pairs:
401 |         sep = tuple(set(pred).intersection(succ))
402 |         sepname = (pred,succ)
403 |         # update separator
404 |         potentials[sepname] = sum_over_axes(potentials[succ],set(succ).difference(sep))
405 |         # update predecessor clique
406 |         potentials[pred] = multiply_potentials(potentials[pred],potentials[sepname])
407 | 
408 |     for (pred,succ) in reversed(message_pairs):
409 |         sep = tuple(set(pred).intersection(succ))
410 |         sepname = (pred,succ)
411 |         # update separator
412 |         oldsep = potentials[sepname]
413 |         potentials[sepname] = sum_over_axes(potentials[pred],set(pred).difference(sep))
414 |         # update successor clique
415 |         potentials[succ] = multiply_potentials(potentials[succ],1/oldsep,potentials[sepname])
416 | 
417 |     return potentials
418 | 
419 | def use_evidence(potential,ev_dict):
420 |     "Take slices of potential at all variables appearing in ev_dict."
421 |     obs_dict = dict((label,ev_dict[label]) for label in potential.labels if label in ev_dict)
422 |     return set_slices(potential,**obs_dict) if len(obs_dict) > 0 else potential
423 | 
424 | def triangulate_min_fill(G):
425 |     """
426 |     Return a triangulated copy of undirected graph G, built by greedy elimination. (NB:
427 |     despite the name, this eliminates the minimum-degree node at each step, not the true
428 |     min-fill node.) Elimination always triangulates: connecting an eliminated node's neighbors chords every cycle through it.
429 |     """
430 |     G_elim = nx.Graph(G.edges())
431 |     added_edges = []
432 |     for _ in range(G.number_of_nodes()):
433 |         nodes,degrees = zip(*G_elim.degree().items())
434 |         min_deg_node = nodes[np.argmin(degrees)]
435 |         new_edges = [(n1,n2) for (n1,n2) in
436 |                      combinations(G_elim.neighbors(min_deg_node),2) if not
437 |                      G_elim.has_edge(n1,n2)]
438 |         added_edges.extend(new_edges)
439 |         G_elim.remove_node(min_deg_node)
440 |         G_elim.add_edges_from(new_edges)
441 | 
442 |     return nx.Graph(G.edges() + added_edges)
443 | 
444 | def make_jtree_from_tri_graph(G):
445 |     """Return the junction tree (an undirected networkx Graph) for triangulated graph G."""
446 | 
447 |     # clique graph
448 |     CG = nx.Graph()
449 |     # A maximum weight spanning tree of the clique graph is guaranteed to be a junction tree
450 |     # (i.e., it satisfies the running intersection property), where weight is the size of
451 |     # the intersection between adjacent cliques.
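    # Note (added comment): networkx computes *minimum* spanning trees, so the edge
    # weights below are negated -- the minimum spanning tree under weight
    # -len(intersection) is exactly the maximum weight spanning tree under
    # +len(intersection).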
452 |     CG.add_weighted_edges_from((tuple(c1),tuple(c2),-c1c2)
453 |                                for (c1,c2) in combinations(nx.find_cliques(G),2)
454 |                                for c1c2 in [len(set(c1).intersection(set(c2)))] if c1c2 > 0)
455 |     JT = nx.Graph(nx.minimum_spanning_tree(CG))  # min spanning tree of negated weights = max weight spanning tree
456 |     for src,targ in JT.edges():
457 |         JT[src][targ]["sep"] = tuple(set(src).intersection(set(targ)))
458 | 
459 |     return JT
460 | 
461 | def make_jtree_from_factors(factors):
462 |     """
463 |     Make junction tree and assign factors to cliques.
464 |     1. Moralize
465 |     2. Triangulate
466 |     3. Take MST of clique graph to get junction tree
467 |     4. Assign factors to cliques and multiply them to get clique potentials
468 | 
469 |     Parameters
470 |     ----------
471 |     factors : list of DataArray
472 | 
473 |     Returns
474 |     -------
475 |     JT : junction tree (undirected graph), with nodes labeled by tuples, e.g. ("A","B","C")
476 |     clique2pot : dictionary of cliques (i.e., node labels) -> DataArray
477 |     """
478 |     VarGraph = moral_graph_from_factors(factors)
479 |     TriangulatedGraph = triangulate_min_fill(VarGraph)
480 |     JT = make_jtree_from_tri_graph(TriangulatedGraph)
481 |     clique2potlist = dict((node,[]) for node in JT.nodes())
482 |     for factor in factors:
483 |         varset = set(factor.labels)
484 |         for clique in JT:
485 |             if varset.issubset(set(clique)):
486 |                 clique2potlist[clique].append(factor)
487 |                 break  # assign each factor to exactly one clique
488 |     clique2pot = dict((clique,multiply_potentials(*potlist)) for (clique,potlist) in clique2potlist.items())
489 |     # todo: make sure all cliques have a potential
490 |     return JT,clique2pot
491 | 
492 | def moral_graph_from_factors(factors):
493 |     G = nx.Graph()  # connect every pair of variables that share a factor
494 |     for factor in factors:
495 |         for label1,label2 in combinations(factor.names, 2):
496 |             G.add_edge(label1,label2)
497 | 
498 |     return G
499 | 
500 | def normalize(arr):
501 |     return arr/arr.sum()
502 | 
503 | if __name__ == "__main__":
504 |     test_pearl_network()
505 |     #import doctest
506 |     #doctest.testmod()
507 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Requirements for datarray package
2 | # Use with:
3 | #     pip install -r requirements.txt
4 | 
5 | numpy>=1.7
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [wheel]
2 | # Build wheels compatible with Python 2 and Python 3
3 | universal = 1
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Setup file for the Python datarray package."""
3 | 
4 | import os
5 | 
6 | # BEFORE importing distutils, remove MANIFEST. distutils doesn't properly
7 | # update it when the contents of directories change.
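8 | # (With a stale MANIFEST, sdist can reuse an out-of-date file list, so the
9 | # resulting tarball may silently miss newly added files.)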
10 | if os.path.exists('MANIFEST'): os.remove('MANIFEST')
11 | 
12 | # Commit to setuptools; importing it patches distutils so that options like install_requires work
13 | import setuptools
14 | 
15 | from distutils.core import setup
16 | 
17 | # Get version and release info, which is all stored in datarray/version.py
18 | ver_file = os.path.join('datarray', 'version.py')
19 | # Use exec on the file contents for Python 3 compatibility
20 | with open(ver_file, 'rt') as fobj:
21 |     exec(fobj.read())
22 | 
23 | opts = dict(name=NAME,
24 |             maintainer=MAINTAINER,
25 |             maintainer_email=MAINTAINER_EMAIL,
26 |             description=DESCRIPTION,
27 |             long_description=LONG_DESCRIPTION,
28 |             url=URL,
29 |             download_url=DOWNLOAD_URL,
30 |             license=LICENSE,
31 |             classifiers=CLASSIFIERS,
32 |             author=AUTHOR,
33 |             author_email=AUTHOR_EMAIL,
34 |             platforms=PLATFORMS,
35 |             version=VERSION,
36 |             packages=PACKAGES,
37 |             package_data=PACKAGE_DATA,
38 |             requires=REQUIRES,
39 |             install_requires=INSTALL_REQUIRES,
40 |             zip_safe=False,
41 |             )
42 | 
43 | 
44 | # Now call the actual setup function
45 | if __name__ == '__main__':
46 |     setup(**opts)
47 | 
--------------------------------------------------------------------------------
/tools/release.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Simple release script for datarray.
3 | 
4 | Ensure that you've built the docs and pushed those first (after verifying them
5 | manually).
6 | """
7 | from __future__ import print_function
8 | 
9 | import os
10 | import sys
11 | from subprocess import call
12 | 
13 | sh = lambda s: call(s, shell=True)
14 | 
15 | cwd = os.getcwd()
16 | if not os.path.isfile('setup.py'):
17 |     os.chdir('..')
18 |     if not os.path.isfile('setup.py'):
19 |         print("This script must be run from top-level datarray or tools dir.")
20 |         sys.exit(1)
21 | 
22 | 
23 | sh('./setup.py register')
24 | sh('./setup.py sdist --formats=gztar,zip upload')
--------------------------------------------------------------------------------