├── .flake8rc ├── .gitignore ├── LICENSE ├── circle.yml ├── dynarray ├── __init__.py └── dynamic_array.py ├── readme.md ├── setup.cfg ├── setup.py └── tests ├── test_dynamicarray.py └── test_examples.py /.flake8rc: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = I100 3 | max-line-length = 100 4 | exclude = doc/* -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.*~ 3 | .hypothesis/* 4 | *egg* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2016 Maciej Kula 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | pre: 3 | - pip install numpy 4 | test: 5 | pre: 6 | - pip install "hypothesis[numpy]" 7 | - pip install pytest 8 | - pip install flake8 9 | override: 10 | - flake8 --config .flake8rc 11 | - py.test -xv tests -------------------------------------------------------------------------------- /dynarray/__init__.py: -------------------------------------------------------------------------------- 1 | from .dynamic_array import DynamicArray # NOQA 2 | -------------------------------------------------------------------------------- /dynarray/dynamic_array.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class DynamicArray(object): 5 | """ 6 | Dynamically growable numpy array. 7 | 8 | Parameters 9 | ---------- 10 | 11 | array_or_shape: numpy array or tuple 12 | If an array, a growable array with the same shape, dtype, 13 | and a copy of the data will be created. The array will grow 14 | along the first dimension. 15 | If a tuple, en empty array of the specified shape will be created. 16 | The first element needs to be None to denote that the array will 17 | grow along the first dimension. 18 | dtype: optional, array dtype 19 | The dtype the array should have. 20 | capacity: optional, int 21 | The initial capacity of the array. 22 | allow_views_on_resize: optional, boolean 23 | If False, an exception will be thrown if the array is resized 24 | while there are live references to the array's contents. When 25 | the array is resized, these will point at old data. Set to 26 | True if you want to silence the exception. 27 | 28 | Examples 29 | -------- 30 | 31 | Create a multidimensional array and append rows: 32 | 33 | >>> from dynarray import DynamicArray 34 | >>> # The leading dimension is None to denote that this is 35 | >>> # the dynamic dimension 36 | >>> array = DynamicArray((None, 20, 10)) 37 | >>> array.append(np.random.random((20, 10))) 38 | >>> array.extend(np.random.random((100, 20, 10))) 39 | 40 | Slice and perform arithmetic like with normal numpy arrays: 41 | 42 | >>> array[:2] 43 | """ 44 | 45 | MAGIC_METHODS = ('__radd__', 46 | '__add__', 47 | '__sub__', 48 | '__rsub__', 49 | '__mul__', 50 | '__rmul__', 51 | '__div__', 52 | '__rdiv__', 53 | '__pow__', 54 | '__rpow__', 55 | '__eq__', 56 | '__len__') 57 | 58 | class __metaclass__(type): 59 | def __init__(cls, name, parents, attrs): 60 | 61 | def make_delegate(name): 62 | 63 | def delegate(self, *args, **kwargs): 64 | return getattr(self._data[:self._size], name) 65 | 66 | return delegate 67 | 68 | type.__init__(cls, name, parents, attrs) 69 | 70 | for method_name in cls.MAGIC_METHODS: 71 | setattr(cls, method_name, property(make_delegate(method_name))) 72 | 73 | def __init__(self, array_or_shape=(None,), dtype=None, capacity=10, 74 | allow_views_on_resize=False): 75 | 76 | if isinstance(array_or_shape, tuple): 77 | if not len(array_or_shape) or array_or_shape[0] is not None: 78 | raise ValueError('The shape argument must be a non-empty tuple ' 79 | 'and have None as the first dimension') 80 | self._shape = array_or_shape 81 | self._dtype = dtype 82 | self._size = 0 83 | self._capacity = capacity 84 | elif isinstance(array_or_shape, np.ndarray): 85 | self._shape = (None,) + array_or_shape.shape[1:] 86 | self._dtype = dtype or array_or_shape.dtype 87 | self._size = array_or_shape.shape[0] 88 | self._capacity = max(self._size, capacity) 89 | 90 | self._data = np.empty((self._capacity,) + self._get_trailing_dimensions(), 91 | dtype=self._dtype) 92 | 93 | if isinstance(array_or_shape, np.ndarray): 94 | self[:] = array_or_shape 95 | 96 | self._allow_views_on_resize = allow_views_on_resize 97 | 98 | def _get_trailing_dimensions(self): 99 | 100 | return self._shape[1:] 101 | 102 | def __getitem__(self, idx): 103 | 104 | return self._data[:self._size][idx] 105 | 106 | def __setitem__(self, idx, value): 107 | 108 | self._data[:self._size][idx] = value 109 | 110 | def _grow(self, new_size): 111 | 112 | try: 113 | self._data.resize(((new_size,) + self._get_trailing_dimensions())) 114 | except ValueError as e: 115 | if 'an array that references' in e.message: 116 | if self._allow_views_on_resize: 117 | self._data = np.resize(self._data, 118 | ((new_size,) + self._get_trailing_dimensions())) 119 | else: 120 | raise ValueError('Unable to grow the array ' 121 | 'as it refrences or is referenced ' 122 | 'by another array. Growing the array ' 123 | 'would result in views pointing at stale data. ' 124 | 'You can suppress this exception by setting ' 125 | '`allow_views_on_resize=True` when instantiating ' 126 | 'a DynamicArray.') 127 | else: 128 | raise 129 | 130 | self._capacity = new_size 131 | 132 | def _as_dtype(self, value): 133 | 134 | if hasattr(value, 'dtype') and value.dtype == self._dtype: 135 | return value 136 | else: 137 | return np.array(value, dtype=self._dtype) 138 | 139 | def append(self, value): 140 | """ 141 | Append a row to the array. 142 | 143 | The row's shape has to match the array's trailing dimensions. 144 | """ 145 | 146 | value = self._as_dtype(value) 147 | 148 | if value.shape != self._get_trailing_dimensions(): 149 | 150 | value_unit_shaped = value.shape == (1,) or len(value.shape) == 0 151 | self_unit_shaped = self._shape == (1,) or len(self._get_trailing_dimensions()) == 0 152 | 153 | if value_unit_shaped and self_unit_shaped: 154 | pass 155 | else: 156 | raise ValueError('Input shape {} incompatible with ' 157 | 'array shape {}'.format(value.shape, 158 | self._get_trailing_dimensions())) 159 | 160 | if self._size == self._capacity: 161 | self._grow(max(1, self._capacity * 2)) 162 | 163 | self._data[self._size] = value 164 | 165 | self._size += 1 166 | 167 | def extend(self, values): 168 | """ 169 | Extend the array with a set of rows. 170 | 171 | The rows' dimensions must match the trailing dimensions 172 | of the array. 173 | """ 174 | 175 | values = self._as_dtype(values) 176 | 177 | required_size = self._size + values.shape[0] 178 | 179 | if required_size >= self._capacity: 180 | self._grow(max(self._capacity * 2, 181 | required_size)) 182 | 183 | self._data[self._size:required_size] = values 184 | self._size = required_size 185 | 186 | def shrink_to_fit(self): 187 | """ 188 | Reduce the array's capacity to its size. 189 | """ 190 | 191 | self._grow(self._size) 192 | 193 | @property 194 | def shape(self): 195 | 196 | return (self._size,) + self._get_trailing_dimensions() 197 | 198 | @property 199 | def capacity(self): 200 | 201 | return self._capacity 202 | 203 | def __len__(self): 204 | 205 | return self.shape[0] 206 | 207 | def __repr__(self): 208 | 209 | return (self._data[:self._size].__repr__() 210 | .replace('array', 211 | 'DynamicArray(size={}, capacity={})' 212 | .format(self._size, self._capacity))) 213 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Dynarray 2 | 3 | [![CircleCI](https://circleci.com/gh/maciejkula/dynarray.svg?style=svg)](https://circleci.com/gh/maciejkula/dynarray) 4 | 5 | 6 | Dynamically growable Numpy arrays. They function exactly like normal numpy arrays, but support appending new elements. 7 | 8 | # Installation 9 | 10 | Simply install from PyPI: `pip install dynarray` 11 | 12 | 13 | # Quickstart 14 | 15 | Create an empty one-dimensional array and append elements to it: 16 | 17 | ```python 18 | from dynarray import DynamicArray 19 | 20 | array = DynamicArray() 21 | 22 | for element in range(10): 23 | array.append(element) 24 | ``` 25 | 26 | Create a multidimensional array and append rows: 27 | 28 | ```python 29 | from dynarray import DynamicArray 30 | 31 | # The leading dimension is None to denote that this is 32 | # the dynamic dimension 33 | array = DynamicArray((None, 20, 10)) 34 | 35 | array.append(np.random.random((20, 10))) 36 | array.extend(np.random.random((100, 20, 10))) 37 | 38 | print(array.shape) # (101, 20, 10) 39 | ``` 40 | 41 | Slice and perform arithmetic like with normal numpy arrays: 42 | 43 | ```python 44 | from dynarray import DynamicArray 45 | 46 | array = DynamicArray(np.ones((100, 10)), dtype=np.float16) 47 | 48 | assert array[10:11].sum() == 10.0 49 | print(array[10]) 50 | array[10] *= 0.0 51 | assert array[10].sum() == 0.0 52 | ``` 53 | 54 | Shrink to fit to minimize memory usage when no further resizing is needed: 55 | 56 | ```python 57 | from dynarray import DynamicArray 58 | 59 | array = DynamicArray(np.ones((100, 10)), dtype=np.float16) 60 | array.extend(np.ones((50, 10))) 61 | 62 | assert array.capacity == 200 63 | array.shrink_to_fit() 64 | assert array.capacity == 150 65 | ``` 66 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='dynarray', 5 | version='0.1.3', 6 | description='Dynamically growable numpy arrays.', 7 | url='https://github.com/maciejkula/dynarray', 8 | download_url='https://github.com/maciejkula/dynarray/tarball/0.1.3', 9 | packages=['dynarray'], 10 | install_requires=['numpy'], 11 | tests_require=['pytest', 'hypothesis[numpy]'], 12 | author='Maciej Kula', 13 | license='MIT', 14 | classifiers=['Development Status :: 3 - Alpha', 15 | 'License :: OSI Approved :: MIT License'] 16 | ) 17 | -------------------------------------------------------------------------------- /tests/test_dynamicarray.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | from hypothesis import HealthCheck, given, settings 4 | from hypothesis.strategies import integers, lists, sampled_from 5 | from hypothesis.extra.numpy import arrays 6 | 7 | import numpy as np 8 | 9 | from dynarray import DynamicArray 10 | 11 | 12 | def arrays_strategy(): 13 | 14 | shapes = lists(integers(min_value=1, max_value=15), 15 | min_size=1, max_size=4) 16 | 17 | possible_dtypes = [x for x in itertools.chain(*np.sctypes.values()) 18 | if x not in (np.void, np.object_, np.object)] 19 | dtypes = sampled_from(possible_dtypes) 20 | 21 | return shapes.flatmap( 22 | lambda shape: dtypes.flatmap( 23 | lambda dtype: arrays(dtype, shape))) 24 | 25 | 26 | def assert_equal_or_nan(x, y): 27 | 28 | assert np.all(np.logical_or(x == y, 29 | np.isnan(x))) 30 | 31 | 32 | def test_structured_array(): 33 | 34 | source_array = np.array([(1, 2.0, 'Hello'), 35 | (2, 3.0, 'World')], 36 | dtype=[('foo', 'i4'), 37 | ('bar', 'f4'), 38 | ('baz', 'S10')]) 39 | 40 | array = DynamicArray(source_array) 41 | 42 | assert array.shape == source_array.shape 43 | 44 | for _ in range(4): 45 | array.append(source_array[0].tolist()) 46 | 47 | 48 | @settings(suppress_health_check=[HealthCheck.too_slow]) 49 | @given(arrays_strategy()) 50 | def test_shape(source_array): 51 | 52 | array = DynamicArray(source_array) 53 | 54 | assert array.shape == source_array.shape 55 | 56 | 57 | def test_explicit_dtype_overrides_input_dtype(): 58 | 59 | source = np.zeros(10, dtype=np.float64) 60 | array = DynamicArray(source, dtype=np.float16) 61 | 62 | assert array[:].dtype == np.float16 63 | 64 | 65 | @settings(suppress_health_check=[HealthCheck.too_slow]) 66 | @given(arrays_strategy()) 67 | def test_appending(source_array): 68 | 69 | dtype = source_array.dtype 70 | input_arg = (None,) + source_array.shape[1:] 71 | 72 | array = DynamicArray(input_arg, dtype, allow_views_on_resize=True) 73 | 74 | _ = array[:] # NOQA 75 | 76 | for row in source_array: 77 | array.append(row) 78 | 79 | try: 80 | assert np.all(np.logical_or(np.isnan(source_array), 81 | array[:] == source_array)) 82 | except TypeError: 83 | assert np.all(array[:] == source_array) 84 | 85 | 86 | @settings(suppress_health_check=[HealthCheck.too_slow]) 87 | @given(arrays_strategy()) 88 | def test_appending_with_views_fails(source_array): 89 | 90 | dtype = source_array.dtype 91 | input_arg = (None,) + source_array.shape[1:] 92 | 93 | array = DynamicArray(input_arg, dtype) 94 | array.shrink_to_fit() # Force a reallocation on first append 95 | 96 | _ = array[:] # NOQA 97 | 98 | try: 99 | for row in source_array: 100 | array.append(row) 101 | assert False, 'An exception should have been raised.' 102 | except ValueError as e: 103 | assert 'allow_views_on_resize' in e.message 104 | 105 | 106 | @settings(suppress_health_check=[HealthCheck.too_slow]) 107 | @given(arrays_strategy()) 108 | def test_appending_lists(source_array): 109 | 110 | dtype = source_array.dtype 111 | input_arg = (None,) + source_array.shape[1:] 112 | 113 | array = DynamicArray(input_arg, dtype, allow_views_on_resize=True) 114 | 115 | _ = array[:] # NOQA 116 | 117 | for row in source_array: 118 | row_list = row.tolist() 119 | 120 | if isinstance(row_list, basestring): 121 | # Numpy has problems parsing unicode 122 | return 123 | 124 | array.append(row_list) 125 | 126 | try: 127 | assert np.all(np.logical_or(np.isnan(source_array), 128 | array[:] == source_array)) 129 | except TypeError: 130 | assert np.all(array[:] == source_array) 131 | 132 | 133 | @settings(suppress_health_check=[HealthCheck.too_slow]) 134 | @given(arrays_strategy()) 135 | def test_extending(source_array): 136 | 137 | dtype = source_array.dtype 138 | input_arg = (None,) + source_array.shape[1:] 139 | 140 | array = DynamicArray(input_arg, dtype) 141 | 142 | for row in source_array: 143 | array.extend(source_array) 144 | 145 | comparison_array = np.concatenate([source_array] * source_array.shape[0]) 146 | 147 | try: 148 | assert np.all(np.logical_or(np.isnan(comparison_array), 149 | array[:] == comparison_array)) 150 | except TypeError: 151 | assert np.all(array[:] == comparison_array) 152 | 153 | 154 | @settings(suppress_health_check=[HealthCheck.too_slow]) 155 | @given(arrays_strategy()) 156 | def test_array_constructor(source_array): 157 | 158 | array = DynamicArray(source_array) 159 | 160 | try: 161 | assert np.all(np.logical_or(np.isnan(source_array), 162 | array[:] == source_array)) 163 | except TypeError: 164 | assert np.all(array[:] == source_array) 165 | 166 | 167 | @settings(suppress_health_check=[HealthCheck.too_slow]) 168 | @given(arrays_strategy()) 169 | def test_extending_array_constructor(source_array): 170 | 171 | array = DynamicArray(source_array) 172 | 173 | for row in source_array: 174 | array.extend(source_array) 175 | 176 | comparison_array = np.concatenate([source_array] * 177 | (1 + source_array.shape[0])) 178 | 179 | try: 180 | assert np.all(np.logical_or(np.isnan(comparison_array), 181 | array[:] == comparison_array)) 182 | except TypeError: 183 | assert np.all(array[:] == comparison_array) 184 | 185 | 186 | @settings(suppress_health_check=[HealthCheck.too_slow]) 187 | @given(arrays_strategy()) 188 | def test_attr_delegation(source_array): 189 | 190 | array = DynamicArray(source_array) 191 | 192 | try: 193 | source_array + source_array 194 | except TypeError: 195 | return 196 | 197 | try: 198 | assert_equal_or_nan(array + source_array, 199 | source_array + source_array) 200 | except TypeError: 201 | assert (array + source_array == 202 | source_array + source_array) 203 | 204 | try: 205 | assert_equal_or_nan(array - source_array, 206 | source_array - source_array) 207 | except TypeError: 208 | assert (array - source_array == 209 | source_array - source_array) 210 | 211 | try: 212 | assert_equal_or_nan(array * source_array, 213 | source_array * source_array) 214 | except TypeError: 215 | assert (array * source_array == 216 | source_array * source_array) 217 | 218 | try: 219 | assert_equal_or_nan(array / source_array, 220 | source_array / source_array) 221 | except TypeError: 222 | assert (array / source_array == 223 | source_array / source_array) 224 | 225 | try: 226 | assert_equal_or_nan(array ** 2, 227 | source_array ** 2) 228 | except TypeError: 229 | assert (array ** 2 == 230 | source_array ** 2) 231 | 232 | # In-place operators 233 | try: 234 | array = DynamicArray(source_array) 235 | array += source_array 236 | assert_equal_or_nan(array, 237 | source_array + source_array) 238 | except TypeError: 239 | array += source_array 240 | assert (array == 241 | source_array + source_array) 242 | 243 | try: 244 | array = DynamicArray(source_array) 245 | array -= source_array 246 | assert_equal_or_nan(array, 247 | source_array - source_array) 248 | except TypeError: 249 | array -= source_array 250 | assert (array == 251 | source_array - source_array) 252 | 253 | try: 254 | array = DynamicArray(source_array) 255 | array *= source_array 256 | assert_equal_or_nan(array, 257 | source_array * source_array) 258 | except TypeError: 259 | array *= source_array 260 | assert (array == 261 | source_array * source_array) 262 | 263 | try: 264 | array = DynamicArray(source_array) 265 | array /= source_array 266 | assert_equal_or_nan(array, 267 | source_array / source_array) 268 | except TypeError: 269 | array /= source_array 270 | assert (array == 271 | source_array / source_array) 272 | -------------------------------------------------------------------------------- /tests/test_examples.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | 5 | import numpy as np # NOQA 6 | 7 | 8 | def test_examples(): 9 | 10 | readme_path = os.path.join(os.path.join( 11 | os.path.dirname(os.path.abspath(__file__)), '..' 12 | ), 'readme.md') 13 | 14 | with open(readme_path, 'rb') as readme_file: 15 | readme_text = readme_file.read() 16 | examples = re.findall('```[^`]*```', readme_text, flags=re.DOTALL) 17 | 18 | for number, example in enumerate(examples): 19 | source = example.replace('`', '').replace('python', '') 20 | code = compile(source, 'example_{}'.format(number), 'exec') 21 | exec(code) 22 | --------------------------------------------------------------------------------