├── .gitignore ├── .travis.yml ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── join ├── __init__.py ├── _core.py └── _join_funcs.py ├── run_tests.sh ├── setup.cfg ├── setup.py └── test ├── __init__.py └── test_join.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .gitignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .cache 41 | nosetests.xml 42 | coverage.xml 43 | 44 | # Translations 45 | *.mo 46 | *.pot 47 | 48 | # Django stuff: 49 | *.log 50 | 51 | # Sphinx documentation 52 | docs/_build/ 53 | 54 | # PyBuilder 55 | target/ 56 | 57 | # IntelliJ 58 | .idea 59 | *.iml 60 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.4" 4 | - "2.7" 5 | - "pypy" 6 | script: python -m test.test_join 7 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Stuart Owen 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include setup.cfg 2 | include setup.py 3 | include run_tests.sh 4 | include join/__init__.py 5 | include join/_core.py 6 | include join/_join_funcs.py 7 | include test/test_join.py 8 | include README.rst 9 | include LICENSE.txt -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ==== 2 | JOIN 3 | ==== 4 | SQL-style joins for Python iterables. 5 | 6 | .. image:: https://travis-ci.org/StuartAxelOwen/join.svg?branch=master 7 | :target: https://travis-ci.org/StuartAxelOwen/join 8 | 9 | .. code-block:: python 10 | 11 | >>> from join import join, merge, tuple_join 12 | >>> dogs = [ 13 | ... Dog('gatsby', 'Rruff!', 16), 14 | ... Dog('ein', 'wruf!', 9), 15 | ... ] 16 | >>> cats = [ 17 | ... Cat('pleo', 'mreeeoww', 16), 18 | ... Cat('xena', 'mreow', 12), 19 | ... Cat('gatsby', 'rowr', 15), 20 | ... ] 21 | >>> catdogs = merge(cats, dogs, key='name') 22 | >>> catdogs 23 | [CatDog({'right': Dog(name='gatsby', woof='Rruff!', weight=16), 'name': 'gatsby', 'weight': 15, 'meow': 'rowr', 'woof': 'Rruff!', 'left': Cat(name='gatsby', meow='rowr', weight=15)})] 24 | >>> catdogs[0].meow 25 | 'rowr' 26 | >>> catdogs[0].woof 27 | 'Rruff!' 28 | 29 | Installation 30 | ------------ 31 | Install using pip: 32 | 33 | ``pip install join`` 34 | 35 | Use 36 | --- 37 | 38 | ``join`` does the work of associating iterable items together, but gives you all the power for customization, letting you supply your own join function, separate keys for left and right iterables, and even letting you use functions instead of attribute names. 39 | 40 | ``merge`` used above, for example, is ``join`` using an object union to join matched objects. You can use a tuple join, which is default for ``join``: 41 | 42 | .. code-block:: python 43 | 44 | >>> join(cats, dogs, key='name', join_fn=tuple_join) 45 | [(Cat(name='gatsby', meow='rowr', weight=15), Dog(name='gatsby', woof='Rruff!', weight=16))] 46 | 47 | Supplying your own join function is easy: 48 | 49 | .. code-block:: python 50 | 51 | >>> def weight_sum(left, right): 52 | ... return left.weight + right.weight 53 | ... 54 | >>> join(cats, dogs, key='name', join_fn=weight_sum) 55 | [31] 56 | 57 | Using separate key functions is easy too: 58 | 59 | .. code-block:: python 60 | 61 | >>> def cat_key(cat): 62 | ... return cat.weight % 3 == 0 # weight divisible by 3 63 | ... 64 | >>> def dog_key(dog): 65 | ... return dog.weight % 4 == 0 # weight divisible by 4 66 | ... 67 | >>> def name_join(left, right): 68 | ... return left.name + '-' + right.name 69 | ... 70 | >>> join(cats, dogs, left_key=cat_key, right_key=dog_key, join_fn=name_join) 71 | ['pleo-ein', 'xena-gatsby', 'gatsby-gatsby'] 72 | 73 | See also 74 | -------- 75 | 76 | The much larger library Toolz_ contains a similar, but less flexible, ``join`` function. 77 | 78 | Contributing 79 | ------------ 80 | 81 | Pull requests and issues welcome! Run tests from project root with ``sh run_tests.sh``. 82 | 83 | .. _Toolz: https://pypi.python.org/pypi/toolz 84 | -------------------------------------------------------------------------------- /join/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'stuart' 2 | 3 | from ._core import join, merge, group 4 | from ._join_funcs import tuple_join 5 | from ._join_funcs import union_join 6 | 7 | __version__ = '0.1.1' 8 | -------------------------------------------------------------------------------- /join/_core.py: -------------------------------------------------------------------------------- 1 | __author__ = 'stuart' 2 | 3 | from collections import defaultdict 4 | from ._join_funcs import union_join, tuple_join, make_union_join 5 | 6 | 7 | def merge(left, right, how='inner', key=None, left_key=None, right_key=None, 8 | left_as='left', right_as='right'): 9 | """ Performs a join using the union join function. """ 10 | return join(left, right, how, key, left_key, right_key, 11 | join_fn=make_union_join(left_as, right_as)) 12 | 13 | 14 | def join(left, right, how='inner', key=None, left_key=None, right_key=None, 15 | join_fn=tuple_join): 16 | """ 17 | :param left: left iterable to be joined 18 | :param right: right iterable to be joined 19 | :param str | function key: either an attr name, dict key, or function that produces hashable value 20 | :param how: 'inner', 'left', 'right', or 'outer' 21 | :param join_fn: function called on joined left and right iterable items to complete join 22 | :rtype: list 23 | """ 24 | if key is None and (left_key is None or right_key is None): 25 | raise ValueError("Must provide either key param or both left_key and right_key") 26 | 27 | if key is not None: 28 | lkey = rkey = key if callable(key) else make_key_fn(key) 29 | else: 30 | lkey = left_key if callable(left_key) else make_key_fn(left_key) 31 | rkey = right_key if callable(right_key) else make_key_fn(right_key) 32 | 33 | try: 34 | join_impl = { 35 | "left": _left_join, 36 | "right": _right_join, 37 | "inner": _inner_join, 38 | "outer": _outer_join, 39 | }[how] 40 | except KeyError: 41 | raise ValueError("Invalid value for how: {}, must be left, right, " 42 | "inner, or outer.".format(str(how))) 43 | else: 44 | return join_impl(left, right, lkey, rkey, join_fn) 45 | 46 | 47 | def _inner_join(left, right, left_key_fn, right_key_fn, join_fn=union_join): 48 | """ Inner join using left and right key functions 49 | 50 | :param left: left iterable to be joined 51 | :param right: right iterable to be joined 52 | :param function left_key_fn: function that produces hashable value from left objects 53 | :param function right_key_fn: function that produces hashable value from right objects 54 | :param join_fn: function called on joined left and right iterable items to complete join 55 | :rtype: list 56 | """ 57 | joiner = defaultdict(list) 58 | for ele in right: 59 | joiner[right_key_fn(ele)].append(ele) 60 | joined = [] 61 | for ele in left: 62 | for other in joiner[left_key_fn(ele)]: 63 | joined.append(join_fn(ele, other)) 64 | return joined 65 | 66 | 67 | def _left_join(left, right, left_key_fn, right_key_fn, join_fn=union_join): 68 | """ 69 | :param left: left iterable to be joined 70 | :param right: right iterable to be joined 71 | :param function left_key_fn: function that produces hashable value from left objects 72 | :param function right_key_fn: function that produces hashable value from right objects 73 | :param join_fn: function called on joined left and right iterable items to complete join 74 | :rtype: list 75 | """ 76 | joiner = defaultdict(list) 77 | for ele in right: 78 | joiner[right_key_fn(ele)].append(ele) 79 | joined = [] 80 | for ele in left: 81 | for other in joiner.get(left_key_fn(ele), [None]): 82 | joined.append(join_fn(ele, other)) 83 | return joined 84 | 85 | 86 | def _right_join(left, right, left_key_fn, right_key_fn, join_fn=union_join): 87 | """ 88 | :param left: left iterable to be joined 89 | :param right: right iterable to be joined 90 | :param function left_key_fn: function that produces hashable value from left objects 91 | :param function right_key_fn: function that produces hashable value from right objects 92 | :param join_fn: function called on joined left and right iterable items to complete join 93 | :rtype: list 94 | """ 95 | def reversed_join_fn(left_ele, right_ele): 96 | return join_fn(right_ele, left_ele) 97 | return _left_join(right, left, right_key_fn, left_key_fn, reversed_join_fn) 98 | 99 | 100 | def _outer_join(left, right, left_key_fn, right_key_fn, join_fn=union_join): 101 | """ 102 | :param left: left iterable to be joined 103 | :param right: right iterable to be joined 104 | :param function left_key_fn: function that produces hashable value from left objects 105 | :param function right_key_fn: function that produces hashable value from right objects 106 | :param join_fn: function called on joined left and right iterable items to complete join 107 | :rtype: list 108 | """ 109 | left_joiner = defaultdict(list) 110 | for ele in left: 111 | left_joiner[left_key_fn(ele)].append(ele) 112 | right_joiner = defaultdict(list) 113 | for ele in right: 114 | right_joiner[right_key_fn(ele)].append(ele) 115 | keys = set(left_joiner.keys()).union(set(right_joiner.keys())) 116 | 117 | def iter_join(l, r, join_keys): 118 | for join_key in join_keys: 119 | for ele in l.get(join_key, [None]): 120 | for other in r.get(join_key, [None]): 121 | yield join_fn(ele, other) 122 | 123 | return list(iter_join(left_joiner, right_joiner, keys)) 124 | 125 | def group(iterable, key=lambda ele: ele): 126 | """ Groups an iterable by a specified attribute, or using a specified key access function. Returns tuples of grouped elements. 127 | 128 | >>> dogs = [Dog('gatsby', 'Rruff!', 15), Dog('william', 'roof', 12), Dog('edward', 'hi', 15)] 129 | >>> groupby(dogs, 'weight') 130 | [(Dog('gatsby', 'Rruff!', 15), Dog('edward', 'hi', 15)), (Dog('william', 'roof', 12), )] 131 | 132 | :param iterable: iterable to be grouped 133 | :param key: a key-access function or attr name to be used as a group key 134 | """ 135 | if callable(key): 136 | return _group(iterable, key) 137 | else: 138 | return _group(iterable, make_key_fn(key)) 139 | 140 | 141 | def _group(iterable, key_fn): 142 | groups = defaultdict(list) 143 | for ele in iterable: 144 | groups[key_fn(ele)].append(ele) 145 | return map(tuple, groups.values()) 146 | 147 | 148 | def make_key_fn(key): 149 | def key_fn(ele): 150 | if isinstance(ele, dict): 151 | return ele[key] 152 | else: 153 | return getattr(ele, key) 154 | return key_fn 155 | -------------------------------------------------------------------------------- /join/_join_funcs.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | __author__ = 'stuart' 4 | 5 | 6 | def get_object_attrs(obj): 7 | if hasattr(obj, '__dict__'): 8 | return obj.__dict__ 9 | elif hasattr(obj, '__slots__'): 10 | return {key: getattr(obj, key) for key in obj.__slots__} 11 | else: 12 | return {} 13 | 14 | 15 | class Union(object): 16 | def __init__(self, attributes): 17 | if isinstance(attributes, dict): 18 | for name, value in attributes.items(): 19 | setattr(self, name, value) 20 | else: 21 | for name, value in attributes: 22 | setattr(self, name, value) 23 | 24 | def __repr__(self): 25 | return "{}({})".format(self.__class__.__name__, self.__dict__) 26 | 27 | 28 | def tuple_join(left, right): 29 | """ 30 | Returns a tuple of the joined objects 31 | 32 | >>> tuple_join(1, '2') 33 | (1, '2') 34 | 35 | :param left: left object to be joined with right 36 | :param right: right object to be joined with left 37 | :return: tuple containing both join parents 38 | """ 39 | return left, right 40 | 41 | 42 | def union_join(left, right, left_as='left', right_as='right'): 43 | """ 44 | Join function truest to the SQL style join. Merges both objects together in a sum-type, 45 | saving references to each parent in ``left`` and ``right`` attributes. 46 | 47 | >>> Dog = namedtuple('Dog', ['name', 'woof', 'weight']) 48 | >>> dog = Dog('gatsby', 'Ruff!', 15) 49 | >>> Cat = namedtuple('Cat', ['name', 'meow', 'weight']) 50 | >>> cat = Cat('pleo', 'roooowwwr', 12) 51 | >>> catdog = union_join(cat, dog, 'cat', 'dog') 52 | >>> catdog.name 53 | pleo 54 | >>> catdog.woof 55 | Ruff! 56 | >>> catdog.dog.name 57 | gatsby 58 | 59 | :param left: left object to be joined with right 60 | :param right: right object to be joined with left 61 | :return: joined object with attrs/methods from both parents available 62 | """ 63 | attrs = {} 64 | attrs.update(get_object_attrs(right)) 65 | attrs.update(get_object_attrs(left)) 66 | attrs[left_as] = left 67 | attrs[right_as] = right 68 | if isinstance(left, dict) and isinstance(right, dict): 69 | return attrs 70 | else: 71 | joined_class = type(left.__class__.__name__ + right.__class__.__name__, (Union,), 72 | {}) 73 | return joined_class(attrs) 74 | 75 | 76 | def make_union_join(left_as='left', right_as='right'): 77 | return partial(union_join, left_as=left_as, right_as=right_as) 78 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ ! -d "py3env" ]; then 4 | virtualenv -p python3.4 py3env 5 | fi 6 | 7 | if [ ! -d "py2env" ]; then 8 | virtualenv -p python2.7 py2env 9 | fi 10 | 11 | if [ ! -d "pypyenv" ]; then 12 | virtualenv -p pypy pypyenv 13 | fi 14 | 15 | py2env/bin/python -m test.test_join 16 | py3env/bin/python -m test.test_join 17 | pypyenv/bin/python -m test.test_join 18 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.rst 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | from distutils.core import setup 4 | 5 | with open('README.rst') as readme: 6 | long_description = readme.read() 7 | 8 | with open('LICENSE.txt') as license_file: 9 | license = license_file.read() 10 | 11 | setup(name='join', 12 | packages=['join'], 13 | version='0.1.1', 14 | description='SQL-style joins for iterables.', 15 | long_description=long_description, 16 | license=license, 17 | author='Stuart Axelbrooke', 18 | author_email='stuart@axelbrooke.com', 19 | url='https://github.com/StuartAxelOwen/join', 20 | download_url='https://github.com/StuartAxelOwen/join/archive/0.1.zip', 21 | keywords=['join', 'joins', 'merge', 'merges', 'list join', 'iterable join'], 22 | ) 23 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soaxelbrooke/join/c84fca68ab6a52b1cee526065dc9f5a691764e69/test/__init__.py -------------------------------------------------------------------------------- /test/test_join.py: -------------------------------------------------------------------------------- 1 | __author__ = 'stuart' 2 | 3 | import unittest 4 | from collections import namedtuple 5 | from join import join 6 | from join import merge 7 | from join import group 8 | from join import tuple_join 9 | from join import union_join 10 | 11 | 12 | Dog = namedtuple('Dog', ['name', 'woof', 'weight']) 13 | Cat = namedtuple('Cat', ['name', 'meow', 'weight']) 14 | dogs = [ 15 | Dog('gatsby', 'Rruff!', 16), 16 | Dog('ein', 'wruf!', 9), 17 | Dog('talon', 'woof', 25), 18 | ] 19 | cats = [ 20 | Cat('pleo', 'mreeeoww', 16), 21 | Cat('xena', 'mreow', 12), 22 | Cat('ma\'at', 'meww', 13), 23 | Cat('set', 'meow', 13), 24 | Cat('gatsby', 'rowr', 15), 25 | Cat('gatsby', 'rooooo', 15), 26 | ] 27 | 28 | 29 | class JoinTests(unittest.TestCase): 30 | 31 | def test_inner_join(self): 32 | inner = join(dogs, cats, 'inner', 'name') 33 | assert len(inner) == 2 34 | assert inner[0][0].name == 'gatsby' 35 | assert inner[0][1].meow == 'rowr' 36 | assert inner[0][0].weight == 16 37 | assert inner[0][1].weight == 15 38 | 39 | def test_outer_join(self): 40 | outer = join(dogs, cats, 'outer', 'weight') 41 | assert len(outer) == 8 42 | 43 | def test_left_join(self): 44 | left = join(dogs, cats, 'left', 'name') 45 | assert len(left) == 4 46 | 47 | def test_right_join(self): 48 | right = join(dogs, cats, 'right', 'name') 49 | assert len(right) == 6 50 | 51 | def test_inner_merge(self): 52 | inner = merge(dogs, cats, 'inner', 'name') 53 | assert len(inner) == 2 54 | assert inner[0].name == 'gatsby' 55 | assert inner[0].meow == 'rowr' 56 | assert inner[0].weight == 16 57 | assert inner[0].right.weight == 15 58 | 59 | def test_inner_merge_iter(self): 60 | inner = merge(iter(dogs), iter(cats), 'inner', 'name') 61 | assert len(inner) == 2 62 | assert inner[0].name == 'gatsby' 63 | assert inner[0].meow == 'rowr' 64 | assert inner[0].weight == 16 65 | assert inner[0].right.weight == 15 66 | 67 | def test_left_merge(self): 68 | left = merge(dogs, cats, 'left', 'name') 69 | assert len(left) == len(dogs) + 1 70 | assert left[0].meow == 'rowr' 71 | 72 | def test_right_merge(self): 73 | right = merge(dogs, cats, 'right', 'name') 74 | assert len(right) == len(cats) 75 | assert right[-1].woof == 'Rruff!' 76 | 77 | def test_outer_merge(self): 78 | outer = merge(dogs, cats, 'outer', 'name') 79 | assert len(outer) == len(dogs) + len(cats) - 1 80 | 81 | def test_merge_naming(self): 82 | inner = merge(dogs, cats, 'inner', 'name', left_as='dog', right_as='cat') 83 | assert inner[0].name == 'gatsby' 84 | assert inner[0].cat.name == 'gatsby' 85 | assert inner[0].dog.name == 'gatsby' 86 | assert inner[0].weight == 16 87 | assert inner[0].cat.weight == 15 88 | assert inner[0].dog.weight == 16 89 | 90 | 91 | class GroupByTest(unittest.TestCase): 92 | 93 | def test_basic_group(self): 94 | abcs = 'aabbabccbc' 95 | self.assertSequenceEqual(set(group(abcs)), 96 | set([('a', 'a', 'a',), ('b', 'b', 'b', 'b'), ('c', 'c', 'c')])) 97 | 98 | def test_attr_group(self): 99 | byweight = group(cats, 'weight') 100 | names = set(map(lambda cats: tuple(cat.name for cat in cats), byweight)) 101 | expected = set([('pleo',), ('xena',), ('ma\'at', 'set'), ('gatsby', 'gatsby')]) 102 | self.assertSequenceEqual(names, expected) 103 | 104 | def test_func_group(self): 105 | self.assertSequenceEqual(set([(1, 3, 5), (0, 2, 4)]), set(group(range(6), lambda n: n % 2))) 106 | 107 | 108 | class JoinFuncsTest(unittest.TestCase): 109 | 110 | def test_union_join(self): 111 | catdog = union_join(cats[0], dogs[0]) 112 | assert hasattr(catdog, 'woof') 113 | assert hasattr(catdog, 'meow') 114 | 115 | def test_union_join_with_none(self): 116 | catnone = union_join(cats[0], None) 117 | assert hasattr(catnone, 'meow') 118 | 119 | def test_tuple_join(self): 120 | joined = tuple_join('a', 'b') 121 | assert joined == ('a', 'b') 122 | 123 | def test_tuple_join_with_none(self): 124 | joined = tuple_join('a', None) 125 | assert joined == ('a', None) 126 | 127 | if __name__ == '__main__': 128 | unittest.main() 129 | --------------------------------------------------------------------------------