├── VERSION ├── MANIFEST.in ├── lib └── relations │ ├── __init__.py │ ├── tuple.py │ └── relation.py ├── .gitignore ├── setup.py ├── test ├── test_joins.py ├── test_set_operations.py └── test_relation.py ├── UNLICENSE ├── README.md └── distribute_setup.py /VERSION: -------------------------------------------------------------------------------- 1 | 0.0.2 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include distribute_setup.py 2 | include VERSION 3 | -------------------------------------------------------------------------------- /lib/relations/__init__.py: -------------------------------------------------------------------------------- 1 | from relations.relation import * 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.pyc 3 | *.pyo 4 | .DS_Store 5 | build 6 | dist 7 | MANIFEST 8 | test/example/*.sqlite3 9 | doc/.build 10 | distribute-*.egg 11 | distribute-*.tar.gz 12 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from distribute_setup import use_setuptools 5 | use_setuptools() 6 | 7 | from setuptools import setup, find_packages 8 | import os.path as p 9 | 10 | VERSION = open(p.join(p.dirname(p.abspath(__file__)), 'VERSION')).read().strip() 11 | 12 | setup( 13 | name='relations', 14 | version=VERSION, 15 | description='A simple relational algebra engine in Python.', 16 | author='Zachary Voase', 17 | author_email='z@zacharyvoase.com', 18 | url='http://github.com/zacharyvoase/relations', 19 | packages=find_packages(where='lib'), 20 | package_dir={'': 'lib'}, 21 | install_requires=[ 22 | 
class Tuple(urecord.RecordInstance):

    """
    Record instance base used for the rows of a relation.

    Adds a readable ``repr`` plus helpers that turn field names into
    positional indices and pull values out by those indices.
    """

    def __repr__(self):
        pairs = []
        for position, name in enumerate(self._fields):
            pairs.append("%s=%r" % (name, self[position]))
        return 'Tuple(%s)' % ', '.join(pairs)

    @classmethod
    def _make_projection(cls, *fields):
        """Return the index within ``cls._fields`` of each given field."""
        positions = [cls._fields.index(name) for name in fields]
        return tuple(positions)

    @classmethod
    def _make_reordering(cls, **new_fields):
        """
        Return old-field indices, ordered by sorted new field name.

        `new_fields` maps new name => old name and is assumed to already be
        a complete bijection between the two; that is not checked here.
        """
        positions = []
        for new_name in sorted(new_fields):
            positions.append(cls._fields.index(new_fields[new_name]))
        return tuple(positions)

    def _index_restrict(self, *indices):
        """Return a plain tuple of this record's values at `indices`."""
        return tuple([self[position] for position in indices])
test_natural_join_on_disjoint_relations_is_cartesian_product(): 23 | joined = employees.project('name', 'emp_id').natural_join(departments) 24 | 25 | assert len(joined) == (len(employees) * len(departments)) 26 | -------------------------------------------------------------------------------- /UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
 23 | 24 | For more information, please refer to http://unlicense.org/ 25 | -------------------------------------------------------------------------------- /test/test_set_operations.py: -------------------------------------------------------------------------------- 1 | from nose.tools import assert_raises 2 | 3 | import relations 4 | 5 | 6 | def test_equivalent_relations_are_union_compatible(): 7 | rel1 = relations.Relation('name', 'age', 'gender') 8 | rel2 = relations.Relation('gender', 'age', 'name') 9 | assert rel1.is_union_compatible(rel2) 10 | 11 | 12 | def test_unequivalent_relations_are_union_compatible(): 13 | rel1 = relations.Relation('name', 'age', 'gender') 14 | rel2 = relations.Relation('symbol', 'price') 15 | assert not rel1.is_union_compatible(rel2) 16 | 17 | 18 | def test_union_contains_elements_from_both_relations(): 19 | rel1 = relations.Relation('name', 'age', 'gender') 20 | rel2 = relations.Relation('gender', 'age', 'name') 21 | rel1.add(name='Alice', age=25, gender='F') 22 | rel2.add(name='Bob', age=32, gender='M') 23 | 24 | union = rel1.union(rel2) 25 | assert union.contains(name='Alice', age=25, gender='F') 26 | assert union.contains(name='Bob', age=32, gender='M') 27 | 28 | 29 | def test_set_operations_raise_error_if_not_union_compatible(): 30 | rel1 = relations.Relation('name', 'age', 'gender') 31 | rel2 = relations.Relation('symbol', 'price') 32 | rel1.add(name='Alice', age=25, gender='F') 33 | rel2.add(symbol='AAPL', price='424.47') 34 | 35 | assert_raises(relations.NotUnionCompatible, 36 | lambda: rel1.union(rel2)) 37 | assert_raises(relations.NotUnionCompatible, 38 | lambda: rel1.intersection(rel2)) 39 | assert_raises(relations.NotUnionCompatible, 40 | lambda: rel1.difference(rel2)) 41 | 42 | 43 | def test_intersection_contains_only_elements_present_in_both_relations(): 44 | rel1 = relations.Relation('name', 'age', 'gender') 45 | rel2 = relations.Relation('gender', 'age', 'name') 46 | rel1.add(name='Alice', age=25, gender='F') 47 | rel1.add(name='Bob', 
def test_difference_contains_elements_present_in_self_but_not_in_other():
    # Bob and Charlie are in both relations; only Alice is unique to `left`,
    # so she alone must survive the difference.
    alice = dict(name='Alice', age=25, gender='F')
    bob = dict(name='Bob', age=32, gender='M')
    charlie = dict(name='Charlie', age=65, gender='M')

    left = relations.Relation('name', 'age', 'gender')
    for person in (alice, bob, charlie):
        left.add(**person)

    right = relations.Relation('gender', 'age', 'name')
    for person in (bob, charlie):
        right.add(**person)

    remainder = left.difference(right)
    assert len(remainder) == 1
    assert remainder.contains(**alice)
Add tuples to the 17 | relation: 18 | 19 | >>> alice = employees.add(employee_name='Alice', dept_name='Finance') 20 | >>> bob = employees.add(employee_name='Bob', dept_name='Sales') 21 | >>> len(employees) 22 | 2 23 | 24 | A relation is a set; duplicate tuples are considered identical: 25 | 26 | >>> _ = employees.add(employee_name='Alice', dept_name='Finance') 27 | >>> len(employees) 28 | 2 29 | 30 | A relation implements the relational algebra, including the unary operators 31 | **Select**: 32 | 33 | >>> finance_emps = employees.select(lambda emp: emp.dept_name == 'Finance') 34 | >>> len(finance_emps) 35 | 1 36 | 37 | **Project**: 38 | 39 | >>> names = employees.project('employee_name') 40 | >>> names.contains(employee_name='Bob') 41 | True 42 | >>> names.contains(employee_name='Charlie') 43 | False 44 | 45 | and **Rename**: 46 | 47 | >>> employees_renamed = employees.rename(name='employee_name') 48 | >>> employees_renamed.contains(name='Bob') 49 | True 50 | 51 | It also supports the set operations **Union**, **Intersection** and 52 | **Difference**. 53 | 54 | 55 | ## Coming Soon 56 | 57 | Joins: 58 | 59 | * Natural join 60 | * Theta join 61 | * Equijoin 62 | * Semijoin 63 | * Antijoin 64 | * Divide 65 | * Left outer join 66 | * Right outer join 67 | * Full outer join 68 | 69 | 70 | ## (Un)license 71 | 72 | This is free and unencumbered software released into the public domain. 73 | 74 | Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 75 | software, either in source code form or as a compiled binary, for any purpose, 76 | commercial or non-commercial, and by any means. 77 | 78 | In jurisdictions that recognize copyright laws, the author or authors of this 79 | software dedicate any and all copyright interest in the software to the public 80 | domain. We make this dedication for the benefit of the public at large and to 81 | the detriment of our heirs and successors. 
 We intend this dedication to be an 82 | overt act of relinquishment in perpetuity of all present and future rights to 83 | this software under copyright law. 84 | 85 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 86 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 87 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 88 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 89 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 90 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 91 | 92 | For more information, please refer to http://unlicense.org/ 93 | -------------------------------------------------------------------------------- /test/test_relation.py: -------------------------------------------------------------------------------- 1 | from nose.tools import assert_raises 2 | 3 | import relations 4 | 5 | 6 | def test_a_relation_has_a_heading(): 7 | employees = relations.Relation('employee_name', 'dept_name') 8 | assert employees.heading == set(['employee_name', 'dept_name']) 9 | 10 | 11 | def test_a_relation_has_cardinality(): 12 | employees = relations.Relation('employee_name', 'dept_name') 13 | assert len(employees) == 0 14 | employees.add(employee_name='Alice', dept_name='Finance') 15 | assert len(employees) == 1 16 | 17 | 18 | def test_a_relation_has_membership(): 19 | employees = relations.Relation('employee_name', 'dept_name') 20 | employees.add(employee_name='Alice', dept_name='Finance') 21 | assert employees.contains(employee_name='Alice', dept_name='Finance') 22 | assert not employees.contains(employee_name='Bob', dept_name='Sales') 23 | 24 | 25 | def test_a_relation_has_a_tuple(): 26 | employees = relations.Relation('employee_name', 'dept_name') 27 | emp = employees.tuple(employee_name='Alice', dept_name='Finance') 28 | assert emp.employee_name == 'Alice' 29 | assert emp.dept_name == 'Finance' 30 | 31 | 32 | def 
def test_project_raises_error_on_undefined_fields():
    # 'foobar' is not part of the heading, so project() must refuse it.
    staff = relations.Relation('employee_name', 'dept_name')
    for who, where in (('Alice', 'Finance'), ('Bob', 'Sales')):
        staff.add(employee_name=who, dept_name=where)

    assert_raises(relations.UndefinedFields, staff.project, 'foobar')
class Relation(object):

    """
    A relation: a heading (a set of field names) plus a set of tuples.

    ``self.tuples`` is a dict mapping every stored tuple to itself, so
    membership is a dict lookup and :meth:`add` can hand back the object
    that is already stored when a duplicate is added.
    """

    def __init__(self, *fields, **kwargs):
        # NOTE(review): **kwargs is accepted but never read here.
        self.heading = frozenset(fields)
        # The record type is built from the *sorted* field names, so the
        # positional order of values does not depend on argument order.
        self.tuple = urecord.Record(*sorted(fields), instance=Tuple)
        self.tuples = {}

    def __repr__(self):
        # The format literal has to contain a conversion: with an empty
        # literal, ``'' % (fields,)`` raises TypeError ("not all arguments
        # converted"). Same shape as Tuple.__repr__.
        return 'Relation(%s)' % (
            ', '.join(repr(field) for field in self.tuple._fields))

    def __len__(self):
        return len(self.tuples)

    def __contains__(self, tuple_):
        return tuple_ in self.tuples

    def __iter__(self):
        return iter(self.tuples)

    def clone(self):
        """Create a new, empty relation with the same heading as this one."""

        return type(self)(*self.tuple._fields)

    def is_union_compatible(self, other):
        """Return True if `other` has exactly the same heading as this one."""

        return self.heading == other.heading

    @check_union_compatible
    def update(self, other):

        """
        Merge this relation with another union-compatible relation.

        This method modifies (and returns) this relation. The other relation
        is not modified.
        """

        self.tuples.update(other.tuples)
        return self

    @check_union_compatible
    def union(self, other):
        """Safe set union between two union-compatible relations."""

        return self.clone().update(self).update(other)

    @check_union_compatible
    def intersection(self, other):
        """Safe set intersection between two union-compatible relations."""

        new_relation = self.clone()
        new_relation.tuples.update(
            (tuple_, tuple_) for tuple_ in
            set(self.tuples).intersection(other.tuples))
        return new_relation

    @check_union_compatible
    def difference(self, other):
        """Safe set difference between two union-compatible relations."""

        new_relation = self.clone()
        new_relation.tuples.update(
            (tuple_, tuple_) for tuple_ in
            set(self.tuples).difference(other.tuples))
        return new_relation

    def add(self, **kwargs):

        """
        Add a tuple to this relation.

        This method attempts to be as efficient as possible, re-using the same
        Python object if the tuple already exists in this relation.

        Arguments should be given in keyword form:

            >>> employees = Relation('name', 'department')
            >>> alice = employees.add(name='Alice', department='Finance')
            >>> alice.name
            'Alice'
            >>> alice.department
            'Finance'
        """

        tuple_ = self.tuple(**kwargs)
        # setdefault returns the stored object when an equal tuple exists.
        return self.tuples.setdefault(tuple_, tuple_)

    def contains(self, **kwargs):

        """
        Determine if this relation contains the specified tuple.

        Arguments are given in the same form as for :meth:`add`. This is easier
        than having to construct a tuple and use Python's `in` operator, e.g.:

            >>> employees.contains(name='Alice', department='Sales')
            True

        Whereas without this method you'd do:

            >>> employees.tuple(name='Alice', department='Sales') in employees
            True
        """

        return self.tuple(**kwargs) in self

    def select(self, predicate):

        """
        Filter the tuples in this relation based on a predicate.

        Returns a new, union-compatible relation.
        """

        new_relation = self.clone()
        new_relation.tuples.update(
            (tuple_, tuple_) for tuple_ in filter(predicate, self.tuples))
        return new_relation

    def project(self, *fields):

        """
        Return a new relation with a heading restricted to the given fields.

        The new relation is not union-compatible, and will also be a set, so
        it may have a smaller cardinality than the original relation. Here's
        an example:

            >>> employees = Relation('name', 'department')
            >>> employees.add(name='Alice', department='Sales')
            >>> employees.add(name='Bob', department='Sales')
            >>> len(employees)
            2
            >>> departments = employees.project('department')
            >>> len(departments)
            1
            >>> departments.contains(department='Sales')
            True

        Raises :exc:`UndefinedFields` if any given field is not in this
        relation's heading.
        """

        new_relation = type(self)(*fields)
        if not new_relation.heading.issubset(self.heading):
            undefined_fields = tuple(
                sorted(new_relation.heading.difference(self.heading)))
            # Wrap in a 1-tuple: a bare tuple on the right of % is taken as
            # the argument list, which fails for two or more names.
            raise UndefinedFields("Undefined fields used in project(): %r" %
                                  (undefined_fields,))

        # Values are passed positionally below, so take the indices in the
        # new record's own field order (sorted in __init__), not in the
        # caller's argument order.
        # NOTE(review): assumes urecord records accept positional values in
        # `_fields` order, which rename() relies on as well.
        projection = self.tuple._make_projection(*new_relation.tuple._fields)

        new_relation.tuples.update(
            (tuple_, tuple_) for tuple_ in (
                new_relation.tuple(*row._index_restrict(*projection))
                for row in self.tuples))
        return new_relation

    def rename(self, **new_fields):

        """
        Rename some fields in this relation.

        Accepts keyword arguments in the form
        ``new_field_name='old_field_name'``. The new relation returned will
        only be union-compatible if no arguments are given to this function.

        Raises :exc:`RelationalError` if the mapping is not one-to-one or if
        a new name collides with a field that is being kept under its
        current name, and :exc:`UndefinedFields` if an old field name is not
        in this relation's heading.
        """

        if not is_bijection(new_fields):
            raise RelationalError("Field mapping is not one-to-one")

        renamed_fields = set(new_fields.values())
        if not renamed_fields.issubset(self.heading):
            undefined_fields = tuple(
                sorted(renamed_fields.difference(self.heading)))
            # 1-tuple wrapper for the same reason as in project().
            raise UndefinedFields("Undefined fields used in rename(): %r" %
                                  (undefined_fields,))

        # A new name equal to a field that is not itself being renamed would
        # be overwritten by the identity mapping added below, silently
        # dropping a column; refuse it instead.
        clashes = set(new_fields).intersection(self.heading).difference(
            renamed_fields)
        if clashes:
            raise RelationalError(
                "New field names clash with existing fields: %r" %
                (tuple(sorted(clashes)),))

        # Get a complete bijection from new field names => old field names
        for field_name in self.heading:
            if field_name not in renamed_fields:
                new_fields[field_name] = field_name

        new_relation = type(self)(*new_fields.keys())
        reordering = self.tuple._make_reordering(**new_fields)
        new_relation.tuples.update(
            (tuple_, tuple_) for tuple_ in (
                new_relation.tuple(*row._index_restrict(*reordering))
                for row in self.tuples))
        return new_relation

    def natural_join(self, other):

        """
        Join this relation with another on the fields they have in common.

        The result's heading is the union of both headings. A pair of tuples
        contributes a row when they hold equal values for every shared
        field; with no shared fields every pair matches, so the result is
        the cartesian product.
        """

        new_relation = type(self)(*self.heading.union(other.heading))
        common_fields = self.heading.intersection(other.heading)
        projection1 = self.tuple._make_projection(*common_fields)
        projection2 = other.tuple._make_projection(*common_fields)
        for tuple1 in self:
            for tuple2 in other:
                if (tuple1._index_restrict(*projection1) ==
                        tuple2._index_restrict(*projection2)):
                    # NOTE(review): relies on the record type providing
                    # `_asdict()`; confirm against urecord.
                    row = tuple1._asdict()
                    row.update(tuple2._asdict())
                    new_relation.add(**row)
        return new_relation
Does not check the input.""" 245 | 246 | return dict((value, key) for (key, value) in dictionary.iteritems()) 247 | -------------------------------------------------------------------------------- /distribute_setup.py: -------------------------------------------------------------------------------- 1 | #!python 2 | """Bootstrap distribute installation 3 | 4 | If you want to use setuptools in your package's setup.py, just include this 5 | file in the same directory with it, and add this to the top of your setup.py:: 6 | 7 | from distribute_setup import use_setuptools 8 | use_setuptools() 9 | 10 | If you want to require a specific version of setuptools, set a download 11 | mirror, or use an alternate download directory, you can do so by supplying 12 | the appropriate options to ``use_setuptools()``. 13 | 14 | This file can also be run as a script to install or upgrade setuptools. 15 | """ 16 | import os 17 | import sys 18 | import time 19 | import fnmatch 20 | import tempfile 21 | import tarfile 22 | from distutils import log 23 | 24 | try: 25 | from site import USER_SITE 26 | except ImportError: 27 | USER_SITE = None 28 | 29 | try: 30 | import subprocess 31 | 32 | def _python_cmd(*args): 33 | args = (sys.executable,) + args 34 | return subprocess.call(args) == 0 35 | 36 | except ImportError: 37 | # will be used for python 2.3 38 | def _python_cmd(*args): 39 | args = (sys.executable,) + args 40 | # quoting arguments if windows 41 | if sys.platform == 'win32': 42 | def quote(arg): 43 | if ' ' in arg: 44 | return '"%s"' % arg 45 | return arg 46 | args = [quote(arg) for arg in args] 47 | return os.spawnl(os.P_WAIT, sys.executable, *args) == 0 48 | 49 | DEFAULT_VERSION = "0.6.14" 50 | DEFAULT_URL = "http://pypi.python.org/packages/source/d/distribute/" 51 | SETUPTOOLS_FAKED_VERSION = "0.6c11" 52 | 53 | SETUPTOOLS_PKG_INFO = """\ 54 | Metadata-Version: 1.0 55 | Name: setuptools 56 | Version: %s 57 | Summary: xxxx 58 | Home-page: xxx 59 | Author: xxx 60 | Author-email: xxx 
def _install(tarball):
    """Unpack `tarball` into a fresh temporary directory and install it.

    Changes into the first entry of the extracted tree and runs
    ``setup.py install`` there with the current interpreter. A failed
    install is only logged, not raised. The original working directory is
    restored on the way out.
    """
    # extracting the tarball
    tmpdir = tempfile.mkdtemp()
    log.warn('Extracting in %s', tmpdir)
    old_wd = os.getcwd()
    try:
        os.chdir(tmpdir)
        tar = tarfile.open(tarball)
        try:
            _extractall(tar)
        finally:
            # close the archive even when extraction fails
            tar.close()

        # going in the directory
        subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0])
        os.chdir(subdir)
        log.warn('Now working in %s', subdir)

        # installing
        log.warn('Installing Distribute')
        if not _python_cmd('setup.py', 'install'):
            log.warn('Something went wrong during the installation.')
            log.warn('See the error message above.')
    finally:
        os.chdir(old_wd)
def use_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL,
                   to_dir=os.curdir, download_delay=15, no_fake=True):
    """Make sure a distribute-provided ``pkg_resources`` is available.

    If ``pkg_resources`` cannot be imported, or has no ``_distribute``
    attribute, distribute is fetched through ``_do_download``. Otherwise
    ``distribute>=version`` is required; when that succeeds the function
    returns ``None``.

    `version`, `download_base`, `to_dir` (made absolute first) and
    `download_delay` are passed through to ``_do_download``. When `no_fake`
    is false, ``_fake_setuptools()`` is called before a download triggered
    by a non-distribute ``pkg_resources``, and
    ``_create_fake_setuptools_pkg_info(to_dir)`` always runs on the way out.

    Exits the process with status 2 if the installed distribute is too old
    and ``pkg_resources`` or ``setuptools`` had already been imported when
    this function was entered.
    """
    # making sure we use the absolute path
    to_dir = os.path.abspath(to_dir)
    # remember whether either module was loaded before we touched anything
    was_imported = 'pkg_resources' in sys.modules or \
        'setuptools' in sys.modules
    try:
        try:
            import pkg_resources
            if not hasattr(pkg_resources, '_distribute'):
                if not no_fake:
                    _fake_setuptools()
                # handled by the except clause just below, like a failed import
                raise ImportError
        except ImportError:
            return _do_download(version, download_base, to_dir, download_delay)
        try:
            pkg_resources.require("distribute>="+version)
            return
        except pkg_resources.VersionConflict:
            # sys.exc_info() instead of "except ... as e" / "except ..., e"
            e = sys.exc_info()[1]
            if was_imported:
                sys.stderr.write(
                "The required version of distribute (>=%s) is not available,\n"
                "and can't be installed while this script is running. Please\n"
                "install a more recent version first, using\n"
                "'easy_install -U distribute'."
                "\n\n(Currently using %r)\n" % (version, e.args[0]))
                sys.exit(2)
            else:
                del pkg_resources, sys.modules['pkg_resources']    # reload ok
                return _do_download(version, download_base, to_dir,
                                    download_delay)
        except pkg_resources.DistributionNotFound:
            return _do_download(version, download_base, to_dir,
                                download_delay)
    finally:
        # runs on every exit path above, including the early returns
        if not no_fake:
            _create_fake_setuptools_pkg_info(to_dir)
177 | `delay` is the number of seconds to pause before an actual download 178 | attempt. 179 | """ 180 | # making sure we use the absolute path 181 | to_dir = os.path.abspath(to_dir) 182 | try: 183 | from urllib.request import urlopen 184 | except ImportError: 185 | from urllib2 import urlopen 186 | tgz_name = "distribute-%s.tar.gz" % version 187 | url = download_base + tgz_name 188 | saveto = os.path.join(to_dir, tgz_name) 189 | src = dst = None 190 | if not os.path.exists(saveto): # Avoid repeated downloads 191 | try: 192 | log.warn("Downloading %s", url) 193 | src = urlopen(url) 194 | # Read/write all in one block, so we don't create a corrupt file 195 | # if the download is interrupted. 196 | data = src.read() 197 | dst = open(saveto, "wb") 198 | dst.write(data) 199 | finally: 200 | if src: 201 | src.close() 202 | if dst: 203 | dst.close() 204 | return os.path.realpath(saveto) 205 | 206 | def _no_sandbox(function): 207 | def __no_sandbox(*args, **kw): 208 | try: 209 | from setuptools.sandbox import DirectorySandbox 210 | if not hasattr(DirectorySandbox, '_old'): 211 | def violation(*args): 212 | pass 213 | DirectorySandbox._old = DirectorySandbox._violation 214 | DirectorySandbox._violation = violation 215 | patched = True 216 | else: 217 | patched = False 218 | except ImportError: 219 | patched = False 220 | 221 | try: 222 | return function(*args, **kw) 223 | finally: 224 | if patched: 225 | DirectorySandbox._violation = DirectorySandbox._old 226 | del DirectorySandbox._old 227 | 228 | return __no_sandbox 229 | 230 | def _patch_file(path, content): 231 | """Will backup the file then patch it""" 232 | existing_content = open(path).read() 233 | if existing_content == content: 234 | # already patched 235 | log.warn('Already patched.') 236 | return False 237 | log.warn('Patching...') 238 | _rename_path(path) 239 | f = open(path, 'w') 240 | try: 241 | f.write(content) 242 | finally: 243 | f.close() 244 | return True 245 | 246 | _patch_file = 
_no_sandbox(_patch_file) 247 | 248 | def _same_content(path, content): 249 | return open(path).read() == content 250 | 251 | def _rename_path(path): 252 | new_name = path + '.OLD.%s' % time.time() 253 | log.warn('Renaming %s into %s', path, new_name) 254 | os.rename(path, new_name) 255 | return new_name 256 | 257 | def _remove_flat_installation(placeholder): 258 | if not os.path.isdir(placeholder): 259 | log.warn('Unkown installation at %s', placeholder) 260 | return False 261 | found = False 262 | for file in os.listdir(placeholder): 263 | if fnmatch.fnmatch(file, 'setuptools*.egg-info'): 264 | found = True 265 | break 266 | if not found: 267 | log.warn('Could not locate setuptools*.egg-info') 268 | return 269 | 270 | log.warn('Removing elements out of the way...') 271 | pkg_info = os.path.join(placeholder, file) 272 | if os.path.isdir(pkg_info): 273 | patched = _patch_egg_dir(pkg_info) 274 | else: 275 | patched = _patch_file(pkg_info, SETUPTOOLS_PKG_INFO) 276 | 277 | if not patched: 278 | log.warn('%s already patched.', pkg_info) 279 | return False 280 | # now let's move the files out of the way 281 | for element in ('setuptools', 'pkg_resources.py', 'site.py'): 282 | element = os.path.join(placeholder, element) 283 | if os.path.exists(element): 284 | _rename_path(element) 285 | else: 286 | log.warn('Could not find the %s element of the ' 287 | 'Setuptools distribution', element) 288 | return True 289 | 290 | _remove_flat_installation = _no_sandbox(_remove_flat_installation) 291 | 292 | def _after_install(dist): 293 | log.warn('After install bootstrap.') 294 | placeholder = dist.get_command_obj('install').install_purelib 295 | _create_fake_setuptools_pkg_info(placeholder) 296 | 297 | def _create_fake_setuptools_pkg_info(placeholder): 298 | if not placeholder or not os.path.exists(placeholder): 299 | log.warn('Could not find the install location') 300 | return 301 | pyver = '%s.%s' % (sys.version_info[0], sys.version_info[1]) 302 | setuptools_file = 
def _under_prefix(location):
    """Tell whether `location` lies under the requested install target.

    Only the arguments following ``install`` in ``sys.argv`` are
    examined, in order; the first one that names a target decides:

    * ``--root=DIR`` / ``--prefix=DIR`` or ``--root DIR`` /
      ``--prefix DIR``: the result is ``location.startswith(DIR)``;
    * ``--user`` (when `USER_SITE` is not None): the result is
      ``location.startswith(USER_SITE)``.

    Returns True when ``install`` is not on the command line, when no
    such option is present, or when a bare ``--root`` / ``--prefix`` is
    the last argument and therefore carries no value.
    """
    if 'install' not in sys.argv:
        return True
    args = sys.argv[sys.argv.index('install') + 1:]
    for index, arg in enumerate(args):
        for option in ('--root', '--prefix'):
            if arg.startswith('%s=' % option):
                # Take everything after "<option>=".  Splitting on
                # 'root=' only worked for --root and left the whole
                # "--prefix=DIR" string as the directory.
                top_dir = arg[len(option) + 1:]
                return location.startswith(top_dir)
            elif arg == option:
                # The value is the next argument; make sure there is one
                # instead of indexing past the end of the list.
                if len(args) > index + 1:
                    top_dir = args[index + 1]
                    return location.startswith(top_dir)
        if arg == '--user' and USER_SITE is not None:
            return location.startswith(USER_SITE)
    return True
def _extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
    directory and set owner, modification time and permissions on
    directories afterwards. `path' specifies a different directory
    to extract to. `members' is optional and must be a subset of the
    list returned by getmembers().

    `self` is the TarFile-like object to extract from.  An ExtractError
    raised while fixing up a directory is re-raised when
    ``self.errorlevel > 1`` and reported through ``self._dbg`` otherwise.
    """
    import copy
    import operator
    from tarfile import ExtractError
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directories with a safe mode; the real mode is
            # applied at the end from the untouched original entry.
            directories.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 448  # decimal for oct 0700
        self.extract(tarinfo, path)

    # Reverse sort directories.
    if sys.version_info < (2, 4):
        def sorter(dir1, dir2):
            return cmp(dir1.name, dir2.name)
        directories.sort(sorter)
        directories.reverse()
    else:
        directories.sort(key=operator.attrgetter('name'), reverse=True)

    # Since Python 3.5 TarFile.chown() takes a mandatory third argument,
    # numeric_owner; without it every archive holding a directory entry
    # fails with TypeError.  False matches the extractall() default.
    if sys.version_info >= (3, 5):
        chown_extra = (False,)
    else:
        chown_extra = ()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath, *chown_extra)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            e = sys.exc_info()[1]
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)