├── VERSION
├── MANIFEST.in
├── lib
│   └── relations
│       ├── __init__.py
│       ├── tuple.py
│       └── relation.py
├── .gitignore
├── setup.py
├── test
│   ├── test_joins.py
│   ├── test_set_operations.py
│   └── test_relation.py
├── UNLICENSE
├── README.md
└── distribute_setup.py
/VERSION:
--------------------------------------------------------------------------------
1 | 0.0.2
2 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include distribute_setup.py
2 | include VERSION
3 |
--------------------------------------------------------------------------------
/lib/relations/__init__.py:
--------------------------------------------------------------------------------
1 | from relations.relation import *
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | *.pyc
3 | *.pyo
4 | .DS_Store
5 | build
6 | dist
7 | MANIFEST
8 | test/example/*.sqlite3
9 | doc/.build
10 | distribute-*.egg
11 | distribute-*.tar.gz
12 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | from distribute_setup import use_setuptools
5 | use_setuptools()
6 |
7 | from setuptools import setup, find_packages
8 | import os.path as p
9 |
# Read the release number from the VERSION file that sits next to this
# script. The handle is closed explicitly instead of being left for the
# garbage collector to reclaim.
_version_file = open(p.join(p.dirname(p.abspath(__file__)), 'VERSION'))
try:
    VERSION = _version_file.read().strip()
finally:
    _version_file.close()

setup(
    name='relations',
    version=VERSION,
    description='A simple relational algebra engine in Python.',
    author='Zachary Voase',
    author_email='z@zacharyvoase.com',
    url='http://github.com/zacharyvoase/relations',
    # Packages are looked up under lib/ rather than the project root.
    packages=find_packages(where='lib'),
    package_dir={'': 'lib'},
    install_requires=[
        'urecord>=0.0.4',
    ],
)
25 |
--------------------------------------------------------------------------------
/lib/relations/tuple.py:
--------------------------------------------------------------------------------
1 | import urecord
2 |
3 |
class Tuple(urecord.RecordInstance):

    """
    Record instance used for the rows of a relation.

    Adds a readable ``repr`` plus helpers that translate field names into
    positional indices and pick values out by those indices.
    """

    def __repr__(self):
        # Pair every field name with the value stored at the same position.
        pairs = ["%s=%r" % (name, self[position])
                 for position, name in enumerate(self._fields)]
        return 'Tuple(%s)' % (', '.join(pairs),)

    @classmethod
    def _make_projection(cls, *fields):
        """Return the index within ``cls._fields`` of each given field name."""
        positions = [cls._fields.index(name) for name in fields]
        return tuple(positions)

    @classmethod
    def _make_reordering(cls, **new_fields):
        """
        Return old-field indices, ordered by sorted new field name.

        ``new_fields`` maps new names to old names. At this point it is
        assumed to be a complete bijection between new and old fields.
        """
        positions = []
        for new_name in sorted(new_fields):
            positions.append(cls._fields.index(new_fields[new_name]))
        return tuple(positions)

    def _index_restrict(self, *indices):
        """Return a plain tuple of the values found at ``indices``."""
        return tuple([self[position] for position in indices])
28 |
--------------------------------------------------------------------------------
/test/test_joins.py:
--------------------------------------------------------------------------------
1 | import relations
2 |
3 |
# Shared fixtures: four employees spread over two departments, and three
# departments of which one ('Production') has no employees.
employees = relations.Relation('name', 'emp_id', 'dept_name')
for _name, _emp_id, _dept_name in [
        ('Harry', 3415, 'Finance'),
        ('Sally', 2241, 'Sales'),
        ('George', 3401, 'Finance'),
        ('Harriet', 2202, 'Sales')]:
    employees.add(name=_name, emp_id=_emp_id, dept_name=_dept_name)

departments = relations.Relation('dept_name', 'manager')
for _dept_name, _manager in [
        ('Finance', 'George'),
        ('Sales', 'Harriet'),
        ('Production', 'Charles')]:
    departments.add(dept_name=_dept_name, manager=_manager)
14 |
15 |
def test_natural_join():
    # Each of the four employees has a dept_name that appears exactly once
    # in departments, so the join is expected to hold four rows.
    result = employees.natural_join(departments)
    expected_rows = 4
    assert len(result) == expected_rows
20 |
21 |
def test_natural_join_on_disjoint_relations_is_cartesian_product():
    # Projecting away dept_name leaves no field in common with departments.
    people = employees.project('name', 'emp_id')
    product = people.natural_join(departments)
    assert len(product) == len(employees) * len(departments)
26 |
--------------------------------------------------------------------------------
/UNLICENSE:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <http://unlicense.org/>
25 |
--------------------------------------------------------------------------------
/test/test_set_operations.py:
--------------------------------------------------------------------------------
1 | from nose.tools import assert_raises
2 |
3 | import relations
4 |
5 |
def test_equivalent_relations_are_union_compatible():
    # The same field names given in a different order still match.
    field_names = ('name', 'age', 'gender')
    forwards = relations.Relation(*field_names)
    backwards = relations.Relation(*reversed(field_names))
    assert forwards.is_union_compatible(backwards)
10 |
11 |
def test_unequivalent_relations_are_union_compatible():
    # NOTE: despite its name, this test checks that relations with different
    # headings are *not* union-compatible.
    people = relations.Relation('name', 'age', 'gender')
    stocks = relations.Relation('symbol', 'price')
    compatible = people.is_union_compatible(stocks)
    assert not compatible
16 |
17 |
def test_union_contains_elements_from_both_relations():
    alice = dict(name='Alice', age=25, gender='F')
    bob = dict(name='Bob', age=32, gender='M')

    left = relations.Relation('name', 'age', 'gender')
    right = relations.Relation('gender', 'age', 'name')
    left.add(**alice)
    right.add(**bob)

    combined = left.union(right)
    # One tuple came from each operand; both must be in the result.
    for person in (alice, bob):
        assert combined.contains(**person)
27 |
28 |
def test_set_operations_raise_error_if_not_union_compatible():
    people = relations.Relation('name', 'age', 'gender')
    stocks = relations.Relation('symbol', 'price')
    people.add(name='Alice', age=25, gender='F')
    stocks.add(symbol='AAPL', price='424.47')

    # Every binary set operation must reject a differently-headed operand.
    for operation in (people.union, people.intersection, people.difference):
        assert_raises(relations.NotUnionCompatible, operation, stocks)
41 |
42 |
def test_intersection_contains_only_elements_present_in_both_relations():
    alice = dict(name='Alice', age=25, gender='F')
    bob = dict(name='Bob', age=32, gender='M')
    charlie = dict(name='Charlie', age=65, gender='M')

    left = relations.Relation('name', 'age', 'gender')
    right = relations.Relation('gender', 'age', 'name')
    for person in (alice, bob, charlie):
        left.add(**person)
    for person in (bob, charlie):
        right.add(**person)

    shared = left.intersection(right)
    # Alice is only in the left operand, so she must be absent.
    assert len(shared) == 2
    assert not shared.contains(**alice)
55 |
56 |
def test_difference_contains_elements_present_in_self_but_not_in_other():
    alice = dict(name='Alice', age=25, gender='F')
    bob = dict(name='Bob', age=32, gender='M')
    charlie = dict(name='Charlie', age=65, gender='M')

    left = relations.Relation('name', 'age', 'gender')
    right = relations.Relation('gender', 'age', 'name')
    for person in (alice, bob, charlie):
        left.add(**person)
    for person in (bob, charlie):
        right.add(**person)

    remainder = left.difference(right)
    # Only Alice is missing from the right operand.
    assert len(remainder) == 1
    assert remainder.contains(**alice)
69 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Relations
2 |
3 | Relations is a simple Python implementation of a relational algebra engine.
4 |
5 |
6 | ## Example
7 |
8 | Create a relation with a heading (i.e. a list of field names):
9 |
10 | >>> import relations
11 | >>> employees = relations.Relation('employee_name', 'dept_name')
12 | >>> employees
13 | <Relation('dept_name', 'employee_name')>
14 |
15 | The fields are re-ordered alphabetically, so that the order of values in tuples
16 | is consistent between equivalent but separate relations. Add tuples to the
17 | relation:
18 |
19 | >>> alice = employees.add(employee_name='Alice', dept_name='Finance')
20 | >>> bob = employees.add(employee_name='Bob', dept_name='Sales')
21 | >>> len(employees)
22 | 2
23 |
24 | A relation is a set; duplicate tuples are considered identical:
25 |
26 | >>> _ = employees.add(employee_name='Alice', dept_name='Finance')
27 | >>> len(employees)
28 | 2
29 |
30 | A relation implements the relational algebra, including the unary operators
31 | **Select**:
32 |
33 | >>> finance_emps = employees.select(lambda emp: emp.dept_name == 'Finance')
34 | >>> len(finance_emps)
35 | 1
36 |
37 | **Project**:
38 |
39 | >>> names = employees.project('employee_name')
40 | >>> names.contains(employee_name='Bob')
41 | True
42 | >>> names.contains(employee_name='Charlie')
43 | False
44 |
45 | and **Rename**:
46 |
47 | >>> employees_renamed = employees.rename(name='employee_name')
48 | >>> employees_renamed.contains(name='Bob')
49 | True
50 |
51 | It also supports the set operations **Union**, **Intersection** and
52 | **Difference**.
53 |
54 |
55 | ## Coming Soon
56 |
57 | Joins:
58 |
59 | * Natural join
60 | * Theta join
61 | * Equijoin
62 | * Semijoin
63 | * Antijoin
64 | * Divide
65 | * Left outer join
66 | * Right outer join
67 | * Full outer join
68 |
69 |
70 | ## (Un)license
71 |
72 | This is free and unencumbered software released into the public domain.
73 |
74 | Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
75 | software, either in source code form or as a compiled binary, for any purpose,
76 | commercial or non-commercial, and by any means.
77 |
78 | In jurisdictions that recognize copyright laws, the author or authors of this
79 | software dedicate any and all copyright interest in the software to the public
80 | domain. We make this dedication for the benefit of the public at large and to
81 | the detriment of our heirs and successors. We intend this dedication to be an
82 | overt act of relinquishment in perpetuity of all present and future rights to
83 | this software under copyright law.
84 |
85 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
86 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
87 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
88 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
89 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
90 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
91 |
92 | For more information, please refer to <http://unlicense.org/>
93 |
--------------------------------------------------------------------------------
/test/test_relation.py:
--------------------------------------------------------------------------------
1 | from nose.tools import assert_raises
2 |
3 | import relations
4 |
5 |
def test_a_relation_has_a_heading():
    # The heading is the set of field names given to the constructor.
    expected_heading = set(['employee_name', 'dept_name'])
    staff = relations.Relation('employee_name', 'dept_name')
    assert staff.heading == expected_heading
9 |
10 |
def test_a_relation_has_cardinality():
    staff = relations.Relation('employee_name', 'dept_name')
    size_when_empty = len(staff)
    assert size_when_empty == 0

    staff.add(employee_name='Alice', dept_name='Finance')
    size_after_one_add = len(staff)
    assert size_after_one_add == 1
16 |
17 |
def test_a_relation_has_membership():
    alice = dict(employee_name='Alice', dept_name='Finance')
    bob = dict(employee_name='Bob', dept_name='Sales')

    staff = relations.Relation('employee_name', 'dept_name')
    staff.add(**alice)

    # Only the tuple that was added is reported as a member.
    assert staff.contains(**alice)
    assert not staff.contains(**bob)
23 |
24 |
def test_a_relation_has_a_tuple():
    staff = relations.Relation('employee_name', 'dept_name')
    # The relation's tuple type exposes each field as an attribute.
    row = staff.tuple(employee_name='Alice', dept_name='Finance')
    assert (row.employee_name, row.dept_name) == ('Alice', 'Finance')
30 |
31 |
def test_a_relation_is_a_set():
    alice = dict(employee_name='Alice', dept_name='Finance')
    staff = relations.Relation('employee_name', 'dept_name')

    # Adding the same tuple twice leaves a single member.
    for _ in range(2):
        staff.add(**alice)
    assert len(staff) == 1

    staff.add(employee_name='Bob', dept_name='Sales')
    assert len(staff) == 2
39 |
40 |
def test_select_creates_a_new_relation_with_the_same_heading():
    staff = relations.Relation('employee_name', 'dept_name')
    staff.add(employee_name='Alice', dept_name='Finance')
    staff.add(employee_name='Bob', dept_name='Sales')

    def in_finance(emp):
        return emp.dept_name == 'Finance'

    finance_staff = staff.select(in_finance)
    assert isinstance(finance_staff, relations.Relation)
    assert finance_staff.heading == staff.heading
49 |
50 |
def test_project_creates_a_new_relation():
    staff = relations.Relation('employee_name', 'dept_name')
    staff.add(employee_name='Alice', dept_name='Finance')
    staff.add(employee_name='Bob', dept_name='Sales')

    name_only = staff.project('employee_name')
    assert isinstance(name_only, relations.Relation)
    assert name_only.heading == set(['employee_name'])
    # Both distinct names survive the projection.
    for person in ('Alice', 'Bob'):
        assert name_only.contains(employee_name=person)
    assert len(name_only) == 2
62 |
63 |
def test_project_raises_error_on_undefined_fields():
    staff = relations.Relation('employee_name', 'dept_name')
    staff.add(employee_name='Alice', dept_name='Finance')
    staff.add(employee_name='Bob', dept_name='Sales')

    # 'foobar' is not part of the heading.
    assert_raises(relations.UndefinedFields, staff.project, 'foobar')
71 |
72 |
def test_projected_relations_are_sets():
    staff = relations.Relation('employee_name', 'dept_name')
    for person in ('Alice', 'Bob'):
        staff.add(employee_name=person, dept_name='Finance')

    # Both employees share a department, so the projection collapses to one.
    dept_only = staff.project('dept_name')
    assert dept_only.contains(dept_name='Finance')
    assert len(dept_only) == 1
81 |
82 |
def test_rename_creates_a_new_relation():
    staff = relations.Relation('employee_name', 'dept_name')
    staff.add(employee_name='Alice', dept_name='Finance')
    staff.add(employee_name='Bob', dept_name='Sales')

    renamed = staff.rename(name='employee_name', section='dept_name')
    # The same values must be reachable under the new field names.
    for name, section in (('Alice', 'Finance'), ('Bob', 'Sales')):
        assert renamed.contains(name=name, section=section)
91 |
92 |
def test_rename_raises_error_on_undefined_fields():
    staff = relations.Relation('employee_name', 'dept_name')
    staff.add(employee_name='Alice', dept_name='Finance')
    staff.add(employee_name='Bob', dept_name='Sales')

    # 'foobar' is not an existing field, so it cannot be renamed.
    assert_raises(relations.UndefinedFields, staff.rename, newfield='foobar')
100 |
--------------------------------------------------------------------------------
/lib/relations/relation.py:
--------------------------------------------------------------------------------
1 | import functools
2 | from itertools import imap
3 |
4 | import urecord
5 |
6 | from relations.tuple import Tuple
7 |
8 |
9 | __all__ = ['Relation', 'RelationalError', 'UndefinedFields',
10 | 'NotUnionCompatible']
11 |
12 |
class RelationalError(Exception):
    """Raised when an undefined or invalid relational operation is attempted.

    Base class for the more specific errors of this module.
    """
16 |
17 |
class UndefinedFields(RelationalError):
    """Raised when an operation names a field its relation(s) do not define."""
21 |
22 |
class NotUnionCompatible(RelationalError):
    """Raised when a set operation is given non-union-compatible relations."""
26 |
27 |
def check_union_compatible(method):
    """
    Decorate a two-operand method so that it refuses incompatible operands.

    The wrapper first calls ``self.is_union_compatible(other)`` and raises
    :exc:`NotUnionCompatible` when that is false. Otherwise it returns
    whatever ``method(self, other)`` returns.
    """

    @functools.wraps(method)
    def wrapper(self, other):
        if self.is_union_compatible(other):
            return method(self, other)
        raise NotUnionCompatible

    return wrapper
35 |
36 |
class Relation(object):

    """
    A set of tuples that all share one heading (a set of field names).

    Attributes:
        heading: frozenset of the field names given to the constructor.
        tuple: record type built by ``urecord.Record`` from the sorted field
            names; used to construct the tuples of this relation.
        tuples: dict mapping every stored tuple to itself, so that an equal
            tuple can be looked up and the stored object re-used.
    """

    def __init__(self, *fields, **kwargs):
        # ``kwargs`` is accepted but not used.
        self.heading = frozenset(fields)
        # Fields are sorted so that equivalent relations lay out their tuples
        # in the same order regardless of the order the names were given in.
        self.tuple = urecord.Record(*sorted(fields), instance=Tuple)
        self.tuples = {}

    def __repr__(self):
        # The field names are wrapped in a 1-tuple so that ``%`` formats the
        # whole sequence as a single value.
        return '<Relation%r>' % (self.tuple._fields,)

    def __len__(self):
        """Return the number of tuples in this relation."""
        return len(self.tuples)

    def __contains__(self, tuple_):
        """Return whether ``tuple_`` is stored in this relation."""
        return tuple_ in self.tuples

    def __iter__(self):
        """Iterate over the tuples stored in this relation."""
        return iter(self.tuples)

    def clone(self):
        """Create a new, empty relation with the same heading as this one."""

        return type(self)(*self.tuple._fields)

    def is_union_compatible(self, other):
        """Return whether ``other`` has exactly the same heading."""
        return self.heading == other.heading

    @check_union_compatible
    def update(self, other):

        """
        Merge this relation with another union-compatible relation.

        This method modifies (and returns) this relation. The other relation
        is not modified.
        """

        self.tuples.update(other.tuples)
        return self

    @check_union_compatible
    def union(self, other):
        """Safe set union between two union-compatible relations."""

        return self.clone().update(self).update(other)

    @check_union_compatible
    def intersection(self, other):
        """Safe set intersection between two union-compatible relations."""

        new_relation = self.clone()
        new_relation.tuples.update(
            (tuple_, tuple_) for tuple_ in
            set(self.tuples).intersection(set(other.tuples)))
        return new_relation

    @check_union_compatible
    def difference(self, other):
        """Safe set difference between two union-compatible relations."""

        new_relation = self.clone()
        new_relation.tuples.update(
            (tuple_, tuple_) for tuple_ in
            set(self.tuples).difference(set(other.tuples)))
        return new_relation

    def add(self, **kwargs):

        """
        Add a tuple to this relation.

        This method attempts to be as efficient as possible, re-using the same
        Python object if the tuple already exists in this relation.

        Arguments should be given in keyword form:

            >>> employees = Relation('name', 'department')
            >>> alice = employees.add(name='Alice', department='Finance')
            >>> alice.name
            'Alice'
            >>> alice.department
            'Finance'
        """

        tuple_ = self.tuple(**kwargs)
        return self.tuples.setdefault(tuple_, tuple_)

    def contains(self, **kwargs):

        """
        Determine if this relation contains the specified tuple.

        Arguments are given in the same form as for :meth:`add`. This is easier
        than having to construct a tuple and use Python's `in` operator, e.g.:

            >>> employees.contains(name='Alice', department='Sales')
            True

        Whereas without this method you'd do:

            >>> employees.tuple(name='Alice', department='Sales') in employees
            True
        """

        return self.tuple(**kwargs) in self

    def select(self, predicate):

        """
        Filter the tuples in this relation based on a predicate.

        Returns a new, union-compatible relation.
        """

        new_relation = self.clone()
        new_relation.tuples.update(
            (tuple_, tuple_) for tuple_ in self.tuples if predicate(tuple_))
        return new_relation

    def project(self, *fields):

        """
        Return a new relation with a heading restricted to the given fields.

        The new relation is not union-compatible, and will also be a set, so
        it may have a smaller cardinality than the original relation. Here's
        an example:

            >>> employees = Relation('name', 'department')
            >>> _ = employees.add(name='Alice', department='Sales')
            >>> _ = employees.add(name='Bob', department='Sales')
            >>> len(employees)
            2
            >>> departments = employees.project('department')
            >>> len(departments)
            1
            >>> departments.contains(department='Sales')
            True

        Raises :exc:`UndefinedFields` if any given field is not part of this
        relation's heading.
        """

        new_relation = type(self)(*fields)
        if not new_relation.heading.issubset(self.heading):
            undefined_fields = tuple(
                sorted(new_relation.heading.difference(self.heading)))
            # Wrap in a 1-tuple: formatting the bare tuple with a single %r
            # fails as soon as more than one field is undefined.
            raise UndefinedFields("Undefined fields used in project(): %r" %
                                  (undefined_fields,))

        projection = self.tuple._make_projection(*fields)

        new_relation.tuples.update(
            (tuple_, tuple_) for tuple_ in (
                new_relation.tuple(*t._index_restrict(*projection))
                for t in self.tuples))
        return new_relation

    def rename(self, **new_fields):

        """
        Rename some fields in this relation.

        Accepts keyword arguments in the form
        ``new_field_name='old_field_name'``. The new relation returned will
        only be union-compatible if no arguments are given to this function.

        Raises :exc:`RelationalError` if the mapping is not one-to-one or if
        a new name clashes with an existing field that is not itself being
        renamed, and :exc:`UndefinedFields` if an old field name is not part
        of this relation's heading.
        """

        if not is_bijection(new_fields):
            raise RelationalError("Field mapping is not one-to-one")

        renamed_fields = set(new_fields.values())
        if not renamed_fields.issubset(self.heading):
            undefined_fields = tuple(
                sorted(renamed_fields.difference(self.heading)))
            # Wrap in a 1-tuple: formatting the bare tuple with a single %r
            # fails as soon as more than one field is undefined.
            raise UndefinedFields("Undefined fields used in rename(): %r" %
                                  (undefined_fields,))

        # A new name may re-use an existing field name only when that field
        # is itself renamed away; otherwise completing the mapping below
        # would overwrite the requested rename with the identity entry.
        clashes = set(new_fields).intersection(self.heading) - renamed_fields
        if clashes:
            raise RelationalError(
                "New field names clash with existing fields: %r" %
                (tuple(sorted(clashes)),))

        # Get a complete bijection from new field names => old field names
        mapping = dict(new_fields)
        for field_name in self.heading:
            if field_name not in renamed_fields:
                mapping[field_name] = field_name

        new_relation = type(self)(*mapping.keys())
        reordering = self.tuple._make_reordering(**mapping)
        new_relation.tuples.update(
            (tuple_, tuple_) for tuple_ in (
                new_relation.tuple(*t._index_restrict(*reordering))
                for t in self.tuples))
        return new_relation

    def natural_join(self, other):

        """
        Join this relation with ``other`` on every field they have in common.

        The result's heading is the union of both headings. A pair of tuples
        contributes a row when it agrees on all common fields; when there
        are no common fields every pair agrees, so the result is the
        Cartesian product.
        """

        new_relation = type(self)(*self.heading.union(other.heading))
        common_fields = self.heading.intersection(other.heading)
        projection1 = self.tuple._make_projection(*common_fields)
        projection2 = other.tuple._make_projection(*common_fields)
        for tuple1 in self:
            for tuple2 in other:
                if (tuple1._index_restrict(*projection1) ==
                        tuple2._index_restrict(*projection2)):
                    row = tuple1._asdict()
                    row.update(tuple2._asdict())
                    new_relation.add(**row)
        return new_relation
235 |
236 |
def is_bijection(dictionary):
    """Return True when no two keys of ``dictionary`` share a value."""

    distinct_values = set(dictionary.values())
    return len(dictionary) == len(distinct_values)
241 |
242 |
def invert_bijection(dictionary):
    """
    Return the inverse of a bijection. Does not check the input.

    If several keys share a value, only one of them survives in the result.
    """

    # ``items()`` exists on both Python 2 and Python 3 dictionaries, whereas
    # ``iteritems()`` is Python 2 only.
    return dict((value, key) for (key, value) in dictionary.items())
247 |
--------------------------------------------------------------------------------
/distribute_setup.py:
--------------------------------------------------------------------------------
1 | #!python
2 | """Bootstrap distribute installation
3 |
4 | If you want to use setuptools in your package's setup.py, just include this
5 | file in the same directory with it, and add this to the top of your setup.py::
6 |
7 | from distribute_setup import use_setuptools
8 | use_setuptools()
9 |
10 | If you want to require a specific version of setuptools, set a download
11 | mirror, or use an alternate download directory, you can do so by supplying
12 | the appropriate options to ``use_setuptools()``.
13 |
14 | This file can also be run as a script to install or upgrade setuptools.
15 | """
16 | import os
17 | import sys
18 | import time
19 | import fnmatch
20 | import tempfile
21 | import tarfile
22 | from distutils import log
23 |
24 | try:
25 | from site import USER_SITE
26 | except ImportError:
27 | USER_SITE = None
28 |
29 | try:
30 | import subprocess
31 |
32 | def _python_cmd(*args):
33 | args = (sys.executable,) + args
34 | return subprocess.call(args) == 0
35 |
36 | except ImportError:
37 | # will be used for python 2.3
38 | def _python_cmd(*args):
39 | args = (sys.executable,) + args
40 | # quoting arguments if windows
41 | if sys.platform == 'win32':
42 | def quote(arg):
43 | if ' ' in arg:
44 | return '"%s"' % arg
45 | return arg
46 | args = [quote(arg) for arg in args]
47 | return os.spawnl(os.P_WAIT, sys.executable, *args) == 0
48 |
# Distribute release used when a caller does not pass ``version``.
DEFAULT_VERSION = "0.6.14"
# Base URL that the tarball file name is appended to when downloading.
DEFAULT_URL = "http://pypi.python.org/packages/source/d/distribute/"
# Version number substituted into the placeholder metadata below.
SETUPTOOLS_FAKED_VERSION = "0.6c11"

# PKG-INFO text describing a placeholder "setuptools" distribution; every
# field other than the name and version is filler.
SETUPTOOLS_PKG_INFO = """\
Metadata-Version: 1.0
Name: setuptools
Version: %s
Summary: xxxx
Home-page: xxx
Author: xxx
Author-email: xxx
License: xxx
Description: xxx
""" % SETUPTOOLS_FAKED_VERSION
64 |
65 |
def _install(tarball):
    """Unpack ``tarball`` in a fresh temp directory and run its installer.

    The previous working directory is restored afterwards, even when a step
    raises. A failing ``setup.py install`` is only reported through log
    warnings; nothing is returned.
    """
    workdir = tempfile.mkdtemp()
    log.warn('Extracting in %s', workdir)
    original_cwd = os.getcwd()
    try:
        os.chdir(workdir)
        archive = tarfile.open(tarball)
        _extractall(archive)
        archive.close()

        # Continue inside the first entry of the extraction directory
        # (presumably the archive's single top-level folder).
        source_dir = os.path.join(workdir, os.listdir(workdir)[0])
        os.chdir(source_dir)
        log.warn('Now working in %s', source_dir)

        log.warn('Installing Distribute')
        installed = _python_cmd('setup.py', 'install')
        if not installed:
            log.warn('Something went wrong during the installation.')
            log.warn('See the error message above.')
    finally:
        os.chdir(original_cwd)
89 |
90 |
def _build_egg(egg, tarball, to_dir):
    """Unpack ``tarball`` and build a Distribute egg into ``to_dir``.

    The previous working directory is restored afterwards, even when a step
    raises. Raises ``IOError`` if the file ``egg`` does not exist once the
    build command has run.
    """
    workdir = tempfile.mkdtemp()
    log.warn('Extracting in %s', workdir)
    original_cwd = os.getcwd()
    try:
        os.chdir(workdir)
        archive = tarfile.open(tarball)
        _extractall(archive)
        archive.close()

        # Continue inside the first entry of the extraction directory
        # (presumably the archive's single top-level folder).
        source_dir = os.path.join(workdir, os.listdir(workdir)[0])
        os.chdir(source_dir)
        log.warn('Now working in %s', source_dir)

        log.warn('Building a Distribute egg in %s', to_dir)
        _python_cmd('setup.py', '-q', 'bdist_egg', '--dist-dir', to_dir)
    finally:
        os.chdir(original_cwd)

    # The build command's status is ignored; the egg file is what counts.
    log.warn(egg)
    if os.path.exists(egg):
        return
    raise IOError('Could not build the egg.')
117 |
118 |
def _do_download(version, download_base, to_dir, download_delay):
    """Make a Distribute egg importable, building it first if necessary.

    The egg path is derived from ``to_dir``, ``version`` and the running
    interpreter's major/minor version. When no file exists at that path the
    tarball is downloaded and built. The egg is then put at the front of
    ``sys.path`` and recorded on ``setuptools.bootstrap_install_from``.
    """
    major, minor = sys.version_info[0], sys.version_info[1]
    egg_name = 'distribute-%s-py%d.%d.egg' % (version, major, minor)
    egg = os.path.join(to_dir, egg_name)

    egg_missing = not os.path.exists(egg)
    if egg_missing:
        tarball = download_setuptools(version, download_base,
                                      to_dir, download_delay)
        _build_egg(egg, tarball, to_dir)

    sys.path.insert(0, egg)
    # Imported only now, after the egg has been placed on sys.path.
    import setuptools
    setuptools.bootstrap_install_from = egg
129 |
130 |
def use_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL,
                   to_dir=os.curdir, download_delay=15, no_fake=True):
    """Make sure distribute ``>= version`` is available, fetching it if not.

    If ``pkg_resources`` cannot be imported, or lacks the ``_distribute``
    marker attribute, distribute is downloaded via ``_do_download``. If it
    is present but too old, it is downloaded too, unless ``pkg_resources``
    or ``setuptools`` had already been imported before this call, in which
    case a message is written to stderr and the process exits with status 2.

    When ``no_fake`` is false, ``_fake_setuptools()`` is called before
    downloading over a non-distribute ``pkg_resources``, and a placeholder
    setuptools PKG-INFO is always created in ``to_dir`` on the way out.
    """
    # making sure we use the absolute path
    to_dir = os.path.abspath(to_dir)
    # Remember whether either module was loaded before we touch anything:
    # an already-imported module cannot be swapped out safely below.
    was_imported = 'pkg_resources' in sys.modules or \
        'setuptools' in sys.modules
    try:
        try:
            import pkg_resources
            if not hasattr(pkg_resources, '_distribute'):
                # pkg_resources is importable but does not carry the
                # ``_distribute`` marker; handle it like a missing module.
                if not no_fake:
                    _fake_setuptools()
                raise ImportError
        except ImportError:
            return _do_download(version, download_base, to_dir, download_delay)
        try:
            pkg_resources.require("distribute>="+version)
            return
        except pkg_resources.VersionConflict:
            # Fetched through sys.exc_info() rather than an ``except ... as``
            # / ``except ..., e`` clause.
            e = sys.exc_info()[1]
            if was_imported:
                sys.stderr.write(
                    "The required version of distribute (>=%s) is not available,\n"
                    "and can't be installed while this script is running. Please\n"
                    "install a more recent version first, using\n"
                    "'easy_install -U distribute'."
                    "\n\n(Currently using %r)\n" % (version, e.args[0]))
                sys.exit(2)
            else:
                # Drop the stale module so the downloaded copy can be
                # imported in its place.
                del pkg_resources, sys.modules['pkg_resources']  # reload ok
                return _do_download(version, download_base, to_dir,
                                    download_delay)
        except pkg_resources.DistributionNotFound:
            return _do_download(version, download_base, to_dir,
                                download_delay)
    finally:
        # Runs on every exit path, including the early returns above.
        if not no_fake:
            _create_fake_setuptools_pkg_info(to_dir)
169 |
def download_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL,
                        to_dir=os.curdir, delay=15):
    """Fetch the distribute source tarball and return its real path.

    The file ``distribute-<version>.tar.gz`` is looked up under
    ``download_base`` (which should end with a '/') and saved into
    ``to_dir``. Nothing is downloaded when a file of that name already
    exists there. ``delay`` is accepted but not used by this function.
    """
    # making sure we use the absolute path
    to_dir = os.path.abspath(to_dir)
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib2 import urlopen

    archive_name = "distribute-%s.tar.gz" % version
    url = download_base + archive_name
    target = os.path.join(to_dir, archive_name)

    if os.path.exists(target):
        # Avoid repeated downloads
        return os.path.realpath(target)

    response = output = None
    try:
        log.warn("Downloading %s", url)
        response = urlopen(url)
        # The whole payload is read before the target file is opened, so
        # an interrupted download does not leave a corrupt file behind.
        payload = response.read()
        output = open(target, "wb")
        output.write(payload)
    finally:
        if response:
            response.close()
        if output:
            output.close()
    return os.path.realpath(target)
205 |
206 | def _no_sandbox(function):
207 | def __no_sandbox(*args, **kw):
208 | try:
209 | from setuptools.sandbox import DirectorySandbox
210 | if not hasattr(DirectorySandbox, '_old'):
211 | def violation(*args):
212 | pass
213 | DirectorySandbox._old = DirectorySandbox._violation
214 | DirectorySandbox._violation = violation
215 | patched = True
216 | else:
217 | patched = False
218 | except ImportError:
219 | patched = False
220 |
221 | try:
222 | return function(*args, **kw)
223 | finally:
224 | if patched:
225 | DirectorySandbox._violation = DirectorySandbox._old
226 | del DirectorySandbox._old
227 |
228 | return __no_sandbox
229 |
def _patch_file(path, content):
    """Replace the file at ``path`` with ``content``, keeping a backup.

    Returns False and leaves the file alone when it already holds
    ``content``. Otherwise the existing file is renamed out of the way via
    ``_rename_path``, ``content`` is written to ``path`` and True is
    returned.
    """
    current = open(path).read()
    if current == content:
        # already patched
        log.warn('Already patched.')
        return False

    log.warn('Patching...')
    _rename_path(path)
    handle = open(path, 'w')
    try:
        handle.write(content)
    finally:
        handle.close()
    return True

# Run the patching with the setuptools sandbox violation hook disabled.
_patch_file = _no_sandbox(_patch_file)
247 |
248 | def _same_content(path, content):
249 | return open(path).read() == content
250 |
def _rename_path(path):
    """Rename ``path`` by appending ``.OLD.<time.time()>``; return the new name."""
    suffix = '.OLD.%s' % time.time()
    backup_name = path + suffix
    log.warn('Renaming %s into %s', path, backup_name)
    os.rename(path, backup_name)
    return backup_name
256 |
def _remove_flat_installation(placeholder):
    """Patch and move aside a flat setuptools install found in ``placeholder``.

    Returns False when ``placeholder`` is not a directory or when the
    egg-info was already patched, None when no ``setuptools*.egg-info``
    entry is found, and True once the egg-info has been patched and the
    setuptools files have been renamed out of the way.
    """
    if not os.path.isdir(placeholder):
        log.warn('Unkown installation at %s', placeholder)
        return False

    # Stop at the first directory entry that looks like setuptools metadata.
    for entry in os.listdir(placeholder):
        if fnmatch.fnmatch(entry, 'setuptools*.egg-info'):
            break
    else:
        log.warn('Could not locate setuptools*.egg-info')
        return

    log.warn('Removing elements out of the way...')
    pkg_info = os.path.join(placeholder, entry)
    if os.path.isdir(pkg_info):
        patched = _patch_egg_dir(pkg_info)
    else:
        patched = _patch_file(pkg_info, SETUPTOOLS_PKG_INFO)

    if not patched:
        log.warn('%s already patched.', pkg_info)
        return False

    # now let's move the files out of the way
    for name in ('setuptools', 'pkg_resources.py', 'site.py'):
        target = os.path.join(placeholder, name)
        if os.path.exists(target):
            _rename_path(target)
        else:
            log.warn('Could not find the %s element of the '
                     'Setuptools distribution', target)
    return True

# Run the removal with the setuptools sandbox violation hook disabled.
_remove_flat_installation = _no_sandbox(_remove_flat_installation)
291 |
def _after_install(dist):
    """Post-install hook: drop the fake setuptools metadata.

    *dist* must provide ``get_command_obj``; the ``install`` command's
    ``install_purelib`` directory is where the fake package info goes.
    """
    log.warn('After install bootstrap.')
    install_cmd = dist.get_command_obj('install')
    _create_fake_setuptools_pkg_info(install_cmd.install_purelib)
296 |
def _create_fake_setuptools_pkg_info(placeholder):
    """Write fake setuptools metadata into the *placeholder* directory.

    Two files are created: a ``setuptools-<version>-py<X.Y>.egg-info``
    file holding ``SETUPTOOLS_PKG_INFO`` and a ``setuptools.pth`` file
    pointing at it.  Nothing is written when *placeholder* is empty or
    missing, or when the egg-info file already exists.
    """
    if not (placeholder and os.path.exists(placeholder)):
        log.warn('Could not find the install location')
        return

    pyver = '%s.%s' % tuple(sys.version_info[:2])
    setuptools_file = 'setuptools-%s-py%s.egg-info' % (
        SETUPTOOLS_FAKED_VERSION, pyver)
    pkg_info = os.path.join(placeholder, setuptools_file)
    if os.path.exists(pkg_info):
        log.warn('%s already exists', pkg_info)
        return

    # (target file, text to write), in creation order.
    outputs = (
        (pkg_info, SETUPTOOLS_PKG_INFO),
        (os.path.join(placeholder, 'setuptools.pth'),
         os.path.join(os.curdir, setuptools_file)),
    )
    for target, text in outputs:
        log.warn('Creating %s', target)
        handle = open(target, 'w')
        try:
            handle.write(text)
        finally:
            handle.close()

# Run with the setuptools sandbox checks disabled.
_create_fake_setuptools_pkg_info = _no_sandbox(_create_fake_setuptools_pkg_info)
325 |
def _patch_egg_dir(path):
    """Replace the egg directory at *path* with a fake setuptools egg.

    Returns False when ``EGG-INFO/PKG-INFO`` inside *path* already holds
    ``SETUPTOOLS_PKG_INFO``.  Otherwise the directory is renamed out of
    the way, recreated with only an ``EGG-INFO/PKG-INFO`` file containing
    the fake metadata, and True is returned.
    """
    egg_info_dir = os.path.join(path, 'EGG-INFO')
    pkg_info = os.path.join(egg_info_dir, 'PKG-INFO')

    # Bail out early if a previous run already patched this egg.
    already_patched = (os.path.exists(pkg_info) and
                       _same_content(pkg_info, SETUPTOOLS_PKG_INFO))
    if already_patched:
        log.warn('%s already patched.', pkg_info)
        return False

    _rename_path(path)
    os.mkdir(path)
    os.mkdir(egg_info_dir)
    handle = open(pkg_info, 'w')
    try:
        handle.write(SETUPTOOLS_PKG_INFO)
    finally:
        handle.close()
    return True

# Run with the setuptools sandbox checks disabled.
_patch_egg_dir = _no_sandbox(_patch_egg_dir)
345 |
def _before_install():
    """Pre-install hook: log the step, then run ``_fake_setuptools()``."""
    log.warn('Before install bootstrap.')
    _fake_setuptools()
349 |
350 |
def _under_prefix(location):
    """Tell whether *location* lies under the requested install target.

    The arguments following ``install`` in ``sys.argv`` are scanned for
    ``--root``/``--prefix`` (both the ``--opt=value`` and ``--opt value``
    spellings) and for ``--user``.  The first one found decides the
    result: True when *location* starts with the given directory (or
    with ``USER_SITE`` for ``--user``), False otherwise.

    Returns True when ``install`` is not on the command line or none of
    these options is present.
    """
    if 'install' not in sys.argv:
        return True
    args = sys.argv[sys.argv.index('install') + 1:]
    for index, arg in enumerate(args):
        for option in ('--root', '--prefix'):
            option_eq = '%s=' % option
            if arg.startswith(option_eq):
                # Strip exactly the "--option=" prefix.  Splitting on
                # 'root=' only worked for --root and mangled --prefix.
                top_dir = arg[len(option_eq):]
                return location.startswith(top_dir)
            elif arg == option:
                # The value is the next argument; make sure there is one
                # instead of indexing past the end of the list.
                if len(args) > index + 1:
                    top_dir = args[index + 1]
                    return location.startswith(top_dir)
        if arg == '--user' and USER_SITE is not None:
            return location.startswith(USER_SITE)
    return True
367 |
368 |
def _fake_setuptools():
    """Replace an installed setuptools with fake metadata, then relaunch.

    Looks up the ``setuptools`` distribution through ``pkg_resources``.
    If one is found under the install target, it is patched (flat
    installations and ``.egg`` installations are handled separately) and
    the process is relaunched via ``_relaunch()``.  The function returns
    quietly when there is nothing to patch or patching did not happen.
    """
    log.warn('Scanning installed packages')
    try:
        import pkg_resources
    except ImportError:
        # Neither setuptools nor distribute is around: nothing to fake.
        log.warn('Setuptools or Distribute does not seem to be installed.')
        return

    working_set = pkg_resources.working_set
    try:
        requirement = pkg_resources.Requirement.parse('setuptools',
                                                      replacement=False)
        setuptools_dist = working_set.find(requirement)
    except TypeError:
        # Older API without the `replacement` keyword.
        requirement = pkg_resources.Requirement.parse('setuptools')
        setuptools_dist = working_set.find(requirement)

    if setuptools_dist is None:
        log.warn('No setuptools distribution found')
        return

    location = setuptools_dist.location
    log.warn('Setuptools installation detected at %s', location)

    # When --root or --prefix points somewhere else, the detected
    # setuptools is left alone.
    if not _under_prefix(location):
        log.warn('Not patching, --root or --prefix is installing Distribute'
                 ' in another location')
        return

    if location.endswith('.egg'):
        log.warn('Egg installation')
        pkg_info = os.path.join(location, 'EGG-INFO', 'PKG-INFO')
        if (os.path.exists(pkg_info) and
                _same_content(pkg_info, SETUPTOOLS_PKG_INFO)):
            log.warn('Already patched.')
            return
        log.warn('Patching...')
        # Swap the real egg for a fake one.
        patched = _patch_egg_dir(location)
    else:
        log.warn('Non-egg installation')
        patched = _remove_flat_installation(location)

    if not patched:
        return
    log.warn('Patched done.')
    _relaunch()
419 |
420 |
def _relaunch():
    """Re-run the current command line in a fresh interpreter and exit.

    The child is started with ``sys.executable`` followed by ``sys.argv``;
    this process then exits with the child's return code.
    """
    log.warn('Relaunching...')
    # pip marker to avoid a relaunch bug: when argv starts with this
    # sequence, the leading '-c' is replaced by 'setup.py'.
    pip_marker = ['-c', 'install', '--single-version-externally-managed']
    if sys.argv[:3] == pip_marker:
        sys.argv[0] = 'setup.py'
    command = [sys.executable]
    command.extend(sys.argv)
    sys.exit(subprocess.call(command))
429 |
430 |
def _extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       `self' is the open tarfile.TarFile to extract from.  Errors raised
       while restoring directory attributes are re-raised only when
       `self.errorlevel' is greater than 1; otherwise they are reported
       through `self._dbg'.
    """
    import copy
    import operator
    from tarfile import ExtractError
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directories with a safe mode.
            directories.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 448  # decimal for oct 0700
        self.extract(tarinfo, path)

    # Reverse sort directories.
    if sys.version_info < (2, 4):
        def sorter(dir1, dir2):
            return cmp(dir1.name, dir2.name)
        directories.sort(sorter)
        directories.reverse()
    else:
        directories.sort(key=operator.attrgetter('name'), reverse=True)

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            try:
                # Newer Pythons (3.5+) require the `numeric_owner'
                # argument; calling chown() without it raises TypeError.
                self.chown(tarinfo, dirpath, numeric_owner=False)
            except TypeError:
                # Older Pythons do not know the keyword at all.
                self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            e = sys.exc_info()[1]
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
476 |
477 |
def main(argv, version=DEFAULT_VERSION):
    """Install or upgrade setuptools and EasyInstall

    Fetches a tarball with ``download_setuptools()`` and passes the
    returned path to ``_install()``.
    """
    # NOTE(review): neither `argv` nor `version` is used by this body;
    # download_setuptools() is called with its defaults.  Confirm whether
    # `version` was meant to be forwarded.
    tarball = download_setuptools()
    _install(tarball)
482 |
483 |
# Script entry point: when executed directly, run main() with the
# command-line arguments (program name stripped).
if __name__ == '__main__':
    main(sys.argv[1:])
486 |
--------------------------------------------------------------------------------