├── .editorconfig
├── .gitignore
├── batbelt
├── __init__.py
├── utils.py
├── parallel.py
├── objects.py
├── hack.py
├── strings.py
└── structs.py
├── setup.py
└── README.rst
/.editorconfig:
--------------------------------------------------------------------------------
1 | # EditorConfig is awesome: http://EditorConfig.org
2 |
3 | # top-most EditorConfig file
4 | root = true
5 |
6 | [*]
7 | indent_style = space
8 | end_of_line = lf
9 | charset = utf-8
10 | trim_trailing_whitespace = true
11 | insert_final_newline = true
12 |
13 | [*.py]
14 | indent_size = 4
15 |
16 | [*.{html,js,rb,scss,xml,less,css}]
17 | indent_size = 2
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # temporary, hidden and backup files
2 |
3 | *~
4 | *.bak
5 | *.log
6 | *.tmp
7 | *_index
8 | *.orig
9 | *.swp
10 | .* # all hidden files...
11 | !.gitignore # ...except gitignore
12 |
13 | # binaries
14 |
15 | *.pyc
16 | *.pyo
17 |
18 | # files generated by setuptools
19 |
20 | *.egg-info
21 | dist
22 | build
23 | *.tar.gz
24 | *.egg
25 | *.in
26 |
27 | # others
28 |
29 | content
30 | *.sublime-project
31 | *.sublime-workspace
32 |
--------------------------------------------------------------------------------
/batbelt/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # vim: ai ts=4 sts=4 et sw=4 nu
4 |
5 |
6 | __version__ = "0.5.2"
7 |
8 | from strings import (slugify, normalize, escape_html,
9 | unescape_html, json_dumps, json_loads)
10 | from structs import (chunks, get, dmerge, sset, dswap, window,
11 | subdict, iget, flatten, skip_duplicates)
12 | from objects import attr, import_from_path, Null
13 | from utils import to_timestamp
14 | from hack import decorator_with_args
15 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 |
from setuptools import setup, find_packages

# MANIFEST.in is (re)generated on every run of this script; it asks for the
# top-level .rst files to be included. A context manager guarantees the
# content is flushed and the handle closed before setup() runs.
with open('MANIFEST.in', 'w') as manifest:
    manifest.write('\n'.join((
        "include *.rst",
    )))

from batbelt import __version__

# Read the long description up front so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open('README.rst') as readme:
    long_description = readme.read()

setup(

    name="batbelt",
    version=__version__,
    packages=find_packages('.'),
    author="Sam et Max",
    author_email="lesametlemax@gmail.com",
    description="A collection of gagdets that makes Python even more powerful.",
    long_description=long_description,
    include_package_data=True,
    classifiers=[
        'Programming Language :: Python',
        "Intended Audience :: Information Technology",
        "License :: OSI Approved :: zlib/libpng License",
        "Natural Language :: English",
        "Programming Language :: Python :: 2.7"
    ],
    url="https://github.com/sametmax/Bat-belt"
)
29 |
30 |
--------------------------------------------------------------------------------
/batbelt/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # vim: ai ts=4 sts=4 et sw=4 nu
4 |
5 | """
6 | The infamous utils.py module filled with functions you don't know where else
7 | to put.
8 | """
9 |
10 |
11 | import sys
12 | import os
13 |
14 | from datetime import datetime
15 |
16 |
17 | CLASSIC_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f'
18 | CLASSIC_DATETIME_PATTERN = r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6}'
19 |
20 |
21 |
22 |
def to_timestamp(dt):
    """
        Return the number of seconds between the Unix epoch
        (1970-01-01 00:00:00) and the given datetime object, as a float.

        Naive datetimes are compared to the naive epoch as is. Timezone
        aware datetimes are first converted to UTC using their own offset,
        so they no longer raise a TypeError.

        Example:

            >>> from datetime import datetime
            >>> to_timestamp(datetime(2000, 1, 1, 1, 1, 1))
            946688461.0
            >>> to_timestamp(datetime(1970, 1, 1))
            0.0
    """
    # getattr() keeps the historical TypeError for objects that are not
    # datetimes (e.g. plain dates): they just fall through to the
    # subtraction below.
    if getattr(dt, 'tzinfo', None) is not None:
        offset = dt.utcoffset()
        # A tzinfo is allowed to return None as its offset; in that case
        # the value is treated like a naive datetime.
        if offset is not None:
            dt = dt.replace(tzinfo=None) - offset
        else:
            dt = dt.replace(tzinfo=None)

    return (dt - datetime(1970, 1, 1)).total_seconds()
34 |
35 |
class ImportableItems(list):
    """
        A list of item names that also carries a `non_importable_items`
        dict. Appending a name to the list removes that name from the dict.
    """

    def __init__(self, *args, **kwargs):
        super(ImportableItems, self).__init__(*args, **kwargs)
        # Starts empty; the owner of the list fills it in.
        self.non_importable_items = dict()

    def append(self, item_name):
        """
            Append `item_name` to the list and drop it from
            `non_importable_items` if it was registered there.
        """
        excluded = self.non_importable_items
        if item_name in excluded:
            del excluded[item_name]
        super(ImportableItems, self).append(item_name)
45 |
46 |
def import_list(*args):
    """
        Help building the __all__ listing of a module.

        Returns a 2-tuple:

        - an ImportableItems list, pre-filled with the names passed as
          arguments, suitable as a value for __all__;
        - an `importable(func, name=None)` decorator that appends the
          name of whatever it decorates to that list and returns the
          decorated object untouched.

        The list's `non_importable_items` dict is seeded with the globals
        of the calling frame; each name appended to the list is removed
        from that dict.
    """

    exported = ImportableItems()

    # Frame 1 is the direct caller of import_list()
    caller_globals = sys._getframe(1).f_globals
    exported.non_importable_items.update(caller_globals)

    for item_name in args:
        exported.append(item_name)

    def importable(func, name=None):

        if name is None:
            # Objects without a __name__ need an explicit name
            try:
                name = func.__name__
            except AttributeError:
                raise ValueError('You must provide a name for '
                                 'this item: %s' % repr(func))

        exported.append(name)
        return func

    return exported, importable
74 |
75 |
76 |
def add_to_pythonpath(path, starting_point='.', insertion_index=None):
    """
        Add the directory to the sys.path.

        You can pass an absolute or a relative path to it. A leading `~`
        and shell variables (`$VAR`) are expanded BEFORE deciding whether
        the path is relative, so "~/lib" points to your home directory
        and not to a "~" directory below `starting_point`.

        If you choose to use a relative path, it will be relative to
        `starting_point` by default, which is set to '.'.

        You may want to set it to something like __file__ (the basename will
        be stripped, and the current file's parent directory will be used
        as a starting point, which is probably what you expect in the
        first place).

        The resolved path is appended to sys.path, or inserted at
        `insertion_index` if one is given. Nothing happens if the resolved
        path is already in sys.path.

        :example:

        >>> add_to_pythonpath('../..', __file__)
    """

    # Expand first: "~/x" and "$HOME/x" are not absolute as far as
    # os.path.isabs() is concerned until they are expanded.
    path = os.path.expandvars(os.path.expanduser(path))

    if not os.path.isabs(path):

        if os.path.isfile(starting_point):
            starting_point = os.path.dirname(starting_point)

        path = os.path.join(starting_point, path)

    # Expanding again keeps the previous behavior for a `starting_point`
    # that itself contains "~" or shell variables.
    path = os.path.realpath(os.path.expandvars(os.path.expanduser(path)))

    if path not in sys.path:
        if insertion_index is None:
            sys.path.append(path)
        else:
            sys.path.insert(insertion_index, path)
110 |
111 |
--------------------------------------------------------------------------------
/batbelt/parallel.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # vim: ai ts=4 sts=4 et sw=4 nu
4 |
5 |
6 |
7 |
8 |
9 | import threading
10 | import multiprocessing
11 | from functools import wraps
12 | from Queue import Queue, Empty
13 |
14 | __all__ = ['process', 'thread']
15 |
16 |
def process(*proxy_args, **proxy_kwargs):
    """
        Decorator factory running the decorated function in a
        multiprocessing.Process.

        Two modes, chosen when the decorator is built:

        - no argument at all (`@process()`): every call to the decorated
          function starts a new process running the function with the call
          arguments, and returns the started Process object;
        - `fire=True` or any other positional / keyword argument: a process
          is started right away, at decoration time, calling the function
          with these arguments (`fire` itself is not passed along). The
          Process object is available as the `process` attribute of the
          decorated function. Calling the decorated function afterwards
          runs it in the current process and discards its return value.
    """

    explicit_fire = proxy_kwargs.pop('fire', False)
    fire = explicit_fire or proxy_args or proxy_kwargs

    def decorator(func):

        if not fire:

            @wraps(func)
            def spawner(*args, **kwargs):
                child = multiprocessing.Process(target=func, args=args,
                                                kwargs=kwargs)
                child.start()
                return child

            return spawner

        @wraps(func)
        def fun(*args, **kwargs):
            func(*args, **kwargs)

        child = multiprocessing.Process(target=fun, args=proxy_args,
                                        kwargs=proxy_kwargs)
        # Expose the process before starting it
        fun.process = child
        child.start()

        return fun

    return decorator
47 |
48 |
def thread(*proxy_args, **proxy_kwargs):
    """
        Decorator factory running the decorated function in a
        threading.Thread.

        Two modes, chosen when the decorator is built:

        - no argument at all (`@thread()`): every call to the decorated
          function starts a new thread running the function with the call
          arguments, and returns the started Thread object;
        - `fire=True` or any other positional / keyword argument: a thread
          is started right away, at decoration time, calling the function
          with these arguments (`fire` itself is not passed along). The
          Thread object is available as the `thread` attribute of the
          decorated function. Calling the decorated function afterwards
          runs it in the calling thread and discards its return value.
    """

    explicit_fire = proxy_kwargs.pop('fire', False)
    fire = explicit_fire or proxy_args or proxy_kwargs

    def decorator(func):

        if not fire:

            @wraps(func)
            def spawner(*args, **kwargs):
                runner = threading.Thread(target=func, args=args,
                                          kwargs=kwargs)
                runner.start()
                return runner

            return spawner

        @wraps(func)
        def fun(*args, **kwargs):
            func(*args, **kwargs)

        runner = threading.Thread(target=fun, args=proxy_args,
                                  kwargs=proxy_kwargs)
        # Expose the thread before starting it
        fun.thread = runner
        runner.start()

        return fun

    return decorator
79 |
80 |
class StopWorker():
    """
        Marker object: the main loop of a worker() stops as soon as it
        reads an instance of this class from its input queue.
    """
83 |
84 |
def worker(block=True, timeout=0.1, method='process'):
    """
        Turn a function into a worker:

            from parallel import worker

            @worker()
            def test(mot):
                print "in %s" % mot
                return mot

            process = test.start()

            for x in range(10):
                process.put(x)

            for x in range(10):
                print "out %s" % process.get()

            process.stop()

        Which outputs:

            in 0
            out 0
            in 1
            in 2
            out 1
            in 3
            out 2
            in 4
            out 3
            out 4
            in 5
            out 5
            in 6
            out 6
            in 7
            out 7
            in 8
            out 8
            in 9
            out 9

        The worker runs in a multiprocessing.Process, unless `method` is
        'thread', in which case it runs in a threading.Thread.

        `block` and `timeout` are passed to every queue operation done by
        the worker loop, by put() and by stop(). They are also the default
        values of the parameters of get().

        The decorated function itself can still be called directly: it then
        runs in the caller, without going through the queues.

        If an exception occurs in the worker, the exception object is sent
        back through the output queue, and get() raises it instead of
        returning it. As a consequence, a worker function returning an
        Exception instance as a normal result will see it raised by get().
    """

    def decorator(func):

        # Pick the queue / runner pair matching the requested method
        if method == 'thread':
            Q = Queue
            Manager = threading.Thread
        else:
            Q = multiprocessing.Queue
            Manager = multiprocessing.Process

        # One pair of queues per decorated function
        in_queue = Q()
        out_queue = Q()

        def main_loop():
            # Body of the worker: consume inputs until told to stop

            while True:
                try:

                    res = in_queue.get(block, timeout)

                    # stop() pushes a StopWorker instance to end the loop
                    if isinstance(res, StopWorker):
                        break

                    out_queue.put(func(res))

                except Empty:
                    # Nothing to process yet: poll again
                    pass
                except KeyboardInterrupt:
                    break
                except Exception as e:
                    # Hand the error over to the consumer; get() raises it
                    out_queue.put(e)

        @wraps(func)
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs)

        def get(block=block, timeout=timeout):
            # Return the next result, or raise it if it is an exception
            res = out_queue.get(block, timeout)
            if isinstance(res, Exception):
                raise res
            return res

        # The manager is the Thread / Process object; the queue helpers
        # are attached to it as plain attributes.
        wrapper.manager = Manager(target=main_loop)
        wrapper.manager.get = get
        wrapper.manager.stop = lambda: in_queue.put(StopWorker(), block, timeout)
        wrapper.manager.put = lambda x: in_queue.put(x, block, timeout)
        # Manager.start() returns None, so start() evaluates to the manager
        wrapper.start = lambda: wrapper.manager.start() or wrapper.manager

        return wrapper

    return decorator
184 |
--------------------------------------------------------------------------------
/batbelt/objects.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # vim: ai ts=4 sts=4 et sw=4 nu
4 |
5 |
6 |
7 | from functools import wraps
8 |
9 |
10 | __all__ = ['import_from_path', 'attr', 'dynamicmethod', 'NullObject', 'Null']
11 |
12 |
def import_from_path(path):
    """
        Import an object (typically a class) dynamically, given its
        dotted path, e.g. 'package.module.ClassName'.

        Raises ImportError if the module can not be imported or if it has
        no attribute with the requested name.
    """
    module_name, attr_name = path.rsplit('.', 1)

    try:
        # A non-empty fromlist makes __import__ return the leaf module
        module = __import__(module_name, fromlist=[attr_name])
        target = getattr(module, attr_name)
    except AttributeError:
        raise ImportError('Unable to import %s' % path)

    return target
22 |
23 |
24 |
def attr(obj, *attrs, **kwargs):
    """
        Walk a chain of attributes, starting from `obj`, and return the
        value of the last one. If any lookup fails, return the value of
        the `default` keyword argument (None if it is not provided).

            res = attr(data, 'test', 'o', 'bla', default="yeah")

        is the equivalent of

            try:
                res = getattr(getattr(getattr(data, 'test'), 'o'), 'bla')
            except AttributeError:
                res = "yeah"

        Calling it without any attribute name returns the default value
        as well.
    """
    fallback = kwargs.get('default')

    # No attribute to follow: same outcome as a failed lookup
    if not attrs:
        return fallback

    current = obj
    try:
        for name in attrs:
            current = getattr(current, name)
    except (IndexError, AttributeError):
        return fallback

    return current
49 |
50 |
class InstanceAndClassMethodDescriptor(object):
    """
        Descriptor returning a wrapper around the method. The wrapper calls
        the method with the instance as first argument when it's accessed
        from an instance, and with the class when it's accessed from a
        class.
    """

    def __init__(self, method):
        self.method = method

    def __get__(self, obj, obj_type=None):
        # `obj` is None only for an access from the class. Testing the
        # truthiness of `obj` instead would wrongly hand the class over
        # for any falsy instance (empty container, __bool__ returning
        # False, ...).
        target = obj_type if obj is None else obj

        @wraps(self.method)
        def wrapper(*args, **kwargs):
            return self.method(target, *args, **kwargs)

        return wrapper
68 |
69 |
70 |
def dynamicmethod(func):
    """
        Decorator for a method which receives either the class or the
        instance as first parameter, depending on whether it's called
        from the class or from an instance.
    """
    descriptor = InstanceAndClassMethodDescriptor(func)
    return descriptor
78 |
79 |
class Singleton(type):
    """
        Metaclass implementing the Singleton pattern: the first
        instantiation of a class creates the instance, any later one
        returns that same instance (the arguments are then ignored).

        Usage:

            >>> class Test(object):
            ...     __metaclass__ = Singleton
            ...
            >>> id(Test()) == id(Test())
            True
    """

    # class -> unique instance, shared by every class using the metaclass
    _instances = {}

    def __call__(cls, *args, **kwargs):
        registry = cls._instances
        if cls in registry:
            return registry[cls]

        instance = super(Singleton, cls).__call__(*args, **kwargs)
        registry[cls] = instance
        return instance
99 |
100 |
101 |
class NullObject(object):
    """

        Null object pattern implementation. It accepts any parameters,
        any attribute lookup and any method call, and always returns
        itself.

        There is no instance caching here: every call to NullObject()
        builds a new object, but all of them compare equal to each other
        and to None.

        Example:

            >>> n = NullObject()
            >>> n
            NullObject()
            >>> n == NullObject('value') == NullObject('value', param='value')
            True
            >>> n() == n('value') == n('value', param='value') == n
            True
            >>> n.attr1
            NullObject()
            >>> n.attr1.attr2
            NullObject()
            >>> n.method1()
            NullObject()
            >>> n.method1().method2()
            NullObject()
            >>> n.method('value')
            NullObject()
            >>> n.method(param='value')
            NullObject()
            >>> n.method('value', param='value')
            NullObject()
            >>> n.attr1.method1()
            NullObject()
            >>> n.method1().attr1
            NullObject()
            >>> n.attr1 = 'value'
            >>> n.attr1.attr2 = 'value'
            >>> del n.attr1
            >>> del n.attr1.attr2.attr3
            >>> str(n) == ''
            True
            >>> n + 1 / 7 % 3
            NullObject()
            >>> n[1] == n[:4] == n
            True
            >>> 'test' in n
            False
            >>> n['test']
            NullObject()
            >>> NullObject() >> 1
            NullObject()
            >>> NullObject() == None
            True


        Iterating on NullObject() will end up in an infinite loop with the
        next item always being NullObject(). Stuff like sorted() will hang.
    """

    def __init__(self, *args, **kwargs):
        """
            NullObject accepts any arguments and ignores them.
        """
        pass

    def __repr__(self):
        return "NullObject()"

    def __str__(self):
        return ""

    # NOTE(review): under Python 3, defining __eq__ without __hash__ makes
    # the instances unhashable.
    def __eq__(self, other):
        """
            NullObject is only equal to another NullObject or to None
        """
        return isinstance(other, NullObject) or other is None

    # Like None, NullObject is False in a boolean context
    # (__nonzero__ is the Python 2 name, __bool__ the Python 3 one)
    __nonzero__ = __bool__ = lambda self: False

    # Any attribute lookup, method call or operation on NullObject returns
    # the NullObject itself
    nullify = lambda self, *x, **kwargs: self

    __call__ = nullify
    # Setting or deleting an attribute is silently ignored
    __getattr__ = __setattr__ = __delattr__ = nullify
    # Every comparison but __eq__ returns the NullObject, which is falsy
    __cmp__ = __ne__ = __lt__ = __gt__ = __le__ = __ge__ = nullify
    __pos__ = __neg__ = __abs__ = __invert__ = nullify
    __add__ = __sub__ = __mul__ = __mod__ = __pow__ = nullify
    __floordiv__ = __div__ = __truediv__ = __divmod__ = nullify
    __lshift__ = __rshift__ = __and__ = __or__ = __xor__ = nullify
    __radd__ = __rsub__ = __rmul__ = __rmod__ = __rpow__ = nullify
    __rfloordiv__ = __rdiv__ = __rtruediv__ = __rdivmod__ = nullify
    __rlshift__ = __rrshift__ = __rand__ = __ror__ = __rxor__ = nullify
    __iadd__ = __isub__ = __imul__ = __imod__ = __ipow__ = nullify
    __ifloordiv__ = __idiv__ = __itruediv__ = __idivmod__ = nullify
    __ilshift__ = __irshift__ = __iand__ = __ior__ = __ixor__ = nullify
    __getitem__ = __setitem__ = __delitem__ = nullify
    __getslice__ = __setslice__ = __delslice__ = nullify
    __reversed__ = nullify
    # `x in n` converts the returned NullObject to a boolean, hence False
    __contains__ = __missing__ = nullify
    # __exit__ returns a falsy value: exceptions raised inside a `with`
    # block are not swallowed
    __enter__ = __exit__ = nullify

    # Some special methods cannot be transformed because they should
    # return special types:
    # __int__ = __long__ = __float__ = __complex__ = __oct__ = __hex__ = ...
    # __index__ = __trunc__ = __coerce__ = ...
    # __len__ = ...
    # __iter__ = ...
    # __round__ = __floor__ = __ceil__ = __trunc__ = ...
210 |
211 |
# One official instance of NullObject, that can be used like None.
# It is the sole instance of an ad hoc subclass whose repr is "Null".
Null = type('Null', (NullObject,), {"__repr__": lambda s: "Null"})()
214 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | *************************************************************
2 | A collection of gadgets that makes Python even more powerful
3 | *************************************************************
4 |
5 | There is no real structure for this lib, it's just a bunch of snippets I put together because I use them often.
6 |
7 | Not all of them are documented here, few of them have tests, it's zlib licence, you know the drill...
8 |
9 |
10 | To timestamp
11 | =============
12 |
13 | datetime.fromtimestamp exists but not the other way around, and it's not likely to change anytime soon (see: http://bugs.python.org/issue2736). In the meantime::
14 |
15 | >>> from datetime import datetime
16 | >>> to_timestamp(datetime(2000, 1, 1, 2, 1, 1))
17 | 946692061
18 | >>> datetime.fromtimestamp(946688461) # tu as codé celle là et pas l'autre connard !
19 | datetime.datetime(2000, 1, 1, 2, 1, 1)
20 |
21 |
22 | Get this nest value or a default
23 | =================================
24 |
25 | Don't::
26 |
27 | try:
28 | res = data['key'][0]['other key'][1]
29 | except (KeyError, IndexError):
30 | res = "value"
31 |
32 |
33 | Do::
34 |
35 | get(data, 'key', 0, 'other key', 1, default="value")
36 |
37 |
38 | For attributes::
39 |
40 | devise = attr(car, 'insurance', 'expiration_date', 'timezone')
41 |
42 |
43 | Iteration tools missing in itertools
44 | ===================================================================================
45 |
46 |
47 | Iteration by chunk or with a sliding window::
48 |
49 | >>> for chunk in chunks(l, 3):
50 | ... print list(chunk)
51 | ...
52 | [0, 1, 2]
53 | [3, 4, 5]
54 | [6, 7, 8]
55 | [9]
56 | >>> for slide in window(l, 3):
57 | ... print list(slide)
58 | ...
59 | [0, 1, 2]
60 | [1, 2, 3]
61 | [2, 3, 4]
62 | [3, 4, 5]
63 | [4, 5, 6]
64 | [5, 6, 7]
65 | [6, 7, 8]
66 | [7, 8, 9]
67 |
68 |
69 | Get the first element of any iterable (not just indexable) or the first one to be True::
70 |
71 | >>> first(xrange(10))
72 | 0
73 | >>> first_true(xrange(10))
74 | 1
75 | >>> first([], default="What the one thing we say to the God of Death ?")
76 | 'What the one thing we say to the God of Death ?'
77 |
78 | Sorted Set
79 | ===================================================================================
80 |
81 | Slow but useful data structure::
82 |
83 | >>> for x in sset((3, 2, 2, 2, 1, 2)):
84 | ... print x
85 | ...
86 | 3
87 | 2
88 | 1
89 |
90 |
91 | Dictionaries one liners
92 | ===================================================================================
93 |
94 |
95 | I wish + was overloaded for dicts::
96 |
97 | >>> dmerge({"a": 1, "b": 2}, {"b": 2, "c": 3})
98 | {'a': 1, 'c': 3, 'b': 2}
99 |
100 |
101 | Sometimes you do not want to simply overwrite the values inside the original dict, but merge them in custom fashion::
102 |
103 | >>> def my_merge(v1, v2):
104 | ... if isinstance(v1, dict) and isinstance(v2, dict):
105 | ... return dmerge(v1, v2)
106 | ... return v2
107 | >>> dmerge({"a": 1, "b": {'ok': 5}}, {"b": {'ko': 5 }, "c": 3}, my_merge)
108 | {'a': 1, 'c': 3, 'b': {'ko': 5, 'ok': 5}}
109 |
110 | Original dicts are not modified, but this will modify them::
111 |
112 | >>> from batbelt.structs import rename
113 | >>> rename({"a": 1, "b": 2})
114 | >>> rename({"a": 1, "b": 2}, 'b', 'z')
115 | {u'a': 1, u'z': 2}
116 |
117 | (not thread safe).
118 |
119 | Twisted but satisfying::
120 |
121 | >>> from batbelt.structs import unpack
122 | >>> dct = {'a': 2, 'b': 4, 'z': 42}
123 | >>> a, b, c = unpack(dct, 'a', 'b', 'c', default=1)
124 | >>> a
125 | 2
126 | >>> b
127 | 4
128 | >>> c
129 | 1
130 |
131 |
132 | String tools
133 | ===================================================================================
134 |
135 | The mandatory "slugify"::
136 |
137 | >>> slugify(u"Hélo Whorde")
138 | helo-whorde
139 |
140 | You get better slugification if you install the `unidecode` lib, but it's optional. You can specify `separator` if you don't like `-` or call directly `normalize()` (the underlying function) if you wish more control.
141 |
142 | The module also features escape_html/unescape_html, which are not useless, and json_dumps/json_loads, which understand datetime by default. Look at the source for these, I'm lazy (pull requests for documentation are welcome).
143 |
144 | There is also a poor man's template system using the `format()` string method on a file content. No loops, but still nice for quick and dirty file generation::
145 |
146 | from batbelt.strings import render
147 |
148 | render('stuff.conf.tpl', {"var": "value"}, "/etc/stuff.conf")
149 |
150 |
151 | Import this
152 | ===================================================================================
153 |
154 |
155 | `__import__` is weird. Let's abstract that ::
156 |
157 | TaClasse = import_from_path('foo.bar.TaClasse')
158 | ton_obj = TaClasse()
159 |
160 |
161 | Capture prints
162 | ===================================================================================
163 |
164 |
165 | A context manager to deal with those libs that print the result instead of returning it::
166 |
167 |
168 | >>> with capture_ouput() as (stdout, stderr):
169 | ... print "hello",
170 | ...
171 | >>> print stdout.read()
172 | hello
173 | >>> stdout.close()
174 |
175 |
176 | Create a decorator that accept arguments
177 | ===================================================================================
178 |
179 |
180 | I never remember how to do this. And I don't have to anymore.
181 |
182 | First, write the decorator::
183 |
184 | # all arguments after 'func' are your decorator argument
185 | @decorator_with_args()
186 | def your_decorator(func, arg1, arg2=None):
187 |
188 | if arg1:
189 | # do stuff here
190 |
191 | # do your usual decorator jimbo jumbo, wrapping, calling, returning...
192 | def wrapper():
193 | return func(arg2)
194 |
195 |
196 | return wrapper
197 |
198 |
199 |
200 | Enjoy :
201 |
202 | @your_decorator(False, 1)
203 | def hop(un_arg):
204 | # do stuff in the decorated function
205 |
206 |
207 |
208 | Add any directory to the PYTHON PATH
209 | ===========================================
210 |
211 | Accepts shell variables and relative paths :
212 |
213 | from batbelt.utils import add_to_pythonpath
214 | add_to_pythonpath("~/..")
215 |
216 | You can (and probably want to) specify a starting point if you pass a relative path. The default starting point is the result of `os.getcwd()`, while you probably want the directory containing your script. To do so, pass `__file__`::
217 |
218 | add_to_pythonpath("../..", starting_point=__file__)
219 |
220 | `starting_point` can be a file path (basename will be stripped) or a directory name. The relative path will be calculated from there.
221 |
222 | You can also choose where in the `sys.path` list your path will be added by passing `insertion_index`, which defaults to right after the last existing item.
223 |
224 |
225 | Poor man task queue
226 | ===================================================================================================
227 |
228 |
229 | You don't always need the guarantees of a big lib, you just need a little worker to do the job outside of the main thread::
230 |
231 |
232 |
233 | from batbelt.parallel import worker
234 |
235 | @worker()
236 | def task(arg):
237 | arg = arg + 10
238 | return arg
239 |
240 |
241 | # start the worker
242 | process = task.start()
243 |
244 | # send tasks
245 | for x in range(10):
246 | process.put(x)
247 |
248 | # (optionaly) get results
249 | for x in range(10):
250 | print process.get()
251 |
252 | ## 10
253 | ## 11
254 | ## 12
255 | ## 13
256 | ## 14
257 | ## 15
258 | ## 16
259 | ## 17
260 | ## 18
261 | ## 19
262 |
263 | # stop the worker
264 | process.stop()
265 |
266 | The worker uses multiprocessing by default, but if you prefer threads: `@worker(method="thread")`.
267 |
268 | If you look for them in the source code, you'll see goodies such as a Singleton, a Null Object pattern implementation and other things you don't use that often.
269 |
--------------------------------------------------------------------------------
/batbelt/hack.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # vim: ai ts=4 sts=4 et sw=4 nu
4 |
5 |
6 | import sys
7 |
8 | from functools import wraps
9 |
10 | from io import BytesIO
11 | from contextlib import contextmanager
12 |
13 |
@contextmanager
def capture_ouput(stdout_to=None, stderr_to=None):
    """
        Context manager replacing sys.stdout and sys.stderr for the
        duration of the 'with' block, so anything printed is captured.

        The 'with' statement yields a (stdout, stderr) tuple of file like
        objects holding the capture. They are `stdout_to` and `stderr_to`
        if you provide them, new BytesIO objects otherwise. When the block
        ends, the original streams are restored, and both captures are
        flushed and rewound so they are ready to be read.

        :Example:

        >>> with capture_ouput() as (stdout, stderr):
        ...    print "hello",
        ...
        >>> print stdout.getvalue().upper()
        HELLO
        >>> with capture_ouput() as (stdout, stderr): # doctest: +IGNORE_EXCEPTION_DETAIL
        ...    assert False
        ...
        Traceback (most recent call last):
        AssertionError
        >>> from tempfile import NamedTemporaryFile
        >>> f = NamedTemporaryFile(mode="rw+b")
        >>> with capture_ouput(f) as (stdout, stderr):
        ...    print "hello",
        ...
        >>> print stdout.read()
        hello


        .. :warning: this is NOT thread safe.

        .. :note: The file like objects containing the capture are not closed
                  automatically by this context manager. You are responsible
                  to do it.

        It does not capture exceptions, so they bubble out and print the
        stack trace anyway.
    """

    try:

        saved_out = sys.stdout
        saved_err = sys.stderr

        captured_out = stdout_to or BytesIO()
        sys.stdout = captured_out
        captured_err = stderr_to or BytesIO()
        sys.stderr = captured_err

        yield captured_out, captured_err

    finally:

        sys.stdout = saved_out
        sys.stderr = saved_err

        # Rewind what can be rewound; ValueError and TypeError raised by
        # the capture objects while doing so are ignored.
        for stream in (captured_out, captured_err):
            try:
                stream.flush()
                stream.seek(0)
            except (ValueError, TypeError):
                pass
73 |
74 |
def decorator_with_args(wrap=True,
                        function_assigned=('__module__', '__name__', '__doc__'),
                        function_updated=('__dict__',),
                        decorator_assigned=('__module__', '__name__', '__doc__'),
                        decorator_updated=('__dict__',),
                        ):
    """
        Use this decorator on a wannabe decorator.

        It will turn it into a decorator that accepts any arguments and
        wraps the resulting decorated function unless you set wrap=False.

        Usage:

            # You use @decorator_with_args on a function you wish to
            # be a decorator accepting arguments
            @decorator_with_args()
            def your_decorator(func, *args, **kwargs):
                def wrapper():
                    # do stuff
                    return func()
                return wrapper

        Your decorator must accept the function as the first argument, and
        expect the other arguments after that. It doesn't have to be *args,
        **kwargs, it can be any signature, as long as the first argument
        is the function to decorate.

        Once your wannabe decorator is decorated, you can use it this way:

            # When you use YOUR decorator, you will be able to pass arguments
            @your_decorator(arg1, arg2, arg3='foo')
            def a_function():
                # do stuff

            # If you don't use arguments, you still need the parenthesis
            @your_decorator()
            def another_function():
                # do stuff

        By default, @decorator_with_args will apply functools.wraps on
        the wrapper your wannabe decorator returns. If you don't wish that,
        pass wrap=False:

            @decorator_with_args(wrap=False)
            def your_decorator(func, *args, **kwargs):
                def wrapper():
                    # do stuff
                    return func()
                # This would be passed to functools.wraps() if you
                # didn't set wrap=False
                return wrapper

        You can also pass the same arguments you would pass to functools.wraps
        directly to @decorator_with_args. They will be passed along:

            @decorator_with_args(function_assigned=('__module__', '__name__'))
            def your_decorator(func, *args, **kwargs):
                def wrapper():
                    # do stuff
                    return func()
                # this will apply functools.wraps() with assigned being set
                # to ('__module__', '__name__')
                return wrapper

        The params are named function_assigned and function_updated instead of
        just assigned and updated like in functools.wraps.

        Also, @decorator_with_args will ALWAYS apply functools.wraps to the
        wrapper around your wannabe decorator. You can also control what's
        copied by passing decorator_assigned and decorator_updated the same way:

            # functools.wraps will always be applied to your_decorator()
            # but you can choose with which arguments
            @decorator_with_args(decorator_updated=('__dict__',))
            def your_decorator(func, *args, **kwargs):
                def wrapper():
                    # do stuff
                    return func()
                return wrapper

    """
    # decorator_with_args() will return this function, which will be the
    # real decorator called on the wannabe decorator.
    def _decorator(wannabe_decorator):

        # This is the function that will return your wrapped wannabe decorator.
        # It will add a wrapper that will call your wannabe decorator with
        # the arguments stored in a closure under the hood.
        def decorator_maker(*args, **kwargs):

            # This is the wrapper around your wannabe decorator. It
            # replaces your function so it can pass arguments to it.
            # We apply @wraps on it so it takes all metadata from
            # the wannabe decorator and attach them to itself.
            @wraps(wannabe_decorator, decorator_assigned, decorator_updated)
            def decorator_wrapper(func):

                # The wrapper calls your wannabe decorator, passing the
                # function to decorate and arguments to it.
                # It will get the wrapper your wannabe decorator returns,
                # and if wrap=True (default), will apply @wraps on it too.
                d = wannabe_decorator(func, *args, **kwargs)
                # Test the `wrap` parameter, NOT functools.wraps (which is
                # a function, hence always true).
                if wrap:
                    d = wraps(func, function_assigned, function_updated)(d)
                return d

            return decorator_wrapper

        return decorator_maker

    return _decorator
187 |
188 |
class MultiStopIteration(StopIteration):
    """
        Exception raised to leave a multibreak() block; multibreak()
        catches it and swallows it.
    """

    def throw(self):
        # Raise this very instance. multibreak() yields this bound method,
        # so it is what the `stop` callable of the with block calls.
        raise self
192 |
193 |
194 |
@contextmanager
def multibreak():
    '''

        Context manager which allows to break out of multiple nested for
        loops at once.

        The with statement yields a callable: calling it raises an
        exception which ends the whole with block and is then silenced.

        Example:

            >>> with multibreak() as stop:
            ...     for x in range(1, 4):
            ...         for z in range(1, 4):
            ...             for w in range(1, 4):
            ...                 print w
            ...                 if x * z * w == 2 * 2 * 2:
            ...                     print 'stop'
            ...                     stop()
            ...
            1
            2
            3
            1
            2
            3
            1
            2
            3
            1
            2
            3
            1
            2
            stop
    '''

    breaker = MultiStopIteration()

    try:
        yield breaker.throw
    except MultiStopIteration:
        # The block asked to stop: nothing else to do
        pass
233 |
234 |
def accept_callbacks(func):
    """
        A decorator making any function able to accept callbacks.

        The decorated function gets a `callbacks` list attribute. After
        each call, every callback of the list is called, in order, with
        the result of the function followed by the arguments the function
        was called with. The result is then returned to the caller.

        :Example:

            # make your function accept callbacks
            @accept_callbacks
            def add(a, b):
                return a + b

            # write a callback that accepts 'result' as the first parameter
            # and the function parameters as other parameters
            def my_callback(result, a, b):
                print("Function called with a=%s et b=%s !" % (a, b))
                print("It returned '%s'" % result)

            # add the callback to the callback list
            add.callbacks.append(my_callback)

            # enjoy
            >>> add(1, 2)
            Function called with a=1 et b=2 !
            It returned '3'
            3

    """

    registered = []

    @wraps(func)
    def wrapper(*args, **kwargs):

        outcome = func(*args, **kwargs)

        # The wrapper iterates this very list object: mutate it in place
        # (append, remove...) rather than rebinding `callbacks`.
        for notify in registered:
            notify(outcome, *args, **kwargs)

        return outcome

    wrapper.callbacks = registered

    return wrapper
278 |
--------------------------------------------------------------------------------
/batbelt/strings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #!/usr/bin/env python
3 | # vim: ai ts=4 sts=4 et sw=4 nu
4 |
5 | r"""
6 |
7 | Various tools to manipulate strings. Features tools to normalize and
8 | slugify strings. Also some ways to escape HTML and a JSON serializer
9 | that deals with datetime objects.
10 |
11 | Here is an example of the generic slugifier utility (as for 'normalize',
12 | 'escape_html', 'unescape_html' and "json_dumps" are pretty straightforward
13 | and don't need that much of an explanation. Ok, it's just because I'm
14 | too lazy to write it down really).
15 |
16 | Works out of the box for Latin-based scripts.
17 |
18 | Example:
19 |
20 | >>> from strings import slugify
21 | >>> slugify(u"C'est No\xebl !")
22 | u'cest-noel'
23 | >>> slugify(u"C'est No\xebl !", separator="_")
24 | u'cest_noel'
25 |
26 | It will handle all unicode equivalences if (and only if) the optional
27 | unidecode library is installed.
28 |
29 | Example:
30 |
31 | >>> slugify(u"\u5317\u4eb0")
32 | u'bei-jing'
33 |
34 | More about it:
35 | - http://en.wikipedia.org/wiki/Unicode_equivalence;
36 | - http://pypi.python.org/pypi/Unidecode.
37 |
38 | If you do have unidecode installed, but wish not to use it, use the
39 | unicodedata_slugify function:
40 |
41 | >>> slugify(u"H\xe9ll\xf8 W\xc3\xb6rld") # slugify() uses unidecode if it can
42 | u'hello-world'
43 | >>> unicodedata_slugify(u"H\xe9ll\xf8 W\xc3\xb6rld") # this will be more limited
44 | u'hell-world'
45 |
46 | In case you wish to keep the non ASCII characters "as-is", use
47 | unicode_slugify():
48 |
49 | >>> print unicode_slugify(u"C'est No\xebl !")
50 | cest-no\xebl
51 |
52 | """
53 |
54 | import re
55 | import os
56 | import codecs
57 | import json
58 | import unicodedata
59 |
60 | from datetime import datetime, timedelta, date, time
61 | from xml.sax.saxutils import escape, unescape
62 |
63 | from utils import CLASSIC_DATETIME_FORMAT, CLASSIC_DATETIME_PATTERN
64 |
65 |
66 |
def unicode_slugify(string, separator=r'-'):
    r"""
        Slugify a unicode string without trying to convert or strip
        non ASCII characters.

        Characters that are neither word characters (in the Unicode sense),
        whitespace nor the separator are removed. The result is stripped,
        lower-cased, and every run of whitespace and/or separators is
        collapsed into a single separator.

        The separator is taken literally: characters that are special in a
        regular expression (such as ']' or '^') are safe to use.

        Example:

            >>> print unicode_slugify(u"C'est No\xebl !")
            cest-no\xebl
            >>> unicode_slugify(u"Bonjour, tout l'monde !", separator="_")
            u'bonjour_tout_lmonde'
            >>> unicode_slugify(u"\tStuff with -- dashes and... spaces \n")
            u'stuff-with-dashes-and-spaces'
    """

    # Escaped so the separator cannot close or negate the character class.
    sep_class = re.escape(separator)

    cleaned = re.sub(r'[^\w\s' + sep_class + r']', '', string, flags=re.U)
    cleaned = cleaned.strip().lower()

    # A callable replacement is used because a plain string replacement
    # would have its backslashes interpreted by re.sub.
    slug = re.sub(r'[' + sep_class + r'\s]+', lambda _match: separator,
                  cleaned, flags=re.U)

    # type(u'') is `unicode` on Python 2 and `str` on Python 3.
    return type(u'')(slug)
86 |
87 |
def unicodedata_slugify(string, separator=r'-'):
    r"""
        Slugify a unicode string using unicodedata to normalize the string.

        The string is decomposed (NFKD) and every character that cannot be
        encoded to ASCII afterwards is dropped. Then characters that are
        neither word characters, whitespace nor the separator are removed,
        the result is stripped and lower-cased, and every run of whitespace
        and/or separators is collapsed into a single separator.

        The separator is taken literally: characters that are special in a
        regular expression (such as ']' or '^') are safe to use.

        Example:

            >>> unicodedata_slugify(u"H\xe9ll\xf8 W\xf6rld")
            u'hell-world'
            >>> unicodedata_slugify(u"Bonjour, tout l'monde !", separator="_")
            u'bonjour_tout_lmonde'
            >>> unicodedata_slugify(u"\tStuff with -- dashes and... spaces \n")
            u'stuff-with-dashes-and-spaces'
    """

    # Round trip through ASCII to drop what NFKD could not decompose, and
    # decode back so the regexes below always work on text, not bytes.
    ascii_text = unicodedata.normalize('NFKD', string)
    ascii_text = ascii_text.encode('ascii', 'ignore').decode('ascii')

    # Escaped so the separator cannot close or negate the character class.
    sep_class = re.escape(separator)

    cleaned = re.sub(r'[^\w\s' + sep_class + r']', '', ascii_text)
    cleaned = cleaned.strip().lower()

    # A callable replacement is used because a plain string replacement
    # would have its backslashes interpreted by re.sub.
    slug = re.sub(r'[' + sep_class + r'\s]+', lambda _match: separator,
                  cleaned)

    # type(u'') is `unicode` on Python 2 and `str` on Python 3.
    return type(u'')(slug)
105 |
106 |
def unidecode_slugify(string, separator=r'-'):
    r"""
        Slugify a unicode string using unidecode to normalize the string.

        The string is first passed to unidecode.unidecode(). Then characters
        that are neither word characters, whitespace nor the separator are
        removed, the result is stripped and lower-cased, and every run of
        whitespace and/or separators is collapsed into a single separator.

        The separator is taken literally: characters that are special in a
        regular expression (such as ']' or '^') are safe to use.

        Requires the optional `unidecode` module to be importable.

        Example:

            >>> unidecode_slugify(u"H\xe9ll\xf8 W\xf6rld")
            u'hello-world'
            >>> unidecode_slugify(u"Bonjour, tout l'monde !", separator="_")
            u'bonjour_tout_lmonde'
            >>> unidecode_slugify(u"\tStuff with -- dashes and... spaces \n")
            u'stuff-with-dashes-and-spaces'
    """

    transliterated = unidecode.unidecode(string)

    # Escaped so the separator cannot close or negate the character class.
    sep_class = re.escape(separator)

    cleaned = re.sub(r'[^\w\s' + sep_class + r']', '', transliterated)
    cleaned = cleaned.strip().lower()

    # A callable replacement is used because a plain string replacement
    # would have its backslashes interpreted by re.sub.
    slug = re.sub(r'[' + sep_class + r'\s]+', lambda _match: separator,
                  cleaned)

    # type(u'') is `unicode` on Python 2 and `str` on Python 3.
    return type(u'')(slug)
124 |
125 |
def unicodedata_normalize(string):
    r"""
        Return an ASCII-only version of a unicode string, replacing
        characters by their closest ASCII counterpart when possible.

        The string is decomposed with unicodedata (NFKD) and encoded to
        ASCII, silently dropping whatever cannot be encoded. The value
        returned is the encoded byte string.

        :Example:

            >>> unicodedata_normalize(u"H\xe9ll\xf8 W\xf6rld")
            'Hell World'

        This version uses unicodedata and provides limited yet
        useful results.
    """
    decomposed = unicodedata.normalize('NFKD', string)
    return decomposed.encode('ascii', 'ignore')
141 |
142 |
def unidecode_normalize(string):
    r"""
        Return a version of the string without non ASCII characters,
        replacing them by their closest ASCII counterpart when possible.

        This is a thin wrapper around unidecode.unidecode() and therefore
        requires the optional `unidecode` module to be importable.

        :Example:

            >>> unidecode_normalize(u"H\xe9ll\xf8 W\xf6rld")
            'Hello World'

        This version uses unidecode and provides enhanced results.
    """
    transliterated = unidecode.unidecode(string)
    return transliterated
156 |
157 |
# `slugify` and `normalize` are the public entry points: they point to the
# unidecode flavoured implementations when the optional `unidecode` module
# can be imported, and to the unicodedata based ones otherwise.
try:
    import unidecode
except ImportError:
    slugify = unicodedata_slugify
    normalize = unicodedata_normalize
else:
    slugify = unidecode_slugify
    normalize = unidecode_normalize
165 |
166 |
def escape_html(text, additional_escape={'"': "&quot;", "'": "&apos;"}):
    """
        Turn HTML special characters into HTML entities.

        '&', '<' and '>' are always escaped (this is what
        xml.sax.saxutils.escape does). `additional_escape` maps extra
        characters to their replacement; by default double and single
        quotes are escaped too.

        The default mapping is only read, never modified.

        Example:

            >>> escape_html("<strong>Ben & Jelly's !</strong>")
            '&lt;strong&gt;Ben &amp; Jelly&apos;s !&lt;/strong&gt;'

    """
    return escape(text, additional_escape)
179 |
180 |
def unescape_html(text, additional_escape={"&quot;": '"', "&apos;": "'"}):
    """
        Turn HTML entities back into the characters they stand for.

        '&amp;', '&lt;' and '&gt;' are always converted (this is what
        xml.sax.saxutils.unescape does). `additional_escape` maps extra
        entities to their replacement; by default the entities for double
        and single quotes are converted too.

        The default mapping is only read, never modified.

        Example:

            >>> unescape_html('&lt;strong&gt;Ben &amp; Jelly&apos;s !&lt;/strong&gt;')
            "<strong>Ben & Jelly's !</strong>"
    """
    return unescape(text, additional_escape)
191 |
192 |
193 |
class JSONEncoder(json.JSONEncoder):
    """
        Json encoder with date and time handling.

        datetime, date and time objects are serialized with strftime()
        using the matching format, timedelta objects are serialized by
        injecting their total number of seconds in `timedelta_format`.

        You should use naive datetime only. If you have timezone information,
        store them in a separate field.
    """

    # Default formats. The date and time formats are the two whitespace
    # separated halves of the datetime format.
    DATETIME_FORMAT = CLASSIC_DATETIME_FORMAT
    DATE_FORMAT, TIME_FORMAT = DATETIME_FORMAT.split()
    TIMEDELTA_FORMAT = "timedelta(seconds='%s')"

    def __init__(self, datetime_format=None, date_format=None,
                 time_format=None, timedelta_format=None, *args, **kwargs):
        """
            Each format falls back to the matching class level default
            when it is None (or empty). Remaining arguments are passed
            untouched to json.JSONEncoder.
        """

        self.datetime_format = datetime_format or self.DATETIME_FORMAT
        self.date_format = date_format or self.DATE_FORMAT
        self.time_format = time_format or self.TIME_FORMAT
        self.timedelta_format = timedelta_format or self.TIMEDELTA_FORMAT

        # `self` must not be passed explicitly here: super() already binds
        # it, and an extra positional argument would land in `skipkeys`.
        super(JSONEncoder, self).__init__(*args, **kwargs)

    def default(self, obj):
        """
            Serialize date/time related objects, and defer to
            json.JSONEncoder.default() for anything else.
        """

        # datetime is a subclass of date, so it has to be tested first.
        if isinstance(obj, datetime):
            return obj.strftime(self.datetime_format)

        if isinstance(obj, date):
            return obj.strftime(self.date_format)

        if isinstance(obj, time):
            return obj.strftime(self.time_format)

        if isinstance(obj, timedelta):
            return self.timedelta_format % obj.total_seconds()

        return json.JSONEncoder.default(self, obj)
234 |
235 |
236 |
class JSONDecoder(json.JSONDecoder):
    """
        Json decoder that decodes JSON encoded with JSONEncoder.

        The direct values of every decoded JSON object are matched against
        the date/time patterns and converted back to Python objects when
        they fit.
    """

    # Default patterns. The date and time patterns are the two whitespace
    # separated halves of the datetime pattern.
    DATETIME_PATTERN = CLASSIC_DATETIME_PATTERN
    DATE_PATTERN, TIME_PATTERN = DATETIME_PATTERN.split()
    # The 'seconds' group is read back in decode_on_match(). The optional
    # minus sign lets negative durations written by JSONEncoder round trip.
    TIMEDELTA_PATTERN = r"timedelta\(seconds='(?P<seconds>-?\d+(?:\.\d+)*)'\)"

    def __init__(self, datetime_pattern=None, date_pattern=None,
                 time_pattern=None, timedelta_pattern=None,
                 datetime_format=None, date_format=None, time_format=None,
                 *args, **kwargs):
        """
            Patterns are regular expressions used to recognize a value,
            formats are the strptime() formats used to parse it. Each one
            falls back to its default when None (or empty). Remaining
            arguments are passed untouched to json.JSONDecoder.
        """

        self.datetime_format = datetime_format or JSONEncoder.DATETIME_FORMAT
        self.date_format = date_format or JSONEncoder.DATE_FORMAT
        self.time_format = time_format or JSONEncoder.TIME_FORMAT

        self.datetime_pattern = re.compile(
            datetime_pattern or self.DATETIME_PATTERN)
        self.date_pattern = re.compile(date_pattern or self.DATE_PATTERN)
        self.time_pattern = re.compile(time_pattern or self.TIME_PATTERN)
        self.timedelta_pattern = re.compile(
            timedelta_pattern or self.TIMEDELTA_PATTERN)

        super(JSONDecoder, self).__init__(
            object_pairs_hook=self.object_pairs_hook, *args, **kwargs)

    def object_pairs_hook(self, obj):
        """
            Build a dict from the (key, value) pairs of a JSON object,
            passing every value through decode_on_match().
        """
        return dict((k, self.decode_on_match(v)) for k, v in obj)

    def decode_on_match(self, obj):
        """
            Try to match the string, and if it fits any date format,
            parse it and returns a Python object.

            The patterns are tried in this order: datetime, date, time,
            timedelta. The object is returned untouched if none matches.
        """

        string = unicode(obj)

        match = self.datetime_pattern.search(string)
        if match:
            return datetime.strptime(match.string, self.datetime_format)

        match = self.date_pattern.search(string)
        if match:
            return datetime.strptime(match.string, self.date_format).date()

        match = self.time_pattern.search(string)
        if match:
            return datetime.strptime(match.string, self.time_format).time()

        match = self.timedelta_pattern.search(string)
        if match:
            return timedelta(seconds=float(match.groupdict()['seconds']))

        return obj
292 |
293 |
def json_dumps(data, datetime_format=None, date_format=None, time_format=None,
               timedelta_format=None, *args, **kwargs):
    r"""
        Same as Python's json.dumps but also serializes datetime, date, time
        and timedelta.

        The formats and any extra argument are handed to JSONEncoder, whose
        encode() method produces the returned string.

        Example:
            >>> import datetime
            >>> json_dumps({'test': datetime.datetime(2000, 1, 1, 1, 1, 1)})
            '{"test": "2000-01-01 01:01:01.000000"}'
            >>> json_dumps({'test': datetime.date(2000, 1, 1)})
            '{"test": "2000-01-01"}'
            >>> json_dumps({'test': datetime.time(1, 1, 1)})
            '{"test": "01:01:01.000000"}'
            >>> json_dumps({'test': datetime.timedelta(1, 1)})
            '{"test": "timedelta(seconds=\'86401.0\')"}'
            >>> json_dumps({u'test': datetime.timedelta(1, 1), u'a': [1, 2]})
            '{"test": "timedelta(seconds=\'86401.0\')", "a": [1, 2]}'

    """
    encoder = JSONEncoder(datetime_format, date_format, time_format,
                          timedelta_format, *args, **kwargs)
    return encoder.encode(data)
316 |
317 |
def json_loads(string, datetime_pattern=None, date_pattern=None,
               time_pattern=None, timedelta_pattern=None,
               datetime_format=None, date_format=None, time_format=None,
               *args, **kwargs):
    r"""
        Same as Python's json.loads, but handles formats from
        batbelt.json_dumps which are currently mainly date formats.

        The patterns, formats and any extra argument are handed to
        JSONDecoder, whose decode() method produces the returned value.

        Example:

            >>> json_loads('{"test": "2000-01-01 01:01:01.000000"}')
            {u'test': datetime.datetime(2000, 1, 1, 1, 1, 1)}
            >>> json_loads('{"test": "2000-01-01"}')
            {u'test': datetime.date(2000, 1, 1)}
            >>> json_loads('{"test": "01:01:01.000000"}')
            {u'test': datetime.time(1, 1, 1)}
            >>> json_loads('{"test": "timedelta(seconds=\'86401.0\')"}')
            {u'test': datetime.timedelta(1, 1)}
            >>> json_loads('{"test": "timedelta(seconds=\'86401.0\')", "a": [1, 2]}')
            {u'test': datetime.timedelta(1, 1), u'a': [1, 2]}

    """
    decoder = JSONDecoder(datetime_pattern, date_pattern, time_pattern,
                          timedelta_pattern, datetime_format, date_format,
                          time_format, *args, **kwargs)
    return decoder.decode(string)
342 |
343 |
def template(tpl, context):
    """
        Read the given template, call .format() on its content with the
        context as keyword arguments, and return the result as a string.

        `tpl` can be a path or a file like object. Whatever open() refuses
        with a TypeError is considered to be a file like object and is
        read as is.

        A file opened here from a path is closed before returning. A file
        like object passed by the caller is left open.
    """

    try:
        opened = open(tpl)
    except TypeError:
        # Not something open() accepts: assume it is already a file like
        # object, which stays under the caller's responsibility.
        return tpl.read().format(**context)

    with opened:
        return opened.read().format(**context)
358 |
359 |
def render(tpl, context, target):
    """
        Render the template and write the result in a file.

        Template and target files can be a path or a file like objects.
        Whatever open() refuses with a TypeError is considered to be a
        file like object.

        The target is always closed at the end, even when it was passed
        as a file like object, and even if rendering or writing fails.
    """

    try:
        target = open(target, 'w')
    except TypeError:
        pass

    try:
        target.write(template(tpl, context))
    finally:
        target.close()
376 |
377 |
def write(path, *args, **kwargs):
    r"""
        Try to write to the file at `path` the values passed as `args` as lines.

        It will attempt decoding / encoding and casting automatically each value
        to a string: byte strings are decoded, anything that is not text
        after that is replaced by its repr(). Each value is followed by
        os.linesep.

        This is an utility function : it's slow and doesn't consider edge cases,
        but allows to do just what you want most of the time in one line.

        :Example:

            s = '/tmp/test'
            write(s, 'test', '\xe9', 1, ['fdjskl'])
            print open(s).read()
            test
            \xe9
            1
            ['fdjskl']

        You can optionally pass :

        mode : among 'a', 'w', which default to 'w'. Binary mode is forced.
        encoding : which default to utf8 and will condition decoding AND encoding
        errors : what to do when an encoding error occurs : 'replace' by default,
                 which replaces faulty characters with '?'

        You can pass string or unicode as *args, but if you pass strings,
        make sure you pass them with the same encoding you wish to write to
        the file.
    """

    mode = kwargs.get('mode', 'w')
    encoding = kwargs.get('encoding', 'utf8')
    errors = kwargs.get('errors', 'replace')

    # `bytes` is `str` on Python 2; type(u'') is `unicode` on Python 2 and
    # `str` on Python 3.
    text_type = type(u'')

    with codecs.open(path, mode=mode, encoding=encoding, errors=errors) as f:

        for line in args:

            if isinstance(line, bytes):
                line = line.decode(encoding, errors)

            if not isinstance(line, text_type):
                line = repr(line)

            f.write(line + os.linesep)
425 |
426 |
427 |
# When executed as a script, run the doctests found in the docstrings of
# this module.
if __name__ == "__main__":
    import doctest
    doctest.testmod()
431 |
432 |
--------------------------------------------------------------------------------
/batbelt/structs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # vim: ai ts=4 sts=4 et sw=4 nu
4 |
5 |
6 | from collections import MutableSet, deque
7 |
8 | from itertools import islice, chain
9 |
10 |
# Names exported by `from structs import *`. It includes `iget`, `flatten`
# and `skip_duplicates`, which the package __init__ re-exports as part of
# the public API.
__all__ = ['chunks', 'dmerge', 'get', 'window', 'dswap', 'subdict', 'first',
           'first_true', 'sset', 'iget', 'flatten', 'skip_duplicates']
13 |
14 |
def chunks(seq, chunksize, process=tuple):
    """
        Yields items from an iterator in iterable chunks.

        Each chunk holds at most `chunksize` consecutive items and is
        passed to `process` (tuple by default) before being yielded. The
        last chunk may be shorter. Nothing is yielded for an empty input.

        All chunks read from the same underlying iterator, so `process`
        should consume its argument (as tuple or list do).

        :Example:

            >>> list(chunks(range(7), 3))
            [(0, 1, 2), (3, 4, 5), (6,)]
    """
    it = iter(seq)
    while True:
        # Fetching the head explicitly tells apart "no more items" from an
        # empty chunk, and ends the generator cleanly: a StopIteration
        # leaking out of a generator is an error since PEP 479.
        try:
            head = next(it)
        except StopIteration:
            return
        yield process(chain([head], islice(it, chunksize - 1)))
22 |
23 |
24 |
def window(iterable, size=2):
    """
        Yields items by bunch of a given size, but rolling only one item
        in and out at a time when iterating.

        The very same deque object is yielded at every step and updated in
        place: copy it (e.g. with tuple()) if you need to keep a step.

        The first bunch is yielded even if the iterable holds less than
        `size` items, in which case it is shorter than `size`.
    """
    source = iter(iterable)
    frame = deque(maxlen=size)
    frame.extend(islice(source, size))
    yield frame
    for item in source:
        frame.append(item)
        yield frame
36 |
37 |
def dmerge(d1, d2, merge_func=None):
    """
        Create a new dictionary being the merge of the two passed as a
        parameter. If a key is in both dictionaries, the values are processed
        with the merge_func, called as merge_func(value_from_d1, value_from_d2).

        By default the value in the second dictionary erases the value in the
        first one.

        Neither d1 nor d2 is modified.

        :Example:

            >>> sorted(dmerge({'a': 1, 'b': 2}, {'b': 3}).items())
            [('a', 1), ('b', 3)]
            >>> sorted(dmerge({'a': 1, 'b': 2}, {'b': 3}, lambda x, y: x + y).items())
            [('a', 1), ('b', 5)]
    """
    merged = {}
    merged.update(d1)

    if merge_func is None:
        merged.update(d2)
        return merged

    # Plain iteration plus indexing works with any mapping, where
    # iteritems() only exists on Python 2 dictionaries.
    for key in d2:
        value = d2[key]
        if key in merged:
            merged[key] = merge_func(merged[key], value)
        else:
            merged[key] = value
    return merged
61 |
62 |
def dswap(dct):
    """
        Swap key and values of a given dictionary. Return a new dictionary.

        If you have duplicate values, the last one in the dictionary order
        will be used. Since dictionary order is not predictable, you should
        make sure to either remove duplicates values before processing, or
        just make sure losing some keys is not a problem for you.

        Values must be hashable since they become keys.

        example:

            >>> sorted(dswap({'a': 1, 'b': 2}).items())
            [(1, 'a'), (2, 'b')]
    """
    # Plain iteration plus indexing works with any mapping, where
    # iteritems() only exists on Python 2 dictionaries.
    return dict((dct[key], key) for key in dct)
79 |
80 |
def get(data, *keys, **kwargs):
    """
        Extract a value from nested mappings and sequences, applying the
        given keys and indices one after the other.

        If a KeyError, an IndexError or a TypeError is raised on the way,
        the value of the `default` keyword argument (None if missing) is
        returned instead. The default is also returned when no key is
        given at all.

            res = get(data, 'test', 0, 'bla', default="yeah")

        is the equivalent of

            try:
                res = data['test'][0]['bla']
            except (KeyError, IndexError, TypeError):
                res = "yeah"

    """
    fallback = kwargs.get('default', None)

    if not keys:
        return fallback

    current = data
    try:
        for step in keys:
            current = current[step]
    except (KeyError, IndexError, TypeError):
        return fallback

    return current
106 |
107 |
def iget(data, value, default=None):
    """
        Same as indexing, but works with any iterable,
        and accepts a default value.

        The iterable is consumed up to the requested position. `default`
        is returned when the iterable is too short.

        :Example:

            >>> iget(xrange(10), 0)
            0
            >>> iget(xrange(10), 5)
            5
            >>> iget(xrange(10), 10000, default='wololo')
            'wololo'
    """
    remaining = islice(data, value, None)
    return next(remaining, default)
126 |
127 |
128 |
def rename(dct, old_name, new_name):
    """
        Rename a key in a dictionary. No effect if the key does not exist.

        If `new_name` is already in the dictionary, its value is replaced.
        Renaming a key to itself leaves the dictionary content unchanged.

        The dictionary is modified in place, and returned.
    """

    try:
        # The value is popped before being stored again, so the entry
        # survives when old_name == new_name.
        dct[new_name] = dct.pop(old_name)
    except KeyError:
        pass

    return dct
143 |
144 |
def unpack(indexable, *args, **kwargs):
    """
        Return a generator yielding, for each of the given keys/indices,
        the matching value from `indexable` or a default value.

        The default value is passed with the `default` keyword argument
        and is None if not provided.

        :Example:

            >>> dct = {'a': 2, 'b': 4, 'z': 42}
            >>> a, b, c = unpack(dct, 'a', 'b', 'c', default=1)
            >>> a
            2
            >>> b
            4
            >>> c
            1
            >>> list(unpack(range(5, 10), 2, 4))
            [7, 9]
    """

    fallback = kwargs.get('default')

    for wanted in args:
        yield get(indexable, wanted, default=fallback)
168 |
169 |
170 |
def subdict(dct, include=(), exclude=()):
    """
        Return a dictionary that is a partial copy of the given one.

        All values in `include` are used as keys to be copied to
        the resulting dictionary. Keys listed in `include` but missing
        from the dictionary are ignored.

        You can also pass a list of keys to exclude instead by setting
        `exclude`. But you can't use both `include` and `exclude`: if you do,
        `exclude` will be ignored.

        With neither of them, a shallow copy of the dictionary is returned.

        Example:

            >>> subdict({1:None, 2: False, 3: True}, [1, 2])
            {1: None, 2: False}
            >>> subdict({1:None, 2: False, 3: True}, exclude=[1, 2])
            {3: True}

    """

    # Plain iteration plus indexing works with any mapping, where
    # iteritems() only exists on Python 2 dictionaries.
    if include:
        return dict((key, dct[key]) for key in dct if key in include)

    return dict((key, dct[key]) for key in dct if key not in exclude)
195 |
196 |
197 |
# aliased for compat, but should probably be removed
def first(data, default=None):
    """
        Return the first item of any iterable, or `default` if the
        iterable is empty.

        Shortcut for iget(data, 0, default).
    """
    return iget(data, 0, default)
200 |
201 |
def first_true(iterable, key=lambda x: x, default=None):
    """
        Return the first item of any iterable for which the key is True.

        By default the key is the entire element.

        If the iterable is empty, or if the key is True for none of its
        items, return the default value.
    """
    for candidate in iterable:
        if not key(candidate):
            continue
        return candidate
    return default
214 |
215 |
def skip_duplicates(iterable, key=lambda x: x):
    """
        Returns a generator that will yield all objects from iterable, skipping
        duplicates.

        Duplicates are identified using the `key` function to calculate a
        unique fingerprint. Fingerprints already seen are stored in a
        set(), so they are told apart by hashing, not by comparing the
        objects themselves.

        By default the fingerprint is the object itself,
        which ensures the function works as-is with iterables of primitives
        such as int, str or tuple.

        :Example:

            >>> list(skip_duplicates([1, 2, 3, 4, 4, 2, 1, 3 , 4]))
            [1, 2, 3, 4]

        The return value of `key` MUST be hashable, which means for
        non hashable objects such as dict, set or list, you need to specify
        a function that returns a hashable fingerprint. A TypeError with
        an explicit message is raised otherwise.

        :Example:

            >>> list(skip_duplicates(([], [], (), [1, 2], (1, 2)), lambda x: tuple(x)))
            [[], [1, 2]]
            >>> list(skip_duplicates(([], [], (), [1, 2], (1, 2)), lambda x: (type(x), tuple(x))))
            [[], (), [1, 2], (1, 2)]

        For more complex types, such as custom classes, the default behavior
        is to remove nothing. You MUST provide a `key` function if you wish
        to filter those.

        :Example:

            >>> class Test(object):
            ...     def __init__(self, foo='bar'):
            ...         self.foo = foo
            ...     def __repr__(self):
            ...         return "Test('%s')" % self.foo
            ...
            >>> list(skip_duplicates([Test(), Test(), Test('other')]))
            [Test('bar'), Test('bar'), Test('other')]
            >>> list(skip_duplicates([Test(), Test(), Test('other')], lambda x: x.foo))
            [Test('bar'), Test('other')]

        See also :
            - strip_duplicates : a simpler, slower function that returns a list
                                 of elements with no duplicates. It accepts
                                 non hashable elements and honors __eq__.
            - remove_duplicates : remove duplicates from a list in place.
                                  Most resource efficient method.
    """
    fingerprints = set()

    for x in iterable:
        # Computed outside of the try block: an error raised by `key`
        # itself must reach the caller untouched.
        fingerprint = key(x)

        try:
            already_seen = fingerprint in fingerprints
        except TypeError:
            try:
                hash(fingerprint)
            except TypeError:
                raise TypeError(
                    "Calculating the key on one element resulted in a non hashable "
                    "object of type '%s'. Change the 'key' parameter to a function "
                    "that always, returns a hashable object. Hint : primitives "
                    "like int, str or tuple, are hashable, dict, set and list are "
                    "not. \nThe object that triggered the error was:\n%s" % (
                    type(fingerprint), x)
                )
            else:
                # Hashable after all: the error comes from somewhere else.
                raise

        if not already_seen:
            yield x
            fingerprints.add(fingerprint)
292 |
293 |
294 |
def strip_duplicates(iterable, equals=lambda x, y: x == y):
    """
        Return a list of elements from iterable, without duplicates.

        The first occurrence of each element is kept, in order of
        appearance. This uses equality to find duplicates, and will honor
        __eq__, but will not work on infinite iterables.

        `equals` is called with an element already kept as first argument
        and the candidate as second argument.

        :Examples:

            >>> strip_duplicates('fdjqkslfjdmkfdsqjkfmjqsdmlkfjqslkmfjsdklfl')
            ['f', 'd', 'j', 'q', 'k', 's', 'l', 'm']
            >>> strip_duplicates(([], [], (), [1, 2], (1, 2)))
            [[], (), [1, 2], (1, 2)]
            >>> strip_duplicates(([], [], (), [1, 2], (1, 2)), lambda x, y: tuple(x) == tuple(y))
            [[], [1, 2]]
            >>> class Test(object):
            ...     def __init__(self, foo='bar'):
            ...         self.foo = foo
            ...     def __repr__(self):
            ...         return "Test('%s')" % self.foo
            ...     def __eq__(self, other):
            ...         return self.foo == other.foo
            >>> strip_duplicates([Test(), Test(), Test('other')])
            [Test('bar'), Test('other')]

        See also :
            - skip_duplicates : returns a generator yielding elements without
                                duplicates. Faster, works on infinite iterables,
                                but uses hashes instead of equality.
            - remove_duplicates : remove duplicates from a list in place.
                                  Most resource efficient method.
    """

    kept = []

    for candidate in iterable:
        # Compared with every element already kept, oldest first.
        for reference in kept:
            if equals(reference, candidate):
                break
        else:
            kept.append(candidate)

    return kept
343 |
344 |
def remove_duplicates(lst, equals=lambda x, y: x == y):
    """
        Removes duplicates from a list, in place.

        The first occurrence of each element is kept. `equals` is called
        with the element being kept as first argument and the candidate
        as second argument.

        Works only with lists and modifies the list, but it's pretty resource
        saving compared to other methods. The list is returned for
        convenience. A TypeError is raised for anything that is not a list.

        :Example:

            >>> remove_duplicates([1, 1, 1, 2, 1, 3, 2])
            [1, 2, 3]

        See also :
            - skip_duplicates : returns a generator yielding elements without
                                duplicates. Faster, works on infinite iterables,
                                but uses hashes instead of equality.
            - strip_duplicates : a simpler, slower function that returns a list
                                 of elements with no duplicates. It accepts
                                 non hashable elements and honors __eq__.
    """

    if not isinstance(lst, list):
        raise TypeError('This function works only with lists.')

    i1 = 0

    while i1 < len(lst) - 1:

        elem = lst[i1]

        i2 = i1 + 1
        while i2 < len(lst):
            if equals(elem, lst[i2]):
                # The following element slides into position i2: it must be
                # examined too, so the index does not move.
                del lst[i2]
            else:
                i2 += 1

        i1 += 1

    return lst
381 |
382 |
383 |
# Positions of the fields in a node of the linked list used by sset.
KEY, PREV, NEXT = range(3)


class sset(MutableSet):
    """
        Set that preserves ordering.

        Items are kept in insertion order. Adding an item that is already
        in the set does not move it.

        Nodes of a circular doubly linked list, each one being a
        [key, prev, next] list, record the order. A dictionary maps each
        key to its node.

        From http://code.activestate.com/recipes/576694/
    """

    def __init__(self, iterable=None):
        self.end = end = []
        end += [None, end, end]         # sentinel node for doubly linked list
        self.map = {}                   # key --> [key, prev, next]
        if iterable is not None:
            self |= iterable

    def __len__(self):
        return len(self.map)

    def __contains__(self, key):
        return key in self.map

    def add(self, key):
        """
            Add the key at the end of the set, unless already present.
        """
        if key not in self.map:
            end = self.end
            curr = end[PREV]
            curr[NEXT] = end[PREV] = self.map[key] = [key, curr, end]

    def discard(self, key):
        """
            Remove the key from the set. No effect if it is not there.
        """
        if key in self.map:
            key, prev_node, next_node = self.map.pop(key)
            prev_node[NEXT] = next_node
            next_node[PREV] = prev_node

    def __iter__(self):
        end = self.end
        curr = end[NEXT]
        while curr is not end:
            yield curr[KEY]
            curr = curr[NEXT]

    def __reversed__(self):
        end = self.end
        curr = end[PREV]
        while curr is not end:
            yield curr[KEY]
            curr = curr[PREV]

    def pop(self, last=True):
        """
            Remove and return the last item of the set, or the first one
            if `last` is False. Raise KeyError if the set is empty.
        """
        if not self:
            raise KeyError('set is empty')
        key = next(reversed(self)) if last else next(iter(self))
        self.discard(key)
        return key

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        """
            Order matters when comparing with another sset, and is
            ignored when comparing with any other iterable.
        """
        if isinstance(other, sset):
            return len(self) == len(other) and list(self) == list(other)
        try:
            return set(self) == set(other)
        except TypeError:
            # `other` can't be turned into a set (not iterable, or holding
            # unhashable items): let Python settle the comparison rather
            # than raise from an equality test.
            return NotImplemented

    def __del__(self):
        self.clear()                    # remove circular references
452 |
453 |
454 |
class Flattener(object):
    """
        Create a flattener that you can call on a deeply nested data
        structures to iterate over the items as if it were a flat iterable.

        The flattener returns a generator that lazily yields the items and
        deals with up to hundred of levels of nesting (~800 on my machine,
        and you can control it with sys.setrecursionlimit).

        A default flattener named 'flatten' is available by default.

        :Example:

            a = []
            for i in range(10):
                a = [a, i]
            print(a)

            [[[[[[[[[[[], 0], 1], 2], 3], 4], 5], 6], 7], 8], 9]

            print(list(flatten(a)))

            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

        By default, it flattens all the types listed in
        Flattener.DEFAULT_FLATTEN_TYPES but you can pass your own list via
        flatten_types while creating a Flattener instance.

        For ambiguous types like dict, you can pass iterable_getters, a
        mapping type / callback letting you define how to extract items from
        each type.

        :Example:

            a = []
            for i in range(2):
                a = [a, i] + [{'a': 1., 'b': {'c': 3.}}]
            print(a)

            [[[], 0, {'a': 1.0, 'b': {'c': 3.0}}], 1, {'a': 1.0, 'b': {'c': 3.0}}]

            new_ft = Flattener.DEFAULT_FLATTEN_TYPES + (dict,)

            dico_flatten = Flattener(flatten_types=new_ft,
                                     iterable_getters={dict: lambda x: x.items()})

            print(list(dico_flatten(a)))

            [0, u'a', 1.0, u'b', u'c', 3.0, 1, u'a', 1.0, u'b', u'c', 3.0]

    """

    # Type of lazy integer ranges: `xrange` where it exists (Python 2),
    # `range` otherwise.
    try:
        _range_type = xrange
    except NameError:
        _range_type = range

    DEFAULT_FLATTEN_TYPES = (
        list,
        tuple,
        set,
        (x for x in ()).__class__,
        _range_type,
        deque,
        MutableSet,
        # Sequence # warning, a string is a subclass of Sequence
    )

    def __init__(self, flatten_types=None, iterable_getters=None):
        """
            flatten_types: tuple of the types to flatten, defaults to
                           DEFAULT_FLATTEN_TYPES when None (or empty).
            iterable_getters: mapping type / callback, see
                              transform_iterable(). Defaults to a new
                              empty mapping for each instance.
        """
        self.flatten_types = flatten_types or self.DEFAULT_FLATTEN_TYPES
        # A fresh dict per instance: a dict literal used as default value
        # would be shared by every flattener created without getters.
        if iterable_getters is None:
            iterable_getters = {}
        self.iterable_getters = iterable_getters

    def should_flatten(self, obj):
        """
            Returns if the object should be flatten or not, checking if the
            objects is an instance of type listed in DEFAULT_FLATTEN_TYPES
            by default.
        """
        return isinstance(obj, self.flatten_types)

    def transform_iterable(self, obj):
        """
            Apply a pre-processing to an object before iterate on it. Can
            be useful for types such as dict on which you may want to call
            values() or items() before iteration.

            By default, it checks if the object is a DIRECT instance (not
            a subclass) of any key in iterable_getters, passed in __init__
            and applies the transform.

            iterable_getters should be a mapping with types as key and
            transformation function as values, such as :

                {dict: lambda x: x.items()}

            iterable_getters default value is empty, making
            transform_iterable a noop.
        """
        if obj.__class__ in self.iterable_getters:
            return self.iterable_getters[obj.__class__](obj)
        return obj

    def __call__(self, iterable):
        """
            Returns a generator yielding items from a deeply nested iterable
            like it would be a flat one.

            Only the items found in the iterable are checked against the
            types to flatten, the iterable itself is iterated as is.
        """
        for e in iterable:
            if self.should_flatten(e):
                for f in self(self.transform_iterable(e)):
                    yield f
            else:
                yield e


# Default flattener, using the default types and no iterable getter.
flatten = Flattener()
571 |
572 |
--------------------------------------------------------------------------------