├── .editorconfig ├── .gitignore ├── batbelt ├── __init__.py ├── utils.py ├── parallel.py ├── objects.py ├── hack.py ├── strings.py └── structs.py ├── setup.py └── README.rst /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: http://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | [*] 7 | indent_style = space 8 | end_of_line = lf 9 | charset = utf-8 10 | trim_trailing_whitespace = true 11 | insert_final_newline = true 12 | 13 | [*.py] 14 | indent_size = 4 15 | 16 | [*.{html,js,rb,scss,xml,less,css}] 17 | indent_size = 2 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # temporary, hidden and backup files 2 | 3 | *~ 4 | *.bak 5 | *.log 6 | *.tmp 7 | *_index 8 | *.orig 9 | *.swp 10 | .* # all hidden files... 11 | !.gitignore # ...except gitignore 12 | 13 | # binaries 14 | 15 | *.pyc 16 | *.pyo 17 | 18 | # files generated by setuptools 19 | 20 | *.egg-info 21 | dist 22 | build 23 | *.tar.gz 24 | *.egg 25 | *.in 26 | 27 | # others 28 | 29 | content 30 | *.sublime-project 31 | *.sublime-workspace 32 | -------------------------------------------------------------------------------- /batbelt/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim: ai ts=4 sts=4 et sw=4 nu 4 | 5 | 6 | __version__ = "0.5.2" 7 | 8 | from strings import (slugify, normalize, escape_html, 9 | unescape_html, json_dumps, json_loads) 10 | from structs import (chunks, get, dmerge, sset, dswap, window, 11 | subdict, iget, flatten, skip_duplicates) 12 | from objects import attr, import_from_path, Null 13 | from utils import to_timestamp 14 | from hack import decorator_with_args 15 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | 2 | from setuptools import setup, find_packages 3 | 4 | open('MANIFEST.in', 'w').write('\n'.join(( 5 | "include *.rst", 6 | ))) 7 | 8 | from batbelt import __version__ 9 | 10 | setup( 11 | 12 | name="batbelt", 13 | version=__version__, 14 | packages=find_packages('.'), 15 | author="Sam et Max", 16 | author_email="lesametlemax@gmail.com", 17 | description="A collection of gagdets that makes Python even more powerful.", 18 | long_description=open('README.rst').read(), 19 | include_package_data=True, 20 | classifiers=[ 21 | 'Programming Language :: Python', 22 | "Intended Audience :: Information Technology", 23 | "License :: OSI Approved :: zlib/libpng License", 24 | "Natural Language :: English", 25 | "Programming Language :: Python :: 2.7" 26 | ], 27 | url="https://github.com/sametmax/Bat-belt" 28 | ) 29 | 30 | -------------------------------------------------------------------------------- /batbelt/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim: ai ts=4 sts=4 et sw=4 nu 4 | 5 | """ 6 | The infamous utils.py module filled with functions you don't where else 7 | to put. 8 | """ 9 | 10 | 11 | import sys 12 | import os 13 | 14 | from datetime import datetime 15 | 16 | 17 | CLASSIC_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f' 18 | CLASSIC_DATETIME_PATTERN = r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6}' 19 | 20 | 21 | 22 | 23 | def to_timestamp(dt): 24 | """ 25 | Return a timestamp for the given datetime object. 26 | 27 | Example: 28 | 29 | >>> import datetime 30 | >>> to_timestamp(datetime.datetime(2000, 1, 1, 1, 1, 1, 1)) 31 | 946688461 32 | """ 33 | return (dt - datetime(1970, 1, 1)).total_seconds() 34 | 35 | 36 | class ImportableItems(list): 37 | 38 | def __init__(self, *args, **kwargs): 39 | super(ImportableItems, self).__init__(*args, **kwargs) 40 | self.non_importable_items = {} 41 | 42 | def append(self, item_name): 43 | self.non_importable_items.pop(item_name, None) 44 | super(ImportableItems, self).append(item_name) 45 | 46 | 47 | def import_list(*args): 48 | """ 49 | Allow to create easily a __all__ listing for a module. 50 | 51 | Returns a value for __all__ and a decorator to add anything 52 | to it easily. 53 | """ 54 | 55 | importable_items = ImportableItems() 56 | importable_items.non_importable_items.update(sys._getframe(1).f_globals) 57 | for item in args: 58 | importable_items.append(item) 59 | 60 | 61 | def importable(func, name=None): 62 | 63 | if name is None: 64 | try: 65 | name = func.__name__ 66 | except AttributeError: 67 | raise ValueError('You must provide a name for ' 68 | 'this item: %s' % repr(func)) 69 | importable_items.append(name) 70 | 71 | return func 72 | 73 | return importable_items, importable 74 | 75 | 76 | 77 | def add_to_pythonpath(path, starting_point='.', insertion_index=None): 78 | """ 79 | Add the directory to the sys.path. 80 | 81 | You can path an absolute or a relative path to it. 82 | 83 | If you choose to use a relative path, it will be relative to 84 | `starting_point` by default, which is set to '.'. 85 | 86 | You may want to set it to something like __file__ (the basename will 87 | be stripped, and the current file's parent directory will be used 88 | as a starting point, which is probably what you expect in the 89 | first place). 90 | 91 | :example: 92 | 93 | >>> add_to_pythonpath('../..', __file__) 94 | """ 95 | 96 | if not os.path.isabs(path): 97 | 98 | if os.path.isfile(starting_point): 99 | starting_point = os.path.dirname(starting_point) 100 | 101 | path = os.path.join(starting_point, path) 102 | 103 | path = os.path.realpath(os.path.expandvars(os.path.expanduser(path))) 104 | 105 | if path not in sys.path: 106 | if insertion_index is None: 107 | sys.path.append(path) 108 | else: 109 | sys.path.insert(insertion_index, path) 110 | 111 | -------------------------------------------------------------------------------- /batbelt/parallel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim: ai ts=4 sts=4 et sw=4 nu 4 | 5 | 6 | 7 | 8 | 9 | import threading 10 | import multiprocessing 11 | from functools import wraps 12 | from Queue import Queue, Empty 13 | 14 | __all__ = ['process', 'thread'] 15 | 16 | 17 | def process(*proxy_args, **proxy_kwargs): 18 | 19 | fire = proxy_kwargs.pop('fire', False) or proxy_args or proxy_kwargs 20 | 21 | def decorator(func): 22 | 23 | if fire: 24 | 25 | @wraps(func) 26 | def fun(*args, **kwargs): 27 | func(*args, **kwargs) 28 | 29 | fun.process = multiprocessing.Process(target=fun, args=proxy_args, 30 | kwargs=proxy_kwargs) 31 | fun.process.start() 32 | 33 | return fun 34 | 35 | else: 36 | 37 | @wraps(func) 38 | def wrapper(*args, **kwargs): 39 | process = multiprocessing.Process(target=func, args=args, 40 | kwargs=kwargs) 41 | process.start() 42 | return process 43 | 44 | return wrapper 45 | 46 | return decorator 47 | 48 | 49 | def thread(*proxy_args, **proxy_kwargs): 50 | 51 | fire = proxy_kwargs.pop('fire', False) or proxy_args or proxy_kwargs 52 | 53 | def decorator(func): 54 | 55 | if fire: 56 | 57 | @wraps(func) 58 | def fun(*args, **kwargs): 59 | func(*args, **kwargs) 60 | 61 | fun.thread = threading.Thread(target=fun, args=proxy_args, 62 | kwargs=proxy_kwargs) 63 | fun.thread.start() 64 | 65 | return fun 66 | 67 | else: 68 | 69 | @wraps(func) 70 | def wrapper(*args, **kwargs): 71 | thread = threading.Thread(target=func, args=args, 72 | kwargs=kwargs) 73 | thread.start() 74 | return thread 75 | 76 | return wrapper 77 | 78 | return decorator 79 | 80 | 81 | class StopWorker(): 82 | pass 83 | 84 | 85 | def worker(block=True, timeout=0.1, method='process'): 86 | """ 87 | Turn a function into a worker: 88 | 89 | from parallel import worker 90 | 91 | @worker() 92 | def test(mot): 93 | print "in %s" % mot 94 | return mot 95 | 96 | process = test.start() 97 | 98 | for x in range(10): 99 | process.put(x) 100 | 101 | for x in range(10): 102 | print "out %s" % process.get() 103 | 104 | process.stop() 105 | 106 | Which outputs: 107 | 108 | in 0 109 | out 0 110 | in 1 111 | in 2 112 | out 1 113 | in 3 114 | out 2 115 | in 4 116 | out 3 117 | out 4 118 | in 5 119 | out 5 120 | in 6 121 | out 6 122 | in 7 123 | out 7 124 | in 8 125 | out 8 126 | in 9 127 | out 9 128 | 129 | If an exception occures, it will be send back in the message queue. 130 | 131 | So you may want to check the result with isintance(). 132 | """ 133 | 134 | def decorator(func): 135 | 136 | if method == 'thread': 137 | Q = Queue 138 | Manager = threading.Thread 139 | else: 140 | Q = multiprocessing.Queue 141 | Manager = multiprocessing.Process 142 | 143 | in_queue = Q() 144 | out_queue = Q() 145 | 146 | def main_loop(): 147 | 148 | while True: 149 | try: 150 | 151 | res = in_queue.get(block, timeout) 152 | 153 | if isinstance(res, StopWorker): 154 | break 155 | 156 | out_queue.put(func(res)) 157 | 158 | except Empty: 159 | pass 160 | except KeyboardInterrupt: 161 | break 162 | except Exception as e: 163 | out_queue.put(e) 164 | 165 | @wraps(func) 166 | def wrapper(*args, **kwargs): 167 | return func(*args, **kwargs) 168 | 169 | def get(block=block, timeout=timeout): 170 | res = out_queue.get(block, timeout) 171 | if isinstance(res, Exception): 172 | raise res 173 | return res 174 | 175 | wrapper.manager = Manager(target=main_loop) 176 | wrapper.manager.get = get 177 | wrapper.manager.stop = lambda: in_queue.put(StopWorker(), block, timeout) 178 | wrapper.manager.put = lambda x: in_queue.put(x, block, timeout) 179 | wrapper.start = lambda: wrapper.manager.start() or wrapper.manager 180 | 181 | return wrapper 182 | 183 | return decorator 184 | -------------------------------------------------------------------------------- /batbelt/objects.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim: ai ts=4 sts=4 et sw=4 nu 4 | 5 | 6 | 7 | from functools import wraps 8 | 9 | 10 | __all__ = ['import_from_path', 'attr', 'dynamicmethod', 'NullObject', 'Null'] 11 | 12 | 13 | def import_from_path(path): 14 | """ 15 | Import a class dynamically, given it's dotted path. 16 | """ 17 | module_name, class_name = path.rsplit('.', 1) 18 | try: 19 | return getattr(__import__(module_name, fromlist=[class_name]), class_name) 20 | except AttributeError: 21 | raise ImportError('Unable to import %s' % path) 22 | 23 | 24 | 25 | def attr(obj, *attrs, **kwargs): 26 | """ 27 | Follow chained attributes and get the value of the last attributes. 28 | If an attribute error is raised, returns the default value. 29 | 30 | res = attr(data, 'test', 'o', 'bla', default="yeah") 31 | 32 | is the equivalent of 33 | 34 | try: 35 | res = getattr(getattr(getattr(data, 'test'), 'o'), 'bla') 36 | except AttributeError: 37 | res = "yeah" 38 | 39 | """ 40 | try: 41 | value = getattr(obj, attrs[0]) 42 | 43 | for attr in attrs[1:]: 44 | value = getattr(value, attr) 45 | except (IndexError, AttributeError): 46 | return kwargs.get('default', None) 47 | 48 | return value 49 | 50 | 51 | class InstanceAndClassMethodDescriptor(object): 52 | """ 53 | Return a wrapper around the method calling the method as a classmethod 54 | or as a normal method depending of if it's called from an instance 55 | or a class. 56 | """ 57 | 58 | def __init__(self, method): 59 | self.method = method 60 | 61 | def __get__(self, obj, obj_type): 62 | 63 | @wraps(self.method) 64 | def wrapper(*args, **kwargs): 65 | return self.method(obj or obj_type, *args, **kwargs) 66 | 67 | return wrapper 68 | 69 | 70 | 71 | def dynamicmethod(func): 72 | """ 73 | Wrap the method so it becomes a function wich will receives the first 74 | parameter as been the class or the instance depanding of if it's called 75 | from a class or an instance. 76 | """ 77 | return InstanceAndClassMethodDescriptor(func) 78 | 79 | 80 | class Singleton(type): 81 | """ 82 | Metaclass implementing the Singleton pattern. 83 | 84 | Usage: 85 | 86 | >>> class Test(object): 87 | ... __metaclass__ = Singleton 88 | ... 89 | >>> id(Test()) == id(Test()) 90 | True 91 | """ 92 | 93 | _instances = {} 94 | 95 | def __call__(cls, *args, **kwargs): 96 | if cls not in cls._instances: 97 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 98 | return cls._instances[cls] 99 | 100 | 101 | 102 | class NullObject(object): 103 | """ 104 | 105 | Null object pattern implementation. This object is a singleton. It 106 | accept any parameters, and attribute lookup and any method calls, 107 | and always return itself. 108 | 109 | Example: 110 | 111 | >>> n = NullObject() 112 | >>> n 113 | NullObject() 114 | >>> n == NullObject('value') == NullObject('value', param='value') 115 | True 116 | >>> n() == n('value') == n('value', param='value') == n 117 | True 118 | >>> n.attr1 119 | NullObject() 120 | >>> n.attr1.attr2 121 | NullObject() 122 | >>> n.method1() 123 | NullObject() 124 | >>> n.method1().method2() 125 | NullObject() 126 | >>> n.method('value') 127 | NullObject() 128 | >>> n.method(param='value') 129 | NullObject() 130 | >>> n.method('value', param='value') 131 | NullObject() 132 | >>> n.attr1.method1() 133 | NullObject() 134 | >>> n.method1().attr1 135 | NullObject() 136 | >>> n.attr1 = 'value' 137 | >>> n.attr1.attr2 = 'value' 138 | >>> del n.attr1 139 | >>> del n.attr1.attr2.attr3 140 | >>> str(n) == '' 141 | True 142 | >>> n + 1 / 7 % 3 143 | NullObject() 144 | >>> n[1] == n[:4] == n 145 | True 146 | >>> 'test' in n 147 | False 148 | >>> n['test'] 149 | NullObject() 150 | >>> NullObject() >> 1 151 | NullObject() 152 | >>> NullObject() == None 153 | True 154 | 155 | 156 | Iterating on NullObject() will end up in a infinite loop with the next item 157 | always beeing NullObject(). Stuff like sorted() will hang. 158 | """ 159 | 160 | def __init__(self, *args, **kwargs): 161 | """ 162 | NullObject accept any arguments 163 | """ 164 | pass 165 | 166 | def __repr__(self): 167 | return "NullObject()" 168 | 169 | def __str__(self): 170 | return "" 171 | 172 | def __eq__(self, other): 173 | """ 174 | NullObject is only equal to itself or None 175 | """ 176 | return isinstance(other, NullObject) or other is None 177 | 178 | # Like None, NullObject is False is a boolean context 179 | __nonzero__ = __bool__ = lambda self: False 180 | 181 | # Any attribute lookup, method call or operation on NullObject returns NullObject 182 | nullify = lambda self, *x, **kwargs: self 183 | 184 | __call__ = nullify 185 | __getattr__ = __setattr__ = __delattr__ = nullify 186 | __cmp__ = __ne__ = __lt__ = __gt__ = __le__ = __ge__ = nullify 187 | __pos__ = __neg__ = __abs__ = __invert__ = nullify 188 | __add__ = __sub__ = __mul__ = __mod__ = __pow__ = nullify 189 | __floordiv__ = __div__ = __truediv__ = __divmod__ = nullify 190 | __lshift__ = __rshift__ = __and__ = __or__ = __xor__ = nullify 191 | __radd__ = __rsub__ = __rmul__ = __rmod__ = __rpow__ = nullify 192 | __rfloordiv__ = __rdiv__ = __rtruediv__ = __rdivmod__ = nullify 193 | __rlshift__ = __rrshift__ = __rand__ = __ror__ = __rxor__ = nullify 194 | __iadd__ = __isub__ = __imul__ = __imod__ = __ipow__ = nullify 195 | __ifloordiv__ = __idiv__ = __itruediv__ = __idivmod__ = nullify 196 | __ilshift__ = __irshift__ = __iand__ = __ior__ = __ixor__ = nullify 197 | __getitem__ = __setitem__ = __delitem__ = nullify 198 | __getslice__ = __setslice__ = __delslice__ = nullify 199 | __reversed__ = nullify 200 | __contains__ = __missing__ = nullify 201 | __enter__ = __exit__ = nullify 202 | 203 | # Some spacial methods cannot be transformed because they should 204 | # return special types: 205 | # __int__ = __long__ = __float__ = __complex__ = __oct__ = __hex__ = ... 206 | # __index__ = __trunc__ = __coerce_ = ... 207 | # __len__ = ... 208 | # __iter__ = ... 209 | # __round__ = __floor__ = __ceil__ = __trunc__ = ... 210 | 211 | 212 | # One official instance off NullObject, that can be used like None 213 | Null = type('Null', (NullObject,), {"__repr__": lambda s: "Null"})() 214 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ************************************************************* 2 | A collection of gagdets that makes Python even more powerful 3 | ************************************************************* 4 | 5 | There is not real structure for this lib, it's just a bunch of snippets I put together because I use them often. 6 | 7 | Not all of them are documented here, few of them have tests, it's zlib licence, you know the drill... 8 | 9 | 10 | To timestamp 11 | ============= 12 | 13 | datetime.fromtimestamp exists but not the other away around, and it's not likely to change anytime soon (see: http://bugs.python.org/issue2736). In the meantime:: 14 | 15 | >>> from datetime import datetime 16 | >>> to_timestamp(datetime(2000, 1, 1, 2, 1, 1)) 17 | 946692061 18 | >>> datetime.fromtimestamp(946688461) # tu as codé celle là et pas l'autre connard ! 19 | datetime.datetime(2000, 1, 1, 2, 1, 1) 20 | 21 | 22 | Get this nest value or a default 23 | ================================= 24 | 25 | Don't:: 26 | 27 | try: 28 | res = data['key'][0]['other key'][1] 29 | except (KeyError, IndexError): 30 | res = "value" 31 | 32 | 33 | Do:: 34 | 35 | get(data, 'key', 0, 'other key, 1, default="value") 36 | 37 | 38 | For attributes:: 39 | 40 | devise = attr(car, 'insurance', 'expiration_date', 'timezone') 41 | 42 | 43 | Iteration tools missing in itertools 44 | =================================================================================== 45 | 46 | 47 | Iteration by chunk or with a sliding window:: 48 | 49 | >>> for chunk in chunks(l, 3): 50 | ... print list(chunk) 51 | ... 52 | [0, 1, 2] 53 | [3, 4, 5] 54 | [6, 7, 8] 55 | [9] 56 | >>> for slide in window(l, 3): 57 | ... print list(slide) 58 | ... 59 | [0, 1, 2] 60 | [1, 2, 3] 61 | [2, 3, 4] 62 | [3, 4, 5] 63 | [4, 5, 6] 64 | [5, 6, 7] 65 | [6, 7, 8] 66 | [7, 8, 9] 67 | 68 | 69 | Get the first element an any iterable (not just indexable) or the first one to be True:: 70 | 71 | >>> first(xrange(10)) 72 | 0 73 | >>> first_true(xrange(10)) 74 | 1 75 | >>> first([], default="What the one thing we say to the God of Death ?") 76 | 'What the one thing we say to the God of Death ?' 77 | 78 | Sorted Set 79 | =================================================================================== 80 | 81 | Slow but useful data structure:: 82 | 83 | >>> for x in sset((3, 2, 2, 2, 1, 2)): 84 | ... print x 85 | ... 86 | 3 87 | 2 88 | 1 89 | 90 | 91 | Dictionaries one liners 92 | =================================================================================== 93 | 94 | 95 | I wish + was overloaded for dicts:: 96 | 97 | >>> dmerge({"a": 1, "b": 2}, {"b": 2, "c": 3}) 98 | {'a': 1, 'c': 3, 'b': 2} 99 | 100 | 101 | Sometimes you do not want to simply overwrite the values inside the original dict, but merge them in custom fashion:: 102 | 103 | >>> def my_merge(v1, v2): 104 | ... if isinstance(v1, dict) and isinstance(v2, dict): 105 | ... return dmerge(v1, v2) 106 | ... return v2 107 | >>> dmerge({"a": 1, "b": {'ok': 5}}, {"b": {'ko': 5 }, "c": 3}, my_merge) 108 | {'a': 1, 'c': 3, 'b': {'ko': 5, 'ok': 5}} 109 | 110 | Original dicts are not modified, but this will modify them:: 111 | 112 | >>> from batbelt.structs import rename 113 | >>> rename({"a": 1, "b": 2}) 114 | >>> rename({"a": 1, "b": 2}, 'b', 'z') 115 | {u'a': 1, u'z': 2} 116 | 117 | (not thread safe). 118 | 119 | Twited but satisfying:: 120 | 121 | >>> from batbelt.structs import unpack 122 | >>> dct = {'a': 2, 'b': 4, 'z': 42} 123 | >>> a, b, c = unpack(dct, 'a', 'b', 'c', default=1) 124 | >>> a 125 | 2 126 | >>> b 127 | 4 128 | >>> c 129 | 1 130 | 131 | 132 | String tools 133 | =================================================================================== 134 | 135 | The mandatory "slufigy":: 136 | 137 | >>> slugify(u"Hélo Whorde") 138 | helo-whorde 139 | 140 | You get better slugification if you install the `unidecode` lib, but it's optional. You can specify `separator` if you don't like `-` or call directly `normalize()` (the underlying function) if you wish more control. 141 | 142 | The module also feature html_escape/unescape that is not useless and json_dumps/loads that understand datetime by default. Look at the source for these, I'm lazy (PL for documentation are welcome). 143 | 144 | There is also a poor man template system using the `format()` string method on a file content. No loop, but still nice for quick and dirty file generation : 145 | 146 | from batbelt.strings import render 147 | 148 | render('stuff.conf.tpl', {"var": "value"}, "/etc/stuff.conf") 149 | 150 | 151 | Import this 152 | =================================================================================== 153 | 154 | 155 | `__import__` is weird. Let's abstract that :: 156 | 157 | TaClasse = import_from_path('foo.bar.TaClasse') 158 | ton_obj = TaClasse() 159 | 160 | 161 | Catpure prints 162 | =================================================================================== 163 | 164 | 165 | A context manager to deal with this libs that print the result instead of returning it : 166 | 167 | 168 | >>> with capture_ouput() as (stdout, stderr): 169 | ... print "hello", 170 | ... 171 | >>> print stdout.read() 172 | hello 173 | >>> stdout.close() 174 | 175 | 176 | Create a decorator that accept arguments 177 | =================================================================================== 178 | 179 | 180 | I never remember how to do this. And I don't have to anymore. 181 | 182 | First, write the decorator:: 183 | 184 | # all arguments after 'func' are your decorator argument 185 | @decorator_with_args() 186 | def your_decorator(func, arg1, arg2=None): 187 | 188 | if arg1: 189 | # do stuff here 190 | 191 | # do your usual decorator jimbo jumbo, wrapping, calling, returning... 192 | def wrapper(): 193 | return func(arg2) 194 | 195 | 196 | return wrapper 197 | 198 | 199 | 200 | Enjoy : 201 | 202 | @your_decorator(False, 1) 203 | def hop(un_arg): 204 | # do stuff in the decorated function 205 | 206 | 207 | 208 | Add a any directory to the PYTHON PATH 209 | =========================================== 210 | 211 | Accepts shell variables and relative paths : 212 | 213 | from batbelt.utils import add_to_pythonpath 214 | add_to_pythonpath("~/..") 215 | 216 | You can (and probably wants) specify a starting point if you pass a relative path. The default starting point is the result is `os.getcwd()` while you probably wants the directory containing you script. To to so, pass `__file__`: 217 | 218 | add_to_pythonpath("../..", starting_point=__file__) 219 | 220 | `starting_point` can be a file path (basename will be stripped) or a directory name. If will be from there that the reltive path will be calculated. 221 | 222 | You can also choose where in the `sys.path` list the your path will be added by passing `insertion_index`, which default to the after the last existing item. 223 | 224 | 225 | Poor man task queue 226 | =================================================================================================== 227 | 228 | 229 | You don't always need the guaranty of a big lib, you just need a little worker to do the job outside of the main thread:: 230 | 231 | 232 | 233 | from batbelt.parallel import worker 234 | 235 | @worker() 236 | def task(arg): 237 | arg = arg + 10 238 | return arg 239 | 240 | 241 | # start the worker 242 | process = task.start() 243 | 244 | # send tasks 245 | for x in range(10): 246 | process.put(x) 247 | 248 | # (optionaly) get results 249 | for x in range(10): 250 | print process.get() 251 | 252 | ## 10 253 | ## 11 254 | ## 12 255 | ## 13 256 | ## 14 257 | ## 15 258 | ## 16 259 | ## 17 260 | ## 18 261 | ## 19 262 | 263 | # stop the worker 264 | process.stop() 265 | 266 | The worker use multiprocessing by default, but if you prefer threads: `@worker(method="tread")`. 267 | 268 | If you look for it in the source code, you'll see goodies such as Singletong, Null Pattern implementation and other things you don't use that often. 269 | -------------------------------------------------------------------------------- /batbelt/hack.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim: ai ts=4 sts=4 et sw=4 nu 4 | 5 | 6 | import sys 7 | 8 | from functools import wraps 9 | 10 | from io import BytesIO 11 | from contextlib import contextmanager 12 | 13 | 14 | @contextmanager 15 | def capture_ouput(stdout_to=None, stderr_to=None): 16 | """ 17 | Context manager that captures any printed ouput in the 'with' block. 18 | 19 | :Example: 20 | 21 | >>> with capture_ouput() as (stdout, stderr): 22 | ... print "hello", 23 | ... 24 | >>> print stdout.getvalue().upper() 25 | HELLO 26 | >>> with capture_ouput() as (stdout, stderr): # doctest: +IGNORE_EXCEPTION_DETAIL 27 | ... assert False 28 | ... 29 | Traceback (most recent call last): 30 | AssertionError 31 | >>> from tempfile import NamedTemporaryFile 32 | >>> f = NamedTemporaryFile(mode="rw+b") 33 | >>> with capture_ouput(f) as (stdout, stderr): 34 | ... print "hello", 35 | ... 36 | >>> print stdout.read() 37 | hello 38 | 39 | 40 | .. :warning: this is NOT thread safe. 41 | 42 | .. :note: The file like objects containing the capture are not closed 43 | automatically by this context manager. You are responsible 44 | to do it. 45 | 46 | It does not capture exception, so they bubble out and print the stack 47 | trace anyway. 48 | """ 49 | 50 | try: 51 | 52 | stdout, stderr = sys.stdout, sys.stderr 53 | sys.stdout = c1 = stdout_to or BytesIO() 54 | sys.stderr = c2 = stderr_to or BytesIO() 55 | yield c1, c2 56 | 57 | finally: 58 | 59 | sys.stdout = stdout 60 | sys.stderr = stderr 61 | 62 | try: 63 | c1.flush() 64 | c1.seek(0) 65 | except (ValueError, TypeError): 66 | pass 67 | 68 | try: 69 | c2.flush() 70 | c2.seek(0) 71 | except (ValueError, TypeError): 72 | pass 73 | 74 | 75 | def decorator_with_args(wrap=True, 76 | function_assigned=('__module__', '__name__', '__doc__'), 77 | function_updated=('__dict__',), 78 | decorator_assigned=('__module__', '__name__', '__doc__'), 79 | decorator_updated=('__dict__',), 80 | ): 81 | """ 82 | Use this decorator on a wannabe decorator. 83 | 84 | It will turn it into a decorator that accept any arguments and 85 | wraps the resulting decorated function unless you set wrap=False. 86 | 87 | Usage: 88 | 89 | # You use @decorator_with_args on a function you wish to 90 | # be a decorator accepting arguments 91 | @decorator_with_args() 92 | def your_decorator(func, *args, **kwargs): 93 | def wrapper(): 94 | # do stuff 95 | return func() 96 | return wrapper 97 | 98 | Your decorator must accept the function as the first argument, and 99 | expact the other arguments after that. It doesn't have to be *args, 100 | **kwargs, it can be any signature, as long as the first argument, 101 | is the function to decorate. 102 | 103 | One your wannabe decorator decorated, you can use it this way: 104 | 105 | # When you use YOUR decorator, you will be able to pass arguments 106 | @your_decorator(arg1, arg2, arg3='foo') 107 | def a_function(): 108 | # do stuff 109 | 110 | # If you don't use arguments, you still need the parenthesis 111 | @your_decorator() 112 | def another_function(): 113 | # do stuff 114 | 115 | By default, @decorator_with_args will attempt to apply functools.wraps on 116 | the wrapper your wannabe decorator returns. If you don't wish that, 117 | pass wrap=False: 118 | 119 | @decorator_with_args(wrap=False) 120 | def your_decorator(func, *args, **kwargs): 121 | def wrapper(): 122 | # do stuff 123 | return func() 124 | # This will be passed to functools.wraps() if you don't 125 | # set wrap=False 126 | return wrapper 127 | 128 | You can also pass the same arguments you would pass to functools.wraps 129 | directly to @decorator_with_args. They will be passed to along: 130 | 131 | @decorator_with_args(function_assigned=('__module__', '__name__', '__doc__')) 132 | def your_decorator(func, *args, **kwargs): 133 | def wrapper(): 134 | # do stuff 135 | return func() 136 | # this will apply functools.wrap() with assigned being set to 137 | # ('__module__', '__name__') 138 | return wrapper 139 | 140 | The params are named function_assigned and function_updated instead of 141 | just assigned and updated like in functools.wraps. 142 | 143 | Also, @decorator_with_args will ALWAYS apply functools.wraps to the 144 | wrapper around your wannabe decorator. You can also control what's 145 | copied by passing decorator_assigned and decorator_updated the same way: 146 | 147 | # functools.wrap will always be applied to your_decorator() 148 | # but you can choose with which arguments 149 | @decorator_with_args(decorator_updated=('__dict__',)) 150 | def your_decorator(func, *args, **kwargs): 151 | def wrapper(): 152 | # do stuff 153 | return func() 154 | return wrapper 155 | 156 | """ 157 | # decorator() will return this function, wich will be the real decorator 158 | # called on the wannabe decorator. 159 | def _decorator(wannabe_decorator): 160 | 161 | # This is the function that will return your wrapped wannabe decorator. 162 | # Il will add a wrapper that will call your wannabe decorator with 163 | # the arguments stored in a closure under the hood. 164 | def decorator_maker(*args, **kwargs): 165 | 166 | # This is the the wrapper around your wannabe decorator. It 167 | # replaces your function so it can pass arguments to it. 168 | # We apply @wraps on it so it takes all metadata from 169 | # the wannabe decorator and attach them to itself. 170 | @wraps(wannabe_decorator, decorator_assigned, decorator_updated) 171 | def decorator_wrapper(func): 172 | 173 | # The wrapper calls your wannabe decorator, passing the 174 | # function to decorate and arguments to it. 175 | # It will get the wrapper your wannabe decorator returns, 176 | # and if, wrap=True (default), will apply @wraps on it too. 177 | d = wannabe_decorator(func, *args, **kwargs) 178 | if wraps: 179 | d = wraps(func, function_assigned, function_updated)(d) 180 | return d 181 | 182 | return decorator_wrapper 183 | 184 | return decorator_maker 185 | 186 | return _decorator 187 | 188 | 189 | class MultiStopIteration(StopIteration): 190 | def throw(self): 191 | raise self 192 | 193 | 194 | 195 | @contextmanager 196 | def multibreak(): 197 | ''' 198 | 199 | Context manager which allow to break multiple nested for loops at once. 200 | 201 | Example: 202 | 203 | >>> with multibreak() as stop: 204 | ... for x in range(1, 4): 205 | ... for z in range(1, 4): 206 | ... for w in range(1, 4): 207 | ... print w 208 | ... if x * z * w == 2 * 2 * 2: 209 | ... print 'stop' 210 | ... stop() 211 | ... 212 | 1 213 | 2 214 | 3 215 | 1 216 | 2 217 | 3 218 | 1 219 | 2 220 | 3 221 | 1 222 | 2 223 | 3 224 | 1 225 | 2 226 | stop 227 | ''' 228 | 229 | try: 230 | yield MultiStopIteration().throw 231 | except MultiStopIteration: 232 | pass 233 | 234 | 235 | def accept_callbacks(func): 236 | """ 237 | A decorator to allow any function to be able to accept callbacks. 238 | 239 | :Example: 240 | 241 | # make your function accept callbacks 242 | @accept_callbacks 243 | def add(a, b): 244 | return a + b 245 | 246 | # write a callback that accept 'result' as the first parameter 247 | # and the function paramters as other parameters 248 | def my_callback(result, a, b): 249 | print("Function called with a=%s et b=%s !" % (a, b)) 250 | print("It returned '%s'" % result) 251 | 252 | # add the callback to the callback list 253 | add.callbacks.append(my_callback) 254 | 255 | # enjoy 256 | >>> add(1, 2) 257 | Function called with a=1 et b=2 ! 258 | It returned '3' 259 | 3 260 | 261 | """ 262 | 263 | callbacks = [] 264 | 265 | @wraps(func) 266 | def wrapper(*args, **kwargs): 267 | 268 | result = func(*args, **kwargs) 269 | 270 | for callback in callbacks: 271 | callback(result, *args, **kwargs) 272 | 273 | return result 274 | 275 | wrapper.callbacks = callbacks 276 | 277 | return wrapper 278 | -------------------------------------------------------------------------------- /batbelt/strings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/env python 3 | # vim: ai ts=4 sts=4 et sw=4 nu 4 | 5 | r""" 6 | 7 | Various tools to manipulate strings. Features tools to normalize and 8 | slugify strings. Also some ways to escape HTML and a JSON serializer 9 | that deals with datetime objects. 10 | 11 | Here is an example of the generic slugifier utility (as for 'normalize', 12 | 'escape_html', 'unescape_html' and "json_dumps" are pretty straightforward 13 | and don't need that much of an explanation. Ok, it's just because I'm 14 | too lazy to write it down really). 15 | 16 | Works out of the box for Latin-based scripts. 17 | 18 | Example: 19 | 20 | >>> from strings import slugify 21 | >>> slugify(u"C'est No\xebl !") 22 | u'cest-noel' 23 | >>> slugify(u"C'est No\xebl !", separator="_") 24 | u'cest_noel' 25 | 26 | It will handle all unicode equivalences if (and only if) the optional 27 | unidecode library is installed. 28 | 29 | Example: 30 | 31 | >>> slugify(u"\u5317\u4eb0") 32 | u'bei-jing' 33 | 34 | More about it: 35 | - http://en.wikipedia.org/wiki/Unicode_equivalence; 36 | - http://pypi.python.org/pypi/Unidecode. 37 | 38 | If you do have unidecode installed, but wish not to use it, use the 39 | unicodedata_slugify fonction: 40 | 41 | >>> slugify(u"H\xe9ll\xf8 W\xc3\xb6rld") # slugify() uses unidecode if it can 42 | u'hello-world' 43 | >>> unicodedata_slugify(u"H\xe9ll\xf8 W\xc3\xb6rld") # this will more limited 44 | u'hell-world' 45 | 46 | In case you wish to keep the non ASCII characters "as-is", use 47 | unicode_slugify(): 48 | 49 | >>> print unicode_slugify(u"C'est No\xebl !") 50 | cest-no\xebl 51 | 52 | """ 53 | 54 | import re 55 | import os 56 | import codecs 57 | import json 58 | import unicodedata 59 | 60 | from datetime import datetime, timedelta, date, time 61 | from xml.sax.saxutils import escape, unescape 62 | 63 | from utils import CLASSIC_DATETIME_FORMAT, CLASSIC_DATETIME_PATTERN 64 | 65 | 66 | 67 | def unicode_slugify(string, separator=r'-'): 68 | r""" 69 | Slugify a unicode string using to normalize the string, but without trying 70 | to convert or strip non ASCII characters. 71 | 72 | Example: 73 | 74 | >>> print unicode_slugify(u"H\xe9ll\xf8 W\xc3\xb6rld") 75 | h\xe9ll\xf8-w\xc3\xb6rld 76 | >>> unidecode_slugify(u"Bonjour, tout l'monde !", separator="_") 77 | u'bonjour_tout_lmonde' 78 | >>> unidecode_slugify(u"\tStuff with -- dashes and... spaces \n") 79 | u'stuff-with-dashes-and-spaces' 80 | """ 81 | 82 | string = re.sub(r'[^\w\s' + separator + ']', '', string, flags=re.U) 83 | string = string.strip().lower() 84 | return unicode(re.sub(r'[' + separator + '\s]+', 85 | separator, string, flags=re.U)) 86 | 87 | 88 | def unicodedata_slugify(string, separator=r'-'): 89 | r""" 90 | Slugify a unicode string using unicodedata to normalize the string. 91 | 92 | Example: 93 | 94 | >>> unicodedata_slugify(u"H\xe9ll\xf8 W\xc3\xb6rld") 95 | u'hell-world' 96 | >>> unidecode_slugify(u"Bonjour, tout l'monde !", separator="_") 97 | u'bonjour_tout_lmonde' 98 | >>> unidecode_slugify(u"\tStuff with -- dashes and... spaces \n") 99 | u'stuff-with-dashes-and-spaces' 100 | """ 101 | 102 | string = unicodedata.normalize('NFKD', string).encode('ascii', 'ignore') 103 | string = re.sub(r'[^\w\s' + separator + ']', '', string).strip().lower() 104 | return unicode(re.sub(r'[' + separator + '\s]+', separator, string)) 105 | 106 | 107 | def unidecode_slugify(string, separator=r'-'): 108 | r""" 109 | Slugify a unicode string using unidecode to normalize the string. 110 | 111 | Example: 112 | 113 | >>> unidecode_slugify(u"H\xe9ll\xf8 W\xc3\xb6rld") 114 | u'hello-world' 115 | >>> unidecode_slugify(u"Bonjour, tout l'monde !", separator="_") 116 | u'bonjour_tout_lmonde' 117 | >>> unidecode_slugify(u"\tStuff with -- dashes and... spaces \n") 118 | u'stuff-with-dashes-and-spaces' 119 | """ 120 | 121 | string = unidecode.unidecode(string) 122 | string = re.sub(r'[^\w\s' + separator + ']', '', string).strip().lower() 123 | return unicode(re.sub(r'[' + separator + '\s]+', separator, string)) 124 | 125 | 126 | def unicodedata_normalize(string): 127 | r""" 128 | Returns a new string withou non ASCII characters, trying to replace 129 | them with their ASCII closest counter parts when possible. 130 | 131 | :Example: 132 | 133 | >>> normalize(u"H\xe9ll\xf8 W\xc3\xb6rld") 134 | 'Hell World' 135 | 136 | 137 | This version use unicodedata and provide limited yet 138 | useful results. 139 | """ 140 | return unicodedata.normalize('NFKD', string).encode('ascii', 'ignore') 141 | 142 | 143 | def unidecode_normalize(string): 144 | r""" 145 | Returns a new string withou non ASCII characters, trying to replace 146 | them with their ASCII closest counter parts when possible. 147 | 148 | :Example: 149 | 150 | >>> normalize(u"H\xe9ll\xf8 W\xc3\xb6rld") 151 | 'Hello World' 152 | 153 | This version use unidecode and provide enhanced results. 154 | """ 155 | return unidecode.unidecode(string) 156 | 157 | 158 | try: 159 | import unidecode 160 | slugify = unidecode_slugify 161 | normalize = unidecode_normalize 162 | except ImportError: 163 | slugify = unicodedata_slugify 164 | normalize = unicodedata_normalize 165 | 166 | 167 | def escape_html(text, additional_escape={'"': """, "'": "'"}): 168 | """ 169 | Turn HTML tag caracters into HTML entities. 170 | 171 | 172 | Example: 173 | 174 | >>> escape_html("Ben & Jelly's !") 175 | '<strong>Ben & Jelly's !</strong>' 176 | 177 | """ 178 | return escape(text, additional_escape) 179 | 180 | 181 | def unescape_html(text, additional_escape={""": '"', "'": "'"}): 182 | """ 183 | Turn HTML tag entities into ASCII caracters. 184 | 185 | Example: 186 | 187 | >>> unescape_html('<strong>Ben & Jelly's !</strong>') 188 | "Ben & Jelly's !" 189 | """ 190 | return unescape(text, additional_escape) 191 | 192 | 193 | 194 | class JSONEncoder(json.JSONEncoder): 195 | """ 196 | Json encoder with date and time handling. 197 | 198 | You should use naive datetime only. If you have timezone information, 199 | store them in a separate field. 200 | """ 201 | 202 | 203 | DATETIME_FORMAT = CLASSIC_DATETIME_FORMAT 204 | DATE_FORMAT, TIME_FORMAT = DATETIME_FORMAT.split() 205 | TIMEDELTA_FORMAT = "timedelta(seconds='%s')" 206 | 207 | 208 | def __init__(self, datetime_format=None, date_format=None, time_format=None, 209 | timedelta_format=None, *args, **kwargs): 210 | 211 | self.datetime_format = datetime_format or self.DATETIME_FORMAT 212 | self.date_format = date_format or self.DATE_FORMAT 213 | self.time_format = time_format or self.TIME_FORMAT 214 | self.timedelta_format = timedelta_format or self.TIMEDELTA_FORMAT 215 | 216 | super(JSONEncoder, self).__init__(self, *args, **kwargs) 217 | 218 | 219 | def default(self, obj): 220 | 221 | if isinstance(obj, datetime): 222 | return obj.strftime(self.datetime_format) 223 | 224 | if isinstance(obj, date): 225 | return obj.strftime(self.date_format) 226 | 227 | if isinstance(obj, time): 228 | return obj.strftime(self.time_format) 229 | 230 | if isinstance(obj, timedelta): 231 | return self.timedelta_format % obj.total_seconds() 232 | 233 | return json.JSONEncoder.default(self, obj) 234 | 235 | 236 | 237 | class JSONDecoder(json.JSONDecoder): 238 | """ 239 | Json decoder that decode JSON encoded with JSONEncoder 240 | """ 241 | 242 | DATETIME_PATTERN = CLASSIC_DATETIME_PATTERN 243 | DATE_PATTERN, TIME_PATTERN = DATETIME_PATTERN.split() 244 | TIMEDELTA_PATTERN = r"timedelta\(seconds='(?P\d+(?:\.\d+)*)'\)" 245 | 246 | 247 | def __init__(self, datetime_pattern=None, date_pattern=None, 248 | time_pattern=None, timedelta_pattern=None, datetime_format=None, 249 | date_format=None, time_format=None, *args, **kwargs): 250 | 251 | self.datetime_format = datetime_format or JSONEncoder.DATETIME_FORMAT 252 | self.date_format = date_format or JSONEncoder.DATE_FORMAT 253 | self.time_format = time_format or JSONEncoder.TIME_FORMAT 254 | 255 | self.datetime_pattern = re.compile(datetime_pattern or self.DATETIME_PATTERN) 256 | self.date_pattern = re.compile(date_pattern or self.DATE_PATTERN) 257 | self.time_pattern = re.compile(time_pattern or self.TIME_PATTERN) 258 | self.timedelta_pattern = re.compile(timedelta_pattern or self.TIMEDELTA_PATTERN) 259 | 260 | super(JSONDecoder, self).__init__(object_pairs_hook=self.object_pairs_hook, 261 | *args, **kwargs) 262 | 263 | def object_pairs_hook(self, obj): 264 | return dict((k, self.decode_on_match(v)) for k, v in obj) 265 | 266 | 267 | def decode_on_match(self, obj): 268 | """ 269 | Try to match the string, and if it fits any date format, 270 | parse it and returns a Python object. 271 | """ 272 | 273 | string = unicode(obj) 274 | 275 | match = re.search(self.datetime_pattern, string) 276 | if match: 277 | return datetime.strptime(match.string, self.datetime_format) 278 | 279 | match = re.search(self.date_pattern, string) 280 | if match: 281 | return datetime.strptime(match.string, self.date_format).date() 282 | 283 | match = re.search(self.time_pattern, string) 284 | if match: 285 | return datetime.strptime(match.string, self.time_format).time() 286 | 287 | match = re.search(self.timedelta_pattern, string) 288 | if match: 289 | return timedelta(seconds=float(match.groupdict()['seconds'])) 290 | 291 | return obj 292 | 293 | 294 | def json_dumps(data, datetime_format=None, date_format=None, time_format=None, 295 | timedelta_format=None, *args, **kwargs): 296 | r""" 297 | Same as Python's json.dumps but also serialize datetime, date, time 298 | and timedelta. 299 | 300 | Example: 301 | >>> import datetime 302 | >>> json_dumps({'test': datetime.datetime(2000, 1, 1, 1, 1, 1)}) 303 | '{"test": "2000-01-01 01:01:01.000000"}' 304 | >>> json_dumps({'test': datetime.date(2000, 1, 1)}) 305 | '{"test": "2000-01-01"}' 306 | >>> json_dumps({'test': datetime.time(1, 1, 1)}) 307 | '{"test": "01:01:01.000000"}' 308 | >>> json_dumps({'test': datetime.timedelta(1, 1)}) 309 | '{"test": "timedelta(seconds=\'86401.0\')"}' 310 | >>> json_dumps({u'test': datetime.timedelta(1, 1), u'a': [1, 2]}) 311 | '{"test": "timedelta(seconds=\'86401.0\')", "a": [1, 2]}' 312 | 313 | """ 314 | return JSONEncoder(datetime_format, date_format, time_format, 315 | timedelta_format, *args, **kwargs).encode(data) 316 | 317 | 318 | def json_loads(string, datetime_pattern=None, date_pattern=None, 319 | time_pattern=None, timedelta_pattern=None, datetime_format=None, 320 | date_format=None, time_format=None, *args, **kwargs): 321 | r""" 322 | Same as Python's json.loads, but handles formats from batbelt.json_dumps 323 | which are currently mainly date formats. 324 | 325 | Example: 326 | 327 | >>> json_loads('{"test": "2000-01-01 01:01:01.000000"}') 328 | {u'test': datetime.datetime(2000, 1, 1, 1, 1, 1)} 329 | >>> json_loads('{"test": "2000-01-01"}') 330 | {u'test': datetime.date(2000, 1, 1)} 331 | >>> json_loads('{"test": "01:01:01.000000"}') 332 | {u'test': datetime.time(1, 1, 1)} 333 | >>> json_loads('{"test": "timedelta(seconds=\'86401.0\')"}') 334 | {u'test': datetime.timedelta(1, 1)} 335 | >>> json_loads('{"test": "timedelta(seconds=\'86401.0\')", "a": [1, 2]}') 336 | {u'test': datetime.timedelta(1, 1), u'a': [1, 2]} 337 | 338 | """ 339 | return JSONDecoder(datetime_pattern, date_pattern, time_pattern, 340 | timedelta_pattern, datetime_format, date_format, 341 | time_format, *args, **kwargs).decode(string) 342 | 343 | 344 | def template(tpl, context): 345 | """ 346 | Use the given a template file, call .format() on it's content, 347 | and returns it as a string. 348 | 349 | Template file can be a path or a file like object. 350 | """ 351 | 352 | try: 353 | tpl = open(tpl) 354 | except TypeError: 355 | pass 356 | 357 | return tpl.read().format(**context) 358 | 359 | 360 | def render(tpl, context, target): 361 | """ 362 | Render the template and write the result in a file. 363 | 364 | Template and target files can be a path or a file like objects. 365 | """ 366 | 367 | try: 368 | target = open(target, 'w') 369 | except TypeError: 370 | pass 371 | 372 | res = template(tpl, context) 373 | target.write(res) 374 | 375 | target.close() 376 | 377 | 378 | def write(path, *args, **kwargs): 379 | """ 380 | Try to write to the file at `path` the values passed as `args` as lines. 381 | 382 | It will attempt decoding / encoding and casting automatically each value 383 | to a string. 384 | 385 | This is an utility function : its slow and doesn't consider edge cases, 386 | but allow to do just what you want most of the time in one line. 387 | 388 | :Example: 389 | 390 | s = '/tmp/test' 391 | write(s, 'test', '\xe9', 1, ['fdjskl']) 392 | print open(s).read() 393 | test 394 | \xe9 395 | 1 396 | ['fdjskl'] 397 | 398 | You can optionally pass : 399 | 400 | mode : among 'a', 'w', which default to 'w'. Binary mode is forced. 401 | encoding : which default to utf8 and will condition decoding AND encoding 402 | errors : what to do when en encoding error occurs : 'replace' by default, 403 | which replace faulty caracters with '?' 404 | 405 | You can pass string or unicode as *args, but if you pass strings, 406 | make sure you pass them with the same encoding you wish to write to 407 | the file. 408 | """ 409 | 410 | mode = kwargs.get('mode', 'w') 411 | encoding = kwargs.get('encoding', 'utf8') 412 | errors = kwargs.get('encoding', 'replace') 413 | 414 | with codecs.open(path, mode=mode, encoding=encoding, errors=errors) as f: 415 | 416 | for line in args: 417 | 418 | if isinstance(line, str): 419 | line = line.decode(encoding, errors) 420 | 421 | if not isinstance(line, unicode): 422 | line = repr(line) 423 | 424 | f.write(line + os.linesep) 425 | 426 | 427 | 428 | if __name__ == "__main__": 429 | import doctest 430 | doctest.testmod() 431 | 432 | -------------------------------------------------------------------------------- /batbelt/structs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim: ai ts=4 sts=4 et sw=4 nu 4 | 5 | 6 | from collections import MutableSet, deque 7 | 8 | from itertools import islice, chain 9 | 10 | 11 | __all__ = ['chunks', 'dmerge', 'get', 'window', 'dswap', 'subdict', 'first', 12 | 'first_true', 'sset'] 13 | 14 | 15 | def chunks(seq, chunksize, process=tuple): 16 | """ 17 | Yields items from an iterator in iterable chunks. 18 | """ 19 | it = iter(seq) 20 | while True: 21 | yield process(chain([it.next()], islice(it, chunksize - 1))) 22 | 23 | 24 | 25 | def window(iterable, size=2): 26 | """ 27 | Yields iterms by bunch of a given size, but rolling only one item 28 | in and out at a time when iterating. 29 | """ 30 | iterable = iter(iterable) 31 | d = deque(islice(iterable, size), size) 32 | yield d 33 | for x in iterable: 34 | d.append(x) 35 | yield d 36 | 37 | 38 | def dmerge(d1, d2, merge_func=None): 39 | """ 40 | Create a new dictionary being the merge of the two passed as a 41 | parameter. If a key is in both dictionaries, the values are processed 42 | with the merge_func. 43 | 44 | By default the value in the second dictionary erases the value in the 45 | first one. 46 | """ 47 | d = {} 48 | 49 | d.update(d1) 50 | 51 | if merge_func is None: 52 | d.update(d2) 53 | return d 54 | 55 | for k, v in d2.iteritems(): 56 | if k in d: 57 | d[k] = merge_func(d[k], v) 58 | else: 59 | d[k] = v 60 | return d 61 | 62 | 63 | def dswap(dct): 64 | """ 65 | Swap key and values of a given dictionary. Return a new dictionary. 66 | 67 | If you have duplicate values, the last one in the dictionary order 68 | will be used. Since dictionary order is not predictable, you should 69 | make sure to either remove duplicates values before processing, or 70 | just make sure loosing some keys is not a problem for you. 71 | 72 | 73 | example: 74 | 75 | >>> sorted(dswap({'a': 1, 'b': 2}).items()) 76 | [(1, 'a'), (2, 'b')] 77 | """ 78 | return dict((value, key) for key, value in dct.iteritems()) 79 | 80 | 81 | def get(data, *keys, **kwargs): 82 | """ 83 | Extract a data from nested mapping and sequences using a list of keys 84 | and indices to apply successively. If a key error or an index error 85 | is raised, returns the default value. 86 | 87 | res = get(data, 'test', 0, 'bla', default="yeah") 88 | 89 | is the equivalent of 90 | 91 | try: 92 | res = data['test'][0]['bla'] 93 | except (KeyError, IndexError): 94 | res = "yeah" 95 | 96 | """ 97 | try: 98 | value = data[keys[0]] 99 | 100 | for key in keys[1:]: 101 | value = value[key] 102 | except (KeyError, IndexError, TypeError): 103 | return kwargs.get('default', None) 104 | 105 | return value 106 | 107 | 108 | def iget(data, value, default=None): 109 | """ 110 | Same as indexing, but works with any iterable, 111 | and accept a default value. 112 | 113 | :Example: 114 | 115 | >>> iget(xrange(10), 0) 116 | 0 117 | >>> iget(xrange(10), 5) 118 | 5 119 | >>> iget(xrange(10), 10000, default='wololo') 120 | u'wololo' 121 | """ 122 | 123 | for x in islice(data, value, None): 124 | return x 125 | return default 126 | 127 | 128 | 129 | def rename(dct, old_name, new_name): 130 | """ 131 | Rename a key in a dictionary. No effect if the key does not exists. 132 | 133 | Return the dictiony passed as parameter. 134 | """ 135 | 136 | try: 137 | dct[new_name] = dct[old_name] 138 | del dct[old_name] 139 | except KeyError: 140 | pass 141 | 142 | return dct 143 | 144 | 145 | def unpack(indexable, *args, **kwargs): 146 | """ 147 | Return an generator with the values for the given keys/indices or 148 | a default value. 149 | 150 | :Example: 151 | 152 | >>> dct = {'a': 2, 'b': 4, 'z': 42} 153 | >>> a, b, c = unpack(dct, 'a', 'b', 'c', default=1) 154 | >>> a 155 | 2 156 | >>> b 157 | 4 158 | >>> c 159 | 1 160 | >>> list(unpack(range(5, 10), 2, 4)) 161 | [7, 9] 162 | """ 163 | 164 | default = kwargs.get('default', None) 165 | 166 | for key in args: 167 | yield get(indexable, key, default=default) 168 | 169 | 170 | 171 | def subdict(dct, include=(), exclude=()): 172 | """ 173 | Return a dictionary that is a copy of the given one. 174 | 175 | All values in `include` are used as key to be copied to 176 | the resulting dictionary. 177 | 178 | You can also pass a list of key to exclude instead by setting 179 | `exclude`. But you can't use both `include` and `exclude`: if you do, 180 | `exclude will be ignored` 181 | 182 | Example: 183 | 184 | >>> subdict({1:None, 2: False, 3: True}, [1, 2]) 185 | {1: None, 2: False} 186 | >>> subdict({1:None, 2: False, 3: True}, exclude=[1, 2]) 187 | {3: True} 188 | 189 | """ 190 | 191 | if include: 192 | return dict((k, v) for k, v in dct.iteritems() if k in include) 193 | 194 | return dict((k, v) for k, v in dct.iteritems() if k not in exclude) 195 | 196 | 197 | 198 | # aliased for compat, but should probably be removed 199 | first = lambda data, default=None: iget(data, 0, default) 200 | 201 | 202 | def first_true(iterable, key=lambda x: x, default=None): 203 | """ 204 | Return the first item of any iterable for which the key is True. 205 | 206 | By default the key is the entire element. 207 | 208 | If the iterable is empty, return the default value. 209 | """ 210 | for x in iterable: 211 | if key(x): 212 | return x 213 | return default 214 | 215 | 216 | def skip_duplicates(iterable, key=lambda x: x): 217 | """ 218 | Returns a generator that will yield all objects from iterable, skipping 219 | duplicates. 220 | 221 | Duplicates are identified using the `key` function to calculate a 222 | unique fingerprint. This does not use natural equality, but the 223 | result use a set() to remove duplicates, so defining __eq__ 224 | on your objects would have not effect. 225 | 226 | By default the fingerprint is the object itself, 227 | which ensure the functions works as-is with iterable of primitives 228 | such as int, str or tuple. 229 | 230 | :Example: 231 | 232 | >>> list(skip_duplicates([1, 2, 3, 4, 4, 2, 1, 3 , 4])) 233 | [1, 2, 3, 4] 234 | 235 | The return value of `key` MUST be hashable, which means for 236 | non hashable objects such as dict, set or list, you need to specify 237 | a a function that returns a hashable fingerprint. 238 | 239 | :Example: 240 | 241 | >>> list(skip_duplicates(([], [], (), [1, 2], (1, 2)), lambda x: tuple(x))) 242 | [[], [1, 2]] 243 | >>> list(skip_duplicates(([], [], (), [1, 2], (1, 2)), lambda x: (type(x), tuple(x)))) 244 | [[], (), [1, 2], (1, 2)] 245 | 246 | For more complex types, such as custom classes, the default behavior 247 | is to remove nothing. You MUST provide a `key` function is you wish 248 | to filter those. 249 | 250 | :Example: 251 | 252 | >>> class Test(object): 253 | def __init__(self, foo='bar'): 254 | self.foo = foo 255 | def __repr__(self): 256 | return "Test('%s')" % self.foo 257 | ... 258 | >>> list(skip_duplicates([Test(), Test(), Test('other')])) 259 | [Test('bar'), Test('bar'), Test('other')] 260 | >>> list(skip_duplicates([Test(), Test(), Test('other')], lambda x: x.foo)) 261 | [Test('bar'), Test('other')] 262 | 263 | See also : 264 | - strip_duplicates : a simpler, slower function that returns a list 265 | of elements with no duplicates. It accepts 266 | non hashable elements and honors __eq__. 267 | - remove_duplicates : remove duplicates from a list in place. 268 | Most ressource efficient merthod. 269 | """ 270 | fingerprints = set() 271 | 272 | try: 273 | for x in iterable: 274 | fingerprint = key(x) 275 | if fingerprint not in fingerprints: 276 | yield x 277 | fingerprints.add(fingerprint) 278 | except TypeError as e: 279 | try: 280 | hash(fingerprint) 281 | except TypeError: 282 | raise TypeError( 283 | "Calculating the key on one element resulted in a non hashable " 284 | "object of type '%s'. Change the 'key' parameter to a function " 285 | "that always, returns a hashable object. Hint : primitives " 286 | "like int, str or tuple, are hashable, dict, set and list are " 287 | "not. \nThe object that triggered the error was:\n%s" % ( 288 | type(fingerprint), x) 289 | ) 290 | else: 291 | raise 292 | 293 | 294 | 295 | def strip_duplicates(iterable, equals=lambda x, y: x == y): 296 | """ 297 | Return a list of elements from iterable, without duplicates. 298 | 299 | This uses equality to find duplicates, and will honor __eq__, but 300 | will not work on infinite iterables. 301 | 302 | :Examples: 303 | 304 | >>> strip_duplicates('fdjqkslfjdmkfdsqjkfmjqsdmlkfjqslkmfjsdklfl') 305 | ['f', 'd', 'j', 'q', 'k', 's', 'l', 'm'] 306 | >>> strip_duplicates(([], [], (), [1, 2], (1, 2))) 307 | [[], (), [1, 2], (1, 2)] 308 | >>> strip_duplicates(([], [], (), [1, 2], (1, 2)), lambda x, y: tuple(x) == tuple(y)) 309 | [[], [1, 2]] 310 | >>> class Test(object): 311 | def __init__(self, foo='bar'): 312 | self.foo = foo 313 | def __repr__(self): 314 | return "Test('%s')" % self.foo 315 | def __eq__(self, other): 316 | return self.foo == other.foo 317 | >>> strip_duplicates([Test(), Test(), Test('other')]) 318 | [Test('bar'), Test('other')] 319 | 320 | See also : 321 | - skip_duplicates : returns a generator yielding elements without 322 | duplicates. Faster, works on infinite iterables, 323 | but uses hashes instead of equality. 324 | - remove_duplicates : remove duplicates from a list in place. 325 | Most ressource efficient merthod. 326 | """ 327 | 328 | iterable = iter(iterable) 329 | 330 | res = [] 331 | while True: 332 | 333 | try: 334 | elem = next(iterable) 335 | except StopIteration: 336 | break 337 | 338 | res.append(elem) 339 | 340 | iterable = iter([x for x in iterable if not equals(elem, x)]) 341 | 342 | return res 343 | 344 | 345 | def remove_duplicates(lst, equals=lambda x, y: x == y): 346 | """ 347 | Removes duplicates from a list, in place. 348 | 349 | Works only with lists and modifies the list, but it's pretty ressource 350 | saving compared to other methods. 351 | 352 | See also : 353 | - skip_duplicates : returns a generator yielding elements without 354 | duplicates. Faster, works on infinite iterables, 355 | but uses hashes instead of equality. 356 | - strip_duplicates : a simpler, slower function that returns a list 357 | of elements with no duplicates. It accepts 358 | non hashable elements and honors __eq__. 359 | """ 360 | 361 | if not isinstance(lst, list): 362 | raise TypeError('This function works only with lists.') 363 | 364 | i1 = 0 365 | l = (len(lst) - 1) 366 | 367 | while i1 < l: 368 | 369 | elem = lst[i1] 370 | 371 | i2 = i1 + 1 372 | while i2 <= l: 373 | if equals(elem, lst[i2]): 374 | del lst[i2] 375 | l -= 1 376 | i2 += 1 377 | 378 | i1 += 1 379 | 380 | return lst 381 | 382 | 383 | 384 | KEY, PREV, NEXT = range(3) 385 | 386 | 387 | class sset(MutableSet): 388 | """ 389 | Set that preserves ordering. 390 | 391 | From http://code.activestate.com/recipes/576694/ 392 | """ 393 | 394 | def __init__(self, iterable=None): 395 | self.end = end = [] 396 | end += [None, end, end] # sentinel node for doubly linked list 397 | self.map = {} # key --> [key, prev, next] 398 | if iterable is not None: 399 | self |= iterable 400 | 401 | def __len__(self): 402 | return len(self.map) 403 | 404 | def __contains__(self, key): 405 | return key in self.map 406 | 407 | def add(self, key): 408 | if key not in self.map: 409 | end = self.end 410 | curr = end[PREV] 411 | curr[NEXT] = end[PREV] = self.map[key] = [key, curr, end] 412 | 413 | def discard(self, key): 414 | if key in self.map: 415 | key, prev, next = self.map.pop(key) 416 | prev[NEXT] = next 417 | next[PREV] = prev 418 | 419 | def __iter__(self): 420 | end = self.end 421 | curr = end[NEXT] 422 | while curr is not end: 423 | yield curr[KEY] 424 | curr = curr[NEXT] 425 | 426 | def __reversed__(self): 427 | end = self.end 428 | curr = end[PREV] 429 | while curr is not end: 430 | yield curr[KEY] 431 | curr = curr[PREV] 432 | 433 | def pop(self, last=True): 434 | if not self: 435 | raise KeyError('set is empty') 436 | key = next(reversed(self)) if last else next(iter(self)) 437 | self.discard(key) 438 | return key 439 | 440 | def __repr__(self): 441 | if not self: 442 | return '%s()' % (self.__class__.__name__,) 443 | return '%s(%r)' % (self.__class__.__name__, list(self)) 444 | 445 | def __eq__(self, other): 446 | if isinstance(other, sset): 447 | return len(self) == len(other) and list(self) == list(other) 448 | return set(self) == set(other) 449 | 450 | def __del__(self): 451 | self.clear() # remove circular references 452 | 453 | 454 | 455 | class Flattener(object): 456 | """ 457 | Create a flattener that you can call on a deeply nested data 458 | structures to iterate over the items as it if it were a flat iterable. 459 | 460 | The flattener returns a generator that lazily yield the items and 461 | deals with up to hundred of levels of nesting (~800 on my machine, 462 | and you can control it with sys.setrecursionlimit). 463 | 464 | A default flattener named 'flatten' is available by default. 465 | 466 | :Example: 467 | 468 | a = [] 469 | for i in range(10): 470 | a = [a, i] 471 | print(a) 472 | 473 | [[[[[[[[[[[], 0], 1], 2], 3], 4], 5], 6], 7], 8], 9] 474 | 475 | print(list(flatten(a))) 476 | 477 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 478 | 479 | By default, it flattens all the types listed in 480 | Flattener.DEFAULT_FLATTEN_TYPES but you can pass you list via 481 | flatten_types while calling a Flatener instance. 482 | 483 | For ambigious types like dict, you can pass iterable_getters, a 484 | mapping type / callback letting you define how to extract items from 485 | each type. 486 | 487 | :Example: 488 | 489 | a = [] 490 | for i in range(2): 491 | a = [a, i] + [{'a': 1., 'b': {'c': 3.}}] 492 | print(a) 493 | 494 | [[[], 0, {'a': 1.0, 'b': {'c': 3.0}}], 1, {'a': 1.0, 'b': {'c': 3.0}}] 495 | 496 | new_ft = Flattener.DEFAULT_FLATTEN_TYPES + (dict,) 497 | 498 | dico_flatten = Flattener(flatten_types=new_ft, 499 | iterable_getters={dict: lambda x: x.items()}) 500 | 501 | print(list(dico_flatten(a))) 502 | 503 | [0, u'a', 1.0, u'b', u'c', 3.0, 1, u'a', 1.0, u'b', u'c', 3.0] 504 | 505 | """ 506 | 507 | DEFAULT_FLATTEN_TYPES = ( 508 | list, 509 | tuple, 510 | set, 511 | (x for x in ()).__class__, 512 | xrange, 513 | deque, 514 | MutableSet, 515 | # Sequence # warning, a string is a subclass of Sequence 516 | ) 517 | 518 | 519 | def __init__(self, flatten_types=None, iterable_getters={}): 520 | self.flatten_types = flatten_types or self.DEFAULT_FLATTEN_TYPES 521 | self.iterable_getters = iterable_getters 522 | 523 | 524 | def should_flatten(self, obj): 525 | """ 526 | Returns if the object should be flatten or not, checking if the 527 | objects is an instance of type listed in DEFAULT_FLATTEN_TYPES 528 | by default. 529 | """ 530 | return isinstance(obj, self.flatten_types) 531 | 532 | 533 | def transform_iterable(self, obj): 534 | """ 535 | Apply a pre-processing to an object before iterate on it. Can 536 | be useful for types such as dict on which you may want to call 537 | values() or items() before iteration. 538 | 539 | By defaut, it check if the object is an DIRECT instance (not 540 | a subclass) of any key in iterable_getters, passed in __init__ 541 | and apply the transform. 542 | 543 | iterable_getter should be a mapping with types as key and 544 | transformation function as values, such as : 545 | 546 | {dict: lambda x: x.items()} 547 | 548 | iterable_getter default value is {}, making transform_iterable 549 | a noop. 550 | """ 551 | if obj.__class__ in self.iterable_getters: 552 | return self.iterable_getters[obj.__class__](obj) 553 | return obj 554 | 555 | 556 | def __call__(self, iterable): 557 | """ 558 | Returns a generator yieling items from a deeply nested iterable 559 | like it would be a flat one. 560 | """ 561 | for e in iterable: 562 | if self.should_flatten(e): 563 | for f in self(self.transform_iterable(e)): 564 | yield f 565 | else: 566 | yield e 567 | 568 | 569 | 570 | flatten = Flattener() 571 | 572 | --------------------------------------------------------------------------------