├── .gitignore ├── .idea ├── encodings.xml ├── modules.xml └── python-glob2.iml ├── CHANGES ├── LICENSE ├── MANIFEST.in ├── README.rst ├── RELEASING.md ├── TODO ├── glob2 ├── __init__.py ├── compat.py ├── fnmatch.py └── impl.py ├── setup.cfg ├── setup.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | /LOCAL_TODO 3 | 4 | # distutils/setuptools 5 | /dist/ 6 | *egg-info 7 | 8 | # IDEs 9 | *.wpr 10 | /.idea/ 11 | 12 | # Folder config file 13 | [Dd]esktop.ini 14 | -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/python-glob2.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | 0.7 (2019-06-11) 2 | - Fix deprecation warning. 3 | 4 | 0.6 (2017-08-17) 5 | - Support dummy recursive parameter in APIs to match the new 6 | call signature of Python 3. 7 | - Fix lru_cache (Kostis Anagnostopoulos). 8 | - Introduce norm_paths and case_sensitive options to optionally 9 | change the behaviour (Kostis Anagnostopoulos). 10 | 11 | 0.5 (2016-11-04) 12 | - include_hidden option. 13 | - Python 3 fixes. 14 | - Publish a wheel. 15 | 16 | 0.4 (2013-05-08) 17 | - Support Python 3. 18 | 19 | 0.3 (2012-01-19) 20 | - Fix non-glob patterns (patch by Zalan). 
21 | - Don't shadow internal "glob" module. 22 | 23 | 0.2 (2011-06-14) 24 | - Initial release. 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2008, Michael Elsdörfer 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials 14 | provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 19 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 20 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 26 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst CHANGES LICENSE 2 | include test.py 3 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | python-glob2 2 | ============ 3 | 4 | This is an extended version of Python's builtin glob module 5 | (http://docs.python.org/library/glob.html) which adds: 6 | 7 | - The ability to capture the text matched by glob patterns, and 8 | return those matches alongside the filenames. 9 | 10 | - A recursive '**' globbing syntax, akin for example to the ``globstar`` 11 | option of the bash shell. 12 | 13 | - The ability to replace the filesystem functions used, in order to glob 14 | on virtual filesystems. 15 | 16 | - Compatible with Python 2 and Python 3 (tested with 3.3). 17 | 18 | It's currently based on the glob code from Python 3.3.1. 19 | 20 | 21 | Examples 22 | -------- 23 | 24 | Matches being returned: 25 | ~~~~~~~~~~~~~~~~~~~~~~~ 26 | 27 | :: 28 | 29 | import glob2 30 | 31 | for filename, (version,) in glob2.iglob('./binaries/project-*.zip', with_matches=True): 32 | print(version) 33 | 34 | 35 | Recursive glob: 36 | ~~~~~~~~~~~~~~~ 37 | 38 | :: 39 | 40 | >>> import glob2 41 | >>> all_header_files = glob2.glob('src/**/*.h') 42 | ['src/fs.h', 'src/media/mp3.h', 'src/media/mp3/frame.h', ...] 43 | 44 | 45 | Note that ``**`` must appear on its own as a directory 46 | element to have its special meaning. ``**h`` will not have the 47 | desired effect. 48 | 49 | ``**`` will match ".", so ``**/*.py`` returns Python files in the 50 | current directory. If this is not wanted, ``*/**/*.py`` should be used 51 | instead. 
52 | 53 | 54 | Custom Globber: 55 | ~~~~~~~~~~~~~~~ 56 | 57 | :: 58 | 59 | from glob2 import Globber 60 | 61 | class VirtualStorageGlobber(Globber): 62 | def __init__(self, storage): 63 | self.storage = storage 64 | def listdir(self, path): 65 | # Must raise os.error if path is not a directory 66 | return self.storage.listdir(path) 67 | def exists(self, path): 68 | return self.storage.exists(path) 69 | def isdir(self, path): 70 | # Used only for trailing slash syntax (``foo/``). 71 | return self.storage.isdir(path) 72 | def islink(self, path): 73 | # Used only for recursive glob (``**``). 74 | return self.storage.islink(path) 75 | 76 | globber = VirtualStorageGlobber(sftp_storage) 77 | globber.glob('/var/www/**/*.js') 78 | 79 | 80 | If ``isdir`` and/or ``islink`` cannot be implemented for a storage, you can 81 | make them return a fixed value, with the following consequences: 82 | 83 | - If ``isdir`` returns ``True``, a glob expression ending with a slash 84 | will return all items, even non-directories; if it returns ``False``, 85 | the same glob expression will return nothing. 86 | 87 | - If ``islink`` returns ``False``, the recursive globbing syntax ** will 88 | follow all links. If it returns ``True``, ** will not descend into any subdirectory. 89 | -------------------------------------------------------------------------------- /RELEASING.md: -------------------------------------------------------------------------------- 1 | - Update CHANGES. 2 | - Update glob2/__init__.py 3 | - git tag -a v0.X 4 | - par 5 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | Because our implementation of recursive directory search (**) using 2 | os.walk, and the matching using fnmatch, are both not using iterators, 3 | something like /** currently needs to read the whole filesystem into 4 | memory before returning anything. 
5 | -------------------------------------------------------------------------------- /glob2/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .impl import * 3 | 4 | 5 | __version__ = (0, 7) 6 | -------------------------------------------------------------------------------- /glob2/compat.py: -------------------------------------------------------------------------------- 1 | # Back-port functools.lru_cache to Python 2 (and <= 3.2) 2 | # {{{ http://code.activestate.com/recipes/578078/ (r6) 3 | 4 | from collections import namedtuple 5 | from functools import update_wrapper 6 | from threading import RLock 7 | 8 | _CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"]) 9 | 10 | class _HashedSeq(list): 11 | __slots__ = 'hashvalue' 12 | 13 | def __init__(self, tup, hash=hash): 14 | self[:] = tup 15 | self.hashvalue = hash(tup) 16 | 17 | def __hash__(self): 18 | return self.hashvalue 19 | 20 | def _make_key(args, kwds, typed, 21 | kwd_mark = (object(),), 22 | fasttypes = set((int, str, frozenset, type(None))), 23 | sorted=sorted, tuple=tuple, type=type, len=len): 24 | 'Make a cache key from optionally typed positional and keyword arguments' 25 | key = args 26 | if kwds: 27 | sorted_items = sorted(kwds.items()) 28 | key += kwd_mark 29 | for item in sorted_items: 30 | key += item 31 | if typed: 32 | key += tuple(type(v) for v in args) 33 | if kwds: 34 | key += tuple(type(v) for k, v in sorted_items) 35 | elif len(key) == 1 and type(key[0]) in fasttypes: 36 | return key[0] 37 | return _HashedSeq(key) 38 | 39 | def lru_cache(maxsize=100, typed=False): 40 | """Least-recently-used cache decorator. 41 | 42 | If *maxsize* is set to None, the LRU features are disabled and the cache 43 | can grow without bound. 44 | 45 | If *typed* is True, arguments of different types will be cached separately. 
46 | For example, f(3.0) and f(3) will be treated as distinct calls with 47 | distinct results. 48 | 49 | Arguments to the cached function must be hashable. 50 | 51 | View the cache statistics named tuple (hits, misses, maxsize, currsize) with 52 | f.cache_info(). Clear the cache and statistics with f.cache_clear(). 53 | Access the underlying function with f.__wrapped__. 54 | 55 | See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used 56 | 57 | """ 58 | 59 | # Users should only access the lru_cache through its public API: 60 | # cache_info, cache_clear, and f.__wrapped__ 61 | # The internals of the lru_cache are encapsulated for thread safety and 62 | # to allow the implementation to change (including a possible C version). 63 | 64 | def decorating_function(user_function): 65 | 66 | cache = dict() 67 | stats = [0, 0] # make statistics updateable non-locally 68 | HITS, MISSES = 0, 1 # names for the stats fields 69 | make_key = _make_key 70 | cache_get = cache.get # bound method to lookup key or return None 71 | _len = len # localize the global len() function 72 | lock = RLock() # because linkedlist updates aren't threadsafe 73 | root = [] # root of the circular doubly linked list 74 | root[:] = [root, root, None, None] # initialize by pointing to self 75 | nonlocal_root = [root] # make updateable non-locally 76 | PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields 77 | 78 | if maxsize == 0: 79 | 80 | def wrapper(*args, **kwds): 81 | # no caching, just do a statistics update after a successful call 82 | result = user_function(*args, **kwds) 83 | stats[MISSES] += 1 84 | return result 85 | 86 | elif maxsize is None: 87 | 88 | def wrapper(*args, **kwds): 89 | # simple caching without ordering or size limit 90 | key = make_key(args, kwds, typed) 91 | result = cache_get(key, root) # root used here as a unique not-found sentinel 92 | if result is not root: 93 | stats[HITS] += 1 94 | return result 95 | result = user_function(*args, **kwds) 96 | 
cache[key] = result 97 | stats[MISSES] += 1 98 | return result 99 | 100 | else: 101 | 102 | def wrapper(*args, **kwds): 103 | # size limited caching that tracks accesses by recency 104 | key = make_key(args, kwds, typed) if kwds or typed else args 105 | with lock: 106 | link = cache_get(key) 107 | if link is not None: 108 | # record recent use of the key by moving it to the front of the list 109 | root, = nonlocal_root 110 | link_prev, link_next, key, result = link 111 | link_prev[NEXT] = link_next 112 | link_next[PREV] = link_prev 113 | last = root[PREV] 114 | last[NEXT] = root[PREV] = link 115 | link[PREV] = last 116 | link[NEXT] = root 117 | stats[HITS] += 1 118 | return result 119 | result = user_function(*args, **kwds) 120 | with lock: 121 | root, = nonlocal_root 122 | if key in cache: 123 | # getting here means that this same key was added to the 124 | # cache while the lock was released. since the link 125 | # update is already done, we need only return the 126 | # computed result and update the count of misses. 
127 | pass 128 | elif _len(cache) >= maxsize: 129 | # use the old root to store the new key and result 130 | oldroot = root 131 | oldroot[KEY] = key 132 | oldroot[RESULT] = result 133 | # empty the oldest link and make it the new root 134 | root = nonlocal_root[0] = oldroot[NEXT] 135 | oldkey = root[KEY] 136 | oldvalue = root[RESULT] 137 | root[KEY] = root[RESULT] = None 138 | # now update the cache dictionary for the new links 139 | del cache[oldkey] 140 | cache[key] = oldroot 141 | else: 142 | # put result in a new link at the front of the list 143 | last = root[PREV] 144 | link = [last, root, key, result] 145 | last[NEXT] = root[PREV] = cache[key] = link 146 | stats[MISSES] += 1 147 | return result 148 | 149 | def cache_info(): 150 | """Report cache statistics""" 151 | with lock: 152 | return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache)) 153 | 154 | def cache_clear(): 155 | """Clear the cache and cache statistics""" 156 | with lock: 157 | cache.clear() 158 | root = nonlocal_root[0] 159 | root[:] = [root, root, None, None] 160 | stats[:] = [0, 0] 161 | 162 | wrapper.__wrapped__ = user_function 163 | wrapper.cache_info = cache_info 164 | wrapper.cache_clear = cache_clear 165 | return update_wrapper(wrapper, user_function) 166 | 167 | return decorating_function -------------------------------------------------------------------------------- /glob2/fnmatch.py: -------------------------------------------------------------------------------- 1 | """Filename matching with shell patterns. 2 | 3 | fnmatch(FILENAME, PATTERN) matches according to the local convention. 4 | fnmatchcase(FILENAME, PATTERN) always takes case in account. 5 | 6 | The functions operate by translating the pattern into a regular 7 | expression. They cache the compiled regular expressions for speed. 8 | 9 | The function translate(PATTERN) returns a regular expression 10 | corresponding to PATTERN. (It does not compile it.) 
11 | """ 12 | import os 13 | import re 14 | try: 15 | from functools import lru_cache 16 | except ImportError: 17 | from .compat import lru_cache 18 | 19 | __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] 20 | 21 | 22 | def _norm_paths(path, norm_paths, sep): 23 | if norm_paths is None: 24 | path = re.sub(r'\/', sep or os.sep, path) # cached internally 25 | elif norm_paths: 26 | path = os.path.normcase(path) 27 | return path 28 | 29 | 30 | def fnmatch(name, pat, norm_paths=True, case_sensitive=True, sep=None): 31 | """Test whether FILENAME matches PATTERN. 32 | 33 | Patterns are Unix shell style: 34 | 35 | * matches everything 36 | ? matches any single character 37 | [seq] matches any character in seq 38 | [!seq] matches any char not in seq 39 | 40 | An initial period in FILENAME is not special. 41 | Both FILENAME and PATTERN are first case-normalized 42 | if the operating system requires it. 43 | If you don't want this, use fnmatchcase(FILENAME, PATTERN). 44 | 45 | :param slashes: 46 | :param norm_paths: 47 | A tri-state boolean: 48 | when true, invokes `os.path,.normcase()` on both paths, 49 | when `None`, just equalize slashes/backslashes to `os.sep`, 50 | when false, does not touch paths at all. 51 | 52 | Note that a side-effect of `normcase()` on *Windows* is that 53 | it converts to lower-case all matches of `?glob()` functions. 54 | :param case_sensitive: 55 | defines the case-sensitiviness of regex doing the matches 56 | :param sep: 57 | in case only slahes replaced, what sep-char to substitute with; 58 | if false, `os.sep` is used. 59 | 60 | Notice that by default, `normcase()` causes insensitive matching 61 | on *Windows*, regardless of `case_insensitive` param. 62 | Set ``norm_paths=None, case_sensitive=False`` to preserve 63 | verbatim mathces. 
def translate(pat):
    """Translate a shell PATTERN to a regular expression.

    Every wildcard (``*``, ``?`` and ``[seq]``) is emitted as its own
    capturing group, so a match object's ``groups()`` holds the text each
    wildcard matched.  An unterminated ``[`` is treated as a literal
    bracket.

    There is no way to quote meta-characters.

    :param pat: the shell-style pattern (text).
    :return: the regular expression source as a string (not compiled).
    """
    i, n = 0, len(pat)
    parts = []
    while i < n:
        c = pat[i]
        i += 1
        if c == '*':
            parts.append('(.*)')
        elif c == '?':
            parts.append('(.)')
        elif c == '[':
            # Find the closing bracket; a leading '!' and a ']' directly
            # after the opening (or after '!') belong to the set itself.
            j = i
            if j < n and pat[j] == '!':
                j += 1
            if j < n and pat[j] == ']':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # No closing bracket: match a literal '['.
                parts.append('\\[')
            else:
                stuff = pat[i:j].replace('\\', '\\\\')
                i = j + 1
                if stuff[0] == '!':
                    # Shell negation -> regex negation.
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    # A literal '^' must not negate the regex set.
                    stuff = '\\' + stuff
                parts.append('([%s])' % stuff)
        else:
            parts.append(re.escape(c))
    # Raw string: '\Z' in a normal literal is an invalid escape sequence
    # and triggers a Deprecation/SyntaxWarning on modern Python.
    return '(?ms)' + ''.join(parts) + r'\Z'
30 | """ 31 | try: 32 | names = self.listdir(top) 33 | except os.error as err: 34 | return 35 | 36 | items = [] 37 | for name in names: 38 | items.append(name) 39 | 40 | yield top, items 41 | 42 | for name in items: 43 | new_path = _join_paths([top, name], sep=sep) 44 | if followlinks or not self.islink(new_path): 45 | for x in self.walk(new_path, followlinks): 46 | yield x 47 | 48 | def glob(self, pathname, with_matches=False, include_hidden=False, recursive=True, 49 | norm_paths=True, case_sensitive=True, sep=None): 50 | """Return a list of paths matching a pathname pattern. 51 | 52 | The pattern may contain simple shell-style wildcards a la 53 | fnmatch. However, unlike fnmatch, filenames starting with a 54 | dot are special cases that are not matched by '*' and '?' 55 | patterns. 56 | 57 | If ``include_hidden`` is True, then files and folders starting with 58 | a dot are also returned. 59 | """ 60 | return list(self.iglob(pathname, with_matches, include_hidden, 61 | norm_paths, case_sensitive, sep)) 62 | 63 | def iglob(self, pathname, with_matches=False, include_hidden=False, recursive=True, 64 | norm_paths=True, case_sensitive=True, sep=None): 65 | """Return an iterator which yields the paths matching a pathname 66 | pattern. 67 | 68 | The pattern may contain simple shell-style wildcards a la 69 | fnmatch. However, unlike fnmatch, filenames starting with a 70 | dot are special cases that are not matched by '*' and '?' 71 | patterns. 72 | 73 | If ``with_matches`` is True, then for each matching path 74 | a 2-tuple will be returned; the second element if the tuple 75 | will be a list of the parts of the path that matched the individual 76 | wildcards. 77 | 78 | If ``include_hidden`` is True, then files and folders starting with 79 | a dot are also returned. 
80 | """ 81 | result = self._iglob(pathname, True, include_hidden, 82 | norm_paths, case_sensitive, sep) 83 | if with_matches: 84 | return result 85 | return imap(lambda s: s[0], result) 86 | 87 | def _iglob(self, pathname, rootcall, include_hidden, 88 | norm_paths, case_sensitive, sep): 89 | """Internal implementation that backs :meth:`iglob`. 90 | 91 | ``rootcall`` is required to differentiate between the user's call to 92 | iglob(), and subsequent recursive calls, for the purposes of resolving 93 | certain special cases of ** wildcards. Specifically, "**" is supposed 94 | to include the current directory for purposes of globbing, but the 95 | directory itself should never be returned. So if ** is the lastmost 96 | part of the ``pathname`` given the user to the root call, we want to 97 | ignore the current directory. For this, we need to know which the root 98 | call is. 99 | """ 100 | 101 | # Short-circuit if no glob magic 102 | if not has_magic(pathname): 103 | if self.exists(pathname): 104 | yield pathname, () 105 | return 106 | 107 | # If no directory part is left, assume the working directory 108 | dirname, basename = os.path.split(pathname) 109 | 110 | # If the directory is globbed, recurse to resolve. 111 | # If at this point there is no directory part left, we simply 112 | # continue with dirname="", which will search the current dir. 113 | # `os.path.split()` returns the argument itself as a dirname if it is a 114 | # drive or UNC path. Prevent an infinite recursion if a drive or UNC path 115 | # contains magic characters (i.e. r'\\?\C:'). 116 | if dirname != pathname and has_magic(dirname): 117 | # Note that this may return files, which will be ignored 118 | # later when we try to use them as directories. 119 | # Prefiltering them here would only require more IO ops. 
120 | dirs = self._iglob(dirname, False, include_hidden, 121 | norm_paths, case_sensitive, sep) 122 | else: 123 | dirs = [(dirname, ())] 124 | 125 | # Resolve ``basename`` expr for every directory found 126 | for dirname, dir_groups in dirs: 127 | for name, groups in self.resolve_pattern(dirname, basename, 128 | not rootcall, include_hidden, 129 | norm_paths, case_sensitive, sep): 130 | yield _join_paths([dirname, name], sep=sep), dir_groups + groups 131 | 132 | def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden, 133 | norm_paths, case_sensitive, sep): 134 | """Apply ``pattern`` (contains no path elements) to the 135 | literal directory in ``dirname``. 136 | 137 | If pattern=='', this will filter for directories. This is 138 | a special case that happens when the user's glob expression ends 139 | with a slash (in which case we only want directories). It simpler 140 | and faster to filter here than in :meth:`_iglob`. 141 | """ 142 | 143 | if sys.version_info[0] == 3: 144 | if isinstance(pattern, bytes): 145 | dirname = bytes(os.curdir, 'ASCII') 146 | else: 147 | if isinstance(pattern, unicode) and not isinstance(dirname, unicode): 148 | dirname = unicode(dirname, sys.getfilesystemencoding() or 149 | sys.getdefaultencoding()) 150 | 151 | # If no magic, short-circuit, only check for existence 152 | if not has_magic(pattern): 153 | if pattern == '': 154 | if self.isdir(dirname): 155 | return [(pattern, ())] 156 | else: 157 | if self.exists(_join_paths([dirname, pattern], sep=sep)): 158 | return [(pattern, ())] 159 | return [] 160 | 161 | if not dirname: 162 | dirname = os.curdir 163 | 164 | try: 165 | if pattern == '**': 166 | # Include the current directory in **, if asked; by adding 167 | # an empty string as opposed to '.', we spare ourselves 168 | # having to deal with os.path.normpath() later. 
169 | names = [''] if globstar_with_root else [] 170 | for top, entries in self.walk(dirname, sep=sep): 171 | _mkabs = lambda s: _join_paths([top[len(dirname) + 1:], s], sep=sep) 172 | names.extend(map(_mkabs, entries)) 173 | # Reset pattern so that fnmatch(), which does not understand 174 | # ** specifically, will only return a single group match. 175 | pattern = '*' 176 | else: 177 | names = self.listdir(dirname) 178 | except os.error: 179 | return [] 180 | 181 | if not include_hidden and not _ishidden(pattern): 182 | # Remove hidden files, but take care to ensure 183 | # that the empty string we may have added earlier remains. 184 | # Do not filter out the '' that we might have added earlier 185 | names = filter(lambda x: not x or not _ishidden(x), names) 186 | return fnmatch.filter(names, pattern, norm_paths, case_sensitive, sep) 187 | 188 | 189 | default_globber = Globber() 190 | glob = default_globber.glob 191 | iglob = default_globber.iglob 192 | del default_globber 193 | 194 | 195 | magic_check = re.compile('[*?[]') 196 | magic_check_bytes = re.compile(b'[*?[]') 197 | 198 | 199 | def has_magic(s): 200 | if isinstance(s, bytes): 201 | match = magic_check_bytes.search(s) 202 | else: 203 | match = magic_check.search(s) 204 | return match is not None 205 | 206 | 207 | def _ishidden(path): 208 | return path[0] in ('.', b'.'[0]) 209 | 210 | 211 | def _join_paths(paths, sep=None): 212 | path = join(*paths) 213 | if sep: 214 | path = re.sub(r'\/', sep, path) # cached internally 215 | return path 216 | 217 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | from setuptools import setup, 
find_packages 4 | 5 | 6 | # Figure out the version 7 | import re 8 | here = os.path.dirname(os.path.abspath(__file__)) 9 | version_re = re.compile( 10 | r'__version__ = (\(.*?\))') 11 | fp = open(os.path.join(here, 'glob2', '__init__.py')) 12 | version = None 13 | for line in fp: 14 | match = version_re.search(line) 15 | if match: 16 | version = eval(match.group(1)) 17 | break 18 | else: 19 | raise Exception("Cannot find version in __init__.py") 20 | fp.close() 21 | 22 | 23 | setup( 24 | name = 'glob2', 25 | version = ".".join(map(str, version)), 26 | description = 'Version of the glob module that can capture patterns '+ 27 | 'and supports recursive wildcards', 28 | author = 'Michael Elsdoerfer', 29 | author_email = 'michael@elsdoerfer.com', 30 | license='BSD', 31 | url = 'http://github.com/miracle2k/python-glob2/', 32 | classifiers = [ 33 | 'Development Status :: 3 - Alpha', 34 | 'Intended Audience :: Developers', 35 | 'License :: OSI Approved :: BSD License', 36 | 'Operating System :: OS Independent', 37 | 'Programming Language :: Python', 38 | 'Programming Language :: Python :: 3', 39 | 'Topic :: Software Development :: Libraries', 40 | ], 41 | packages = find_packages() 42 | ) 43 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os import path 3 | import shutil 4 | import tempfile 5 | 6 | import glob2 7 | from glob2 import fnmatch 8 | 9 | 10 | class TestFnmatch(object): 11 | 12 | def test_filter_everything(self): 13 | names = ( 14 | 'fooABC', 'barABC', 'foo',) 15 | assert fnmatch.filter(names, 'foo*') == [ 16 | ('fooABC', ('ABC',)), 17 | ('foo', ('',)) 18 | ] 19 | assert fnmatch.filter(names, '*AB*') == [ 20 | ('fooABC', ('foo', 'C')), 21 | ('barABC', ('bar', 'C')) 22 | ] 23 | 24 | def test_filter_single_character(self): 25 | names = ( 26 | 'fooA', 'barA', 'foo',) 27 | assert fnmatch.filter(names, 'foo?') == [ 28 | 
class BaseTest(object):
    """Base class for tests that need a scratch directory.

    Before each test a fresh temporary directory is created and made the
    current working directory, then :meth:`setup_files` is called so
    subclasses can populate it.  Afterwards the previous working directory
    is restored and the temporary directory is removed.
    """

    def setup(self):
        """Create the temporary directory, chdir into it, add files."""
        self.basedir = tempfile.mkdtemp()
        self._old_cwd = os.getcwd()
        os.chdir(self.basedir)

        self.setup_files()

    def setup_files(self):
        """Hook for subclasses: create the files a test class needs."""
        pass

    def teardown(self):
        """Restore the old working directory and delete the temp dir."""
        os.chdir(self._old_cwd)
        shutil.rmtree(self.basedir)

    # Current pytest releases no longer call the nose-style
    # ``setup``/``teardown`` methods; expose the pytest-native names too.
    def setup_method(self, method=None):
        """pytest per-test hook; delegates to :meth:`setup`."""
        self.setup()

    def teardown_method(self, method=None):
        """pytest per-test hook; delegates to :meth:`teardown`."""
        self.teardown()

    def makedirs(self, *names):
        """Create each directory in ``names`` below ``self.basedir``."""
        for name in names:
            os.makedirs(path.join(self.basedir, name))

    def touch(self, *names):
        """Create an empty file for each name below ``self.basedir``."""
        for name in names:
            with open(path.join(self.basedir, name), 'w'):
                pass
'', 'bar')), 104 | ('a/foo/hello.py', ('a', 'foo', 'hello')), 105 | ('b/bar.py', ('b', '', 'bar')) 106 | ] 107 | 108 | def test_only_directories(self): 109 | # Return directories only 110 | assert sorted(glob2.glob('**/', True)) == [ 111 | ('a/', ('a',)), 112 | ('a/foo/', ('a/foo',)), 113 | ('b/', ('b',)), 114 | ] 115 | 116 | def test_parent_dir(self): 117 | # Make sure ".." can be used 118 | os.chdir(path.join(self.basedir, 'b')) 119 | assert sorted(glob2.glob('../a/**/*.py', True)), [ 120 | ('../a/bar.py', ('', 'bar')), 121 | ('../a/foo/hello.py', ('foo', 'hello')) 122 | ] 123 | 124 | def test_fixed_basename(self): 125 | assert sorted(glob2.glob('**/bar.py', True)) == [ 126 | ('a/bar.py', ('a',)), 127 | ('b/bar.py', ('b',)), 128 | ] 129 | 130 | def test_all_files(self): 131 | # Return all files 132 | os.chdir(path.join(self.basedir, 'a')) 133 | assert sorted(glob2.glob('**', True)) == [ 134 | ('bar.py', ('bar.py',)), 135 | ('foo', ('foo',)), 136 | ('foo/hello.py', ('foo/hello.py',)), 137 | ('foo/world.txt', ('foo/world.txt',)), 138 | ] 139 | 140 | def test_root_directory_not_returned(self): 141 | # Ensure that a certain codepath (when the basename is globbed 142 | # with ** as opposed to the dirname) does not cause 143 | # the root directory to be part of the result. 144 | # -> b/ is NOT in the result! 145 | assert sorted(glob2.glob('b/**', True)) == [ 146 | ('b/bar.py', ('bar.py',)), 147 | ('b/py', ('py',)), 148 | ] 149 | 150 | def test_non_glob(self): 151 | # Test without patterns. 
class TestIncludeHidden(BaseTest):
    """Checks that ``include_hidden=True`` returns dot-files and dot-dirs."""

    def setup_files(self):
        self.makedirs('a', 'b', 'a/.foo')
        self.touch('file.py', 'file.txt', 'a/.bar', 'README', 'b/py',
                   'b/.bar', 'a/.foo/hello.py', 'a/.foo/world.txt')

    def test_hidden(self):
        # With include_hidden, '*' must also match names starting with a dot.
        # Compare with ``==``: writing ``assert value, expected`` would use
        # the expected list as the failure message and never check it.
        assert sorted(glob2.glob('*/*', True, include_hidden=True)) == [
            ('a/.bar', ('a', '.bar')),
            ('a/.foo', ('a', '.foo')),
            ('b/.bar', ('b', '.bar')),
            ('b/py', ('b', 'py')),
        ]