├── .gitignore
├── .idea
├── encodings.xml
├── modules.xml
└── python-glob2.iml
├── CHANGES
├── LICENSE
├── MANIFEST.in
├── README.rst
├── RELEASING.md
├── TODO
├── glob2
├── __init__.py
├── compat.py
├── fnmatch.py
└── impl.py
├── setup.cfg
├── setup.py
└── test.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | /LOCAL_TODO
3 |
4 | # distutils/setuptools
5 | /dist/
6 | *egg-info
7 |
8 | # IDEs
9 | *.wpr
10 | /.idea/
11 |
12 | # Folder config file
13 | [Dd]esktop.ini
14 |
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/.idea/python-glob2.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/CHANGES:
--------------------------------------------------------------------------------
1 | 0.7 (2019-06-11)
2 | - Fix deprecation warning.
3 |
4 | 0.6 (2017-08-17)
5 | - Support dummy recursive parameter in APIs to match the new
6 | call signature of Python 3.
7 | - Fix lru_cache (Kostis Anagnostopoulos).
8 | - Introduce norm_paths and case_sensitive options to optionally
9 | change the behaviour (Kostis Anagnostopoulos).
10 |
11 | 0.5 (2016-11-04)
12 | - include_hidden option.
13 | - Python 3 fixes.
14 | - Publish a wheel.
15 |
16 | 0.4 (2013-05-08)
17 | - Support Python 3.
18 |
19 | 0.3 (2012-01-19)
20 | - Fix non-glob patterns (patch by Zalan).
21 | - Don't shadow internal "glob" module.
22 |
23 | 0.2 (2011-06-14)
24 | - Initial release.
25 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2008, Michael Elsdörfer
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions
6 | are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials
14 | provided with the distribution.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 | POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.rst CHANGES LICENSE
2 | include test.py
3 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | python-glob2
2 | ============
3 |
4 | This is an extended version of Python's builtin glob module
5 | (http://docs.python.org/library/glob.html) which adds:
6 |
7 | - The ability to capture the text matched by glob patterns, and
8 | return those matches alongside the filenames.
9 |
10 | - A recursive '**' globbing syntax, akin for example to the ``globstar``
11 | option of the bash shell.
12 |
13 | - The ability to replace the filesystem functions used, in order to glob
14 | on virtual filesystems.
15 |
16 | - Compatible with Python 2 and Python 3 (tested with 3.3).
17 |
18 | It's currently based on the glob code from Python 3.3.1.
19 |
20 |
21 | Examples
22 | --------
23 |
24 | Matches being returned:
25 | ~~~~~~~~~~~~~~~~~~~~~~~
26 |
27 | ::
28 |
29 | import glob2
30 |
31 | for filename, (version,) in glob2.iglob('./binaries/project-*.zip', with_matches=True):
32 | print version
33 |
34 |
35 | Recursive glob:
36 | ~~~~~~~~~~~~~~~
37 |
38 | ::
39 |
40 | >>> import glob2
41 | >>> all_header_files = glob2.glob('src/**/*.h')
42 | ['src/fs.h', 'src/media/mp3.h', 'src/media/mp3/frame.h', ...]
43 |
44 |
45 | Note that ``**`` must appear on its own as a directory
46 | element to have its special meaning. ``**h`` will not have the
47 | desired effect.
48 |
49 | ``**`` will match ".", so ``**/*.py`` returns Python files in the
50 | current directory. If this is not wanted, ``*/**/*.py`` should be used
51 | instead.
52 |
53 |
54 | Custom Globber:
55 | ~~~~~~~~~~~~~~~
56 |
57 | ::
58 |
59 | from glob2 import Globber
60 |
61 | class VirtualStorageGlobber(Globber):
62 | def __init__(self, storage):
63 | self.storage = storage
64 | def listdir(self, path):
65 | # Must raise os.error if path is not a directory
66 | return self.storage.listdir(path)
67 | def exists(self, path):
68 | return self.storage.exists(path)
69 | def isdir(self, path):
70 | # Used only for trailing slash syntax (``foo/``).
71 | return self.storage.isdir(path)
72 | def islink(self, path):
73 | # Used only for recursive glob (``**``).
74 | return self.storage.islink(path)
75 |
76 | globber = VirtualStorageGlobber(sftp_storage)
77 | globber.glob('/var/www/**/*.js')
78 |
79 |
80 | If ``isdir`` and/or ``islink`` cannot be implemented for a storage, you can
81 | make them return a fixed value, with the following consequences:
82 |
83 | - If ``isdir`` always returns ``True``, a glob expression ending with a
84 | slash will return all items, even non-directories; if it always returns
85 | ``False``, the same glob expression will return nothing.
86 |
87 | - If ``islink`` always returns ``False``, the recursive globbing syntax ``**``
88 | will follow all links. If it always returns ``True``, ``**`` will not recurse into subdirectories at all.
89 |
--------------------------------------------------------------------------------
/RELEASING.md:
--------------------------------------------------------------------------------
1 | - Update CHANGES.
2 | - Update glob2/__init__.py
3 | - git tag -a v0.X
4 | - par
5 |
--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
1 | Because neither our implementation of recursive directory search (**),
2 | which is modelled on os.walk, nor the matching done via fnmatch uses
3 | iterators, something like /** currently needs to read the whole
4 | filesystem into memory before returning anything.
5 |
--------------------------------------------------------------------------------
/glob2/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from .impl import *
3 |
4 |
5 | __version__ = (0, 7)
6 |
--------------------------------------------------------------------------------
/glob2/compat.py:
--------------------------------------------------------------------------------
1 | # Back-port functools.lru_cache to Python 2 (and <= 3.2)
2 | # {{{ http://code.activestate.com/recipes/578078/ (r6)
3 |
4 | from collections import namedtuple
5 | from functools import update_wrapper
6 | from threading import RLock
7 |
8 | _CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
9 |
10 | class _HashedSeq(list):
11 | __slots__ = 'hashvalue'
12 |
13 | def __init__(self, tup, hash=hash):
14 | self[:] = tup
15 | self.hashvalue = hash(tup)
16 |
17 | def __hash__(self):
18 | return self.hashvalue
19 |
20 | def _make_key(args, kwds, typed,
21 | kwd_mark = (object(),),
22 | fasttypes = set((int, str, frozenset, type(None))),
23 | sorted=sorted, tuple=tuple, type=type, len=len):
24 | 'Make a cache key from optionally typed positional and keyword arguments'
25 | key = args
26 | if kwds:
27 | sorted_items = sorted(kwds.items())
28 | key += kwd_mark
29 | for item in sorted_items:
30 | key += item
31 | if typed:
32 | key += tuple(type(v) for v in args)
33 | if kwds:
34 | key += tuple(type(v) for k, v in sorted_items)
35 | elif len(key) == 1 and type(key[0]) in fasttypes:
36 | return key[0]
37 | return _HashedSeq(key)
38 |
def lru_cache(maxsize=100, typed=False):
    """Least-recently-used cache decorator.

    If *maxsize* is set to None, the LRU features are disabled and the cache
    can grow without bound.

    If *maxsize* is 0, nothing is cached; every call is only counted as a
    miss.

    If *typed* is True, arguments of different types will be cached separately.
    For example, f(3.0) and f(3) will be treated as distinct calls with
    distinct results.

    Arguments to the cached function must be hashable.

    View the cache statistics named tuple (hits, misses, maxsize, currsize) with
    f.cache_info().  Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.

    See:  http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used

    """

    # Users should only access the lru_cache through its public API:
    #       cache_info, cache_clear, and f.__wrapped__
    # The internals of the lru_cache are encapsulated for thread safety and
    # to allow the implementation to change (including a possible C version).

    def decorating_function(user_function):

        cache = dict()
        stats = [0, 0]                  # make statistics updateable non-locally
        HITS, MISSES = 0, 1             # names for the stats fields
        make_key = _make_key
        cache_get = cache.get           # bound method to lookup key or return None
        _len = len                      # localize the global len() function
        lock = RLock()                  # because linkedlist updates aren't threadsafe
        root = []                       # root of the circular doubly linked list
        root[:] = [root, root, None, None]      # initialize by pointing to self
        nonlocal_root = [root]                  # make updateable non-locally
        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3    # names for the link fields

        # One of three wrappers is chosen once, at decoration time, based on
        # maxsize.
        if maxsize == 0:

            def wrapper(*args, **kwds):
                # no caching, just do a statistics update after a successful call
                result = user_function(*args, **kwds)
                stats[MISSES] += 1
                return result

        elif maxsize is None:

            def wrapper(*args, **kwds):
                # simple caching without ordering or size limit
                key = make_key(args, kwds, typed)
                result = cache_get(key, root)   # root used here as a unique not-found sentinel
                if result is not root:
                    stats[HITS] += 1
                    return result
                result = user_function(*args, **kwds)
                cache[key] = result
                stats[MISSES] += 1
                return result

        else:

            def wrapper(*args, **kwds):
                # size limited caching that tracks accesses by recency
                # Plain positional, untyped calls use the args tuple itself
                # as the key and skip make_key().
                key = make_key(args, kwds, typed) if kwds or typed else args
                with lock:
                    link = cache_get(key)
                    if link is not None:
                        # record recent use of the key by moving it to the front of the list
                        root, = nonlocal_root
                        link_prev, link_next, key, result = link
                        link_prev[NEXT] = link_next
                        link_next[PREV] = link_prev
                        last = root[PREV]
                        last[NEXT] = root[PREV] = link
                        link[PREV] = last
                        link[NEXT] = root
                        stats[HITS] += 1
                        return result
                # The user function runs between the two locked sections,
                # i.e. while the lock is not held.
                result = user_function(*args, **kwds)
                with lock:
                    root, = nonlocal_root
                    if key in cache:
                        # getting here means that this same key was added to the
                        # cache while the lock was released.  since the link
                        # update is already done, we need only return the
                        # computed result and update the count of misses.
                        pass
                    elif _len(cache) >= maxsize:
                        # use the old root to store the new key and result
                        oldroot = root
                        oldroot[KEY] = key
                        oldroot[RESULT] = result
                        # empty the oldest link and make it the new root
                        root = nonlocal_root[0] = oldroot[NEXT]
                        oldkey = root[KEY]
                        # NOTE(review): oldvalue is not read again in this function.
                        oldvalue = root[RESULT]
                        root[KEY] = root[RESULT] = None
                        # now update the cache dictionary for the new links
                        del cache[oldkey]
                        cache[key] = oldroot
                    else:
                        # put result in a new link at the front of the list
                        last = root[PREV]
                        link = [last, root, key, result]
                        last[NEXT] = root[PREV] = cache[key] = link
                    stats[MISSES] += 1
                return result

        def cache_info():
            """Report cache statistics"""
            with lock:
                return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))

        def cache_clear():
            """Clear the cache and cache statistics"""
            with lock:
                cache.clear()
                root = nonlocal_root[0]
                # Reset the current root to an empty, self-referencing ring.
                root[:] = [root, root, None, None]
                stats[:] = [0, 0]

        # Expose the public API on the wrapper before copying the wrapped
        # function's metadata onto it.
        wrapper.__wrapped__ = user_function
        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return update_wrapper(wrapper, user_function)

    return decorating_function
--------------------------------------------------------------------------------
/glob2/fnmatch.py:
--------------------------------------------------------------------------------
1 | """Filename matching with shell patterns.
2 |
3 | fnmatch(FILENAME, PATTERN) matches according to the local convention.
4 | fnmatchcase(FILENAME, PATTERN) always takes case in account.
5 |
6 | The functions operate by translating the pattern into a regular
7 | expression. They cache the compiled regular expressions for speed.
8 |
9 | The function translate(PATTERN) returns a regular expression
10 | corresponding to PATTERN. (It does not compile it.)
11 | """
12 | import os
13 | import re
14 | try:
15 | from functools import lru_cache
16 | except ImportError:
17 | from .compat import lru_cache
18 |
19 | __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
20 |
21 |
22 | def _norm_paths(path, norm_paths, sep):
23 | if norm_paths is None:
24 | path = re.sub(r'\/', sep or os.sep, path) # cached internally
25 | elif norm_paths:
26 | path = os.path.normcase(path)
27 | return path
28 |
29 |
def fnmatch(name, pat, norm_paths=True, case_sensitive=True, sep=None):
    """Test whether FILENAME matches PATTERN.

    Patterns are Unix shell style:

    *       matches everything
    ?       matches any single character
    [seq]   matches any character in seq
    [!seq]  matches any char not in seq

    An initial period in FILENAME is not special.
    Both FILENAME and PATTERN are normalized first, as selected by
    ``norm_paths``. If you don't want this, use
    fnmatchcase(FILENAME, PATTERN).

    :param norm_paths:
        A tri-state boolean:
        when true, invokes `os.path.normcase()` on both paths,
        when `None`, just equalizes slashes/backslashes to `sep`,
        when false, does not touch paths at all.

        Note that a side-effect of `normcase()` on *Windows* is that
        it converts to lower-case all matches of `?glob()` functions.
    :param case_sensitive:
        defines the case-sensitivity of the regex doing the matches.
    :param sep:
        when only slashes are replaced (``norm_paths=None``), the
        separator to substitute with; if false, `os.sep` is used.

    Notice that by default, `normcase()` causes insensitive matching
    on *Windows*, regardless of the `case_sensitive` param.
    Set ``norm_paths=None, case_sensitive=False`` to preserve
    verbatim matches.
    """
    normalized_name = _norm_paths(name, norm_paths, sep)
    normalized_pat = _norm_paths(pat, norm_paths, sep)
    return fnmatchcase(normalized_name, normalized_pat,
                       case_sensitive=case_sensitive)
69 |
70 |
@lru_cache(maxsize=256, typed=True)
def _compile_pattern(pat, case_sensitive):
    """Return the bound ``match`` method of the regex compiled for ``pat``.

    Bytes patterns are decoded as ISO-8859-1, translated, and encoded back
    so the resulting regex is a bytes regex. When ``case_sensitive`` is
    false the regex is compiled with ``re.IGNORECASE``. Results are cached
    by ``lru_cache``.
    """
    flags = 0 if case_sensitive else re.IGNORECASE
    if isinstance(pat, bytes):
        regex = translate(pat.decode('ISO-8859-1')).encode('ISO-8859-1')
    else:
        regex = translate(pat)
    return re.compile(regex, flags).match
81 |
82 |
def filter(names, pat, norm_paths=True, case_sensitive=True, sep=None):
    """Return the entries of NAMES that match PAT, with their captures.

    Each result is a ``(name, groups)`` 2-tuple: ``name`` is the entry
    exactly as given, and ``groups`` is a tuple holding the (normalized)
    text matched by each wildcard of the pattern.
    """
    def normalize(value):
        return _norm_paths(value, norm_paths, sep)

    match = _compile_pattern(normalize(pat), case_sensitive)
    matched = []
    for candidate in names:
        found = match(normalize(candidate))
        if not found:
            continue
        captures = tuple(normalize(group) for group in found.groups())
        matched.append((candidate, captures))
    return matched
94 |
95 |
def fnmatchcase(name, pat, case_sensitive=True):
    """Test whether FILENAME matches PATTERN without normalizing either.

    Unlike fnmatch(), the arguments are used as given; ``case_sensitive``
    only selects whether the compiled regex ignores case.
    """
    return _compile_pattern(pat, case_sensitive)(name) is not None
104 |
105 |
def translate(pat):
    """Translate a shell PATTERN to a regular expression.

    Every wildcard (``*``, ``?`` and ``[seq]``) is emitted as a capturing
    group so callers can retrieve the text each wildcard matched.
    An unterminated ``[`` is treated as a literal bracket.

    There is no way to quote meta-characters.

    :param pat: the shell-style pattern.
    :return: the regular expression source (not compiled).
    """
    i, n = 0, len(pat)
    # Collect the pieces and join once instead of repeated concatenation.
    parts = []
    while i < n:
        c = pat[i]
        i += 1
        if c == '*':
            parts.append('(.*)')
        elif c == '?':
            parts.append('(.)')
        elif c == '[':
            # Find the closing bracket; a ']' directly after '[' or '[!'
            # is part of the set rather than its terminator.
            j = i
            if j < n and pat[j] == '!':
                j += 1
            if j < n and pat[j] == ']':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # No closing bracket: match a literal '['.
                parts.append('\\[')
            else:
                stuff = pat[i:j].replace('\\', '\\\\')
                i = j + 1
                if stuff[0] == '!':
                    # Shell negation -> regex negation.
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    # A leading '^' is literal in shell syntax.
                    stuff = '\\' + stuff
                parts.append('([%s])' % stuff)
        else:
            parts.append(re.escape(c))
    # Raw string: '\Z' in a normal literal is an invalid escape sequence.
    return '(?ms)' + ''.join(parts) + r'\Z'
142 |
--------------------------------------------------------------------------------
/glob2/impl.py:
--------------------------------------------------------------------------------
1 | """Filename globbing utility."""
2 |
3 | from __future__ import absolute_import
4 |
5 | import sys
6 | import os
7 | import re
8 | from os.path import join
9 | from . import fnmatch
10 |
11 | try:
12 | from itertools import imap
13 | except ImportError:
14 | imap = map
15 |
16 |
class Globber(object):
    """Glob implementation whose filesystem access can be overridden.

    All filesystem access goes through ``listdir``, ``isdir``, ``islink``
    and ``exists``; subclasses may replace them to glob on something other
    than the local filesystem.
    """

    listdir = staticmethod(os.listdir)
    isdir = staticmethod(os.path.isdir)
    islink = staticmethod(os.path.islink)
    exists = staticmethod(os.path.lexists)

    def walk(self, top, followlinks=False, sep=None):
        """A simplified version of os.walk (code copied) that uses
        ``self.listdir``, and the other local filesystem methods.

        Because we don't care about file/directory distinctions, only
        a single list is returned.

        Yields ``(directory, entries)`` pairs. Entries for which
        ``self.islink`` is true are not descended into unless
        ``followlinks`` is set. Paths are joined using ``sep`` if given.
        """
        try:
            names = self.listdir(top)
        except os.error:
            # Not a directory (or not listable): nothing to yield.
            return

        items = list(names)
        yield top, items

        for name in items:
            new_path = _join_paths([top, name], sep=sep)
            if followlinks or not self.islink(new_path):
                # Propagate ``sep`` so nested paths use the same separator.
                for x in self.walk(new_path, followlinks, sep):
                    yield x

    def glob(self, pathname, with_matches=False, include_hidden=False, recursive=True,
             norm_paths=True, case_sensitive=True, sep=None):
        """Return a list of paths matching a pathname pattern.

        The pattern may contain simple shell-style wildcards a la
        fnmatch. However, unlike fnmatch, filenames starting with a
        dot are special cases that are not matched by '*' and '?'
        patterns.

        If ``include_hidden`` is True, then files and folders starting with
        a dot are also returned.

        All other parameters are forwarded unchanged to :meth:`iglob`.
        """
        # Forward by keyword: iglob() has a ``recursive`` parameter between
        # ``include_hidden`` and ``norm_paths``, so positional forwarding
        # would shift every following argument by one slot.
        return list(self.iglob(pathname,
                               with_matches=with_matches,
                               include_hidden=include_hidden,
                               recursive=recursive,
                               norm_paths=norm_paths,
                               case_sensitive=case_sensitive,
                               sep=sep))

    def iglob(self, pathname, with_matches=False, include_hidden=False, recursive=True,
              norm_paths=True, case_sensitive=True, sep=None):
        """Return an iterator which yields the paths matching a pathname
        pattern.

        The pattern may contain simple shell-style wildcards a la
        fnmatch. However, unlike fnmatch, filenames starting with a
        dot are special cases that are not matched by '*' and '?'
        patterns.

        If ``with_matches`` is True, then for each matching path
        a 2-tuple will be returned; the second element of the tuple
        will be a tuple of the parts of the path that matched the
        individual wildcards.

        If ``include_hidden`` is True, then files and folders starting with
        a dot are also returned.

        ``recursive`` is accepted for signature compatibility only; it is
        not consulted here.
        """
        result = self._iglob(pathname, True, include_hidden,
                             norm_paths, case_sensitive, sep)
        if with_matches:
            return result
        return imap(lambda s: s[0], result)

    def _iglob(self, pathname, rootcall, include_hidden,
               norm_paths, case_sensitive, sep):
        """Internal implementation that backs :meth:`iglob`.

        ``rootcall`` is required to differentiate between the user's call to
        iglob(), and subsequent recursive calls, for the purposes of resolving
        certain special cases of ** wildcards. Specifically, "**" is supposed
        to include the current directory for purposes of globbing, but the
        directory itself should never be returned. So if ** is the lastmost
        part of the ``pathname`` given the user to the root call, we want to
        ignore the current directory. For this, we need to know which the root
        call is.
        """

        # Short-circuit if no glob magic
        if not has_magic(pathname):
            if self.exists(pathname):
                yield pathname, ()
            return

        # If no directory part is left, assume the working directory
        dirname, basename = os.path.split(pathname)

        # If the directory is globbed, recurse to resolve.
        # If at this point there is no directory part left, we simply
        # continue with dirname="", which will search the current dir.
        # `os.path.split()` returns the argument itself as a dirname if it is a
        # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
        # contains magic characters (i.e. r'\\?\C:').
        if dirname != pathname and has_magic(dirname):
            # Note that this may return files, which will be ignored
            # later when we try to use them as directories.
            # Prefiltering them here would only require more IO ops.
            dirs = self._iglob(dirname, False, include_hidden,
                               norm_paths, case_sensitive, sep)
        else:
            dirs = [(dirname, ())]

        # Resolve ``basename`` expr for every directory found
        for dirname, dir_groups in dirs:
            for name, groups in self.resolve_pattern(dirname, basename,
                                                     not rootcall, include_hidden,
                                                     norm_paths, case_sensitive, sep):
                yield _join_paths([dirname, name], sep=sep), dir_groups + groups

    def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden,
                        norm_paths, case_sensitive, sep):
        """Apply ``pattern`` (contains no path elements) to the
        literal directory in ``dirname``.

        If pattern=='', this will filter for directories. This is
        a special case that happens when the user's glob expression ends
        with a slash (in which case we only want directories). It is simpler
        and faster to filter here than in :meth:`_iglob`.

        Returns a list of ``(name, groups)`` tuples as produced by
        ``fnmatch.filter``; an empty list when nothing matches or the
        directory cannot be listed.
        """

        if sys.version_info[0] < 3:
            if isinstance(pattern, unicode) and not isinstance(dirname, unicode):
                dirname = unicode(dirname, sys.getfilesystemencoding() or
                                  sys.getdefaultencoding())

        # If no magic, short-circuit, only check for existence
        if not has_magic(pattern):
            if pattern == '':
                if self.isdir(dirname):
                    return [(pattern, ())]
            else:
                if self.exists(_join_paths([dirname, pattern], sep=sep)):
                    return [(pattern, ())]
            return []

        if not dirname:
            # Default to the current directory, using the same string type
            # as the pattern. This must only happen for an empty dirname;
            # a given directory must never be replaced.
            if isinstance(pattern, bytes):
                dirname = os.curdir.encode('ASCII')
            else:
                dirname = os.curdir

        try:
            if pattern == '**':
                # Include the current directory in **, if asked; by adding
                # an empty string as opposed to '.', we spare ourselves
                # having to deal with os.path.normpath() later.
                names = [''] if globstar_with_root else []
                for top, entries in self.walk(dirname, sep=sep):
                    # Path of ``top`` relative to ``dirname``.
                    relative_top = top[len(dirname) + 1:]
                    names.extend(_join_paths([relative_top, entry], sep=sep)
                                 for entry in entries)
                # Reset pattern so that fnmatch(), which does not understand
                # ** specifically, will only return a single group match.
                pattern = '*'
            else:
                names = self.listdir(dirname)
        except os.error:
            return []

        if not include_hidden and not _ishidden(pattern):
            # Remove hidden files, but take care to ensure
            # that the empty string we may have added earlier remains.
            names = [x for x in names if not x or not _ishidden(x)]
        return fnmatch.filter(names, pattern, norm_paths, case_sensitive, sep)
187 |
188 |
# Module-level convenience API: bind ``glob`` and ``iglob`` to the methods of
# one shared default Globber, then remove the temporary name from the module.
default_globber = Globber()
glob = default_globber.glob
iglob = default_globber.iglob
del default_globber
193 |
194 |
# Patterns that detect any glob wildcard character, for text and for bytes.
magic_check = re.compile('[*?[]')
magic_check_bytes = re.compile(b'[*?[]')


def has_magic(s):
    """Return True if ``s`` contains a glob wildcard (``*``, ``?`` or ``[``)."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
205 |
206 |
207 | def _ishidden(path):
208 | return path[0] in ('.', b'.'[0])
209 |
210 |
211 | def _join_paths(paths, sep=None):
212 | path = join(*paths)
213 | if sep:
214 | path = re.sub(r'\/', sep, path) # cached internally
215 | return path
216 |
217 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal = 1
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""Packaging script for glob2."""
import ast
import os
import re

from setuptools import setup, find_packages


# Figure out the version by scanning glob2/__init__.py for the
# ``__version__ = (...)`` tuple, without importing the package.
here = os.path.dirname(os.path.abspath(__file__))
version_re = re.compile(
    r'__version__ = (\(.*?\))')
version = None
# ``with`` guarantees the file is closed even when no version is found.
with open(os.path.join(here, 'glob2', '__init__.py')) as fp:
    for line in fp:
        match = version_re.search(line)
        if match:
            # literal_eval only accepts Python literals, unlike eval().
            version = ast.literal_eval(match.group(1))
            break
    else:
        raise Exception("Cannot find version in __init__.py")


setup(
    name = 'glob2',
    version = ".".join(map(str, version)),
    description = 'Version of the glob module that can capture patterns '+
                  'and supports recursive wildcards',
    author = 'Michael Elsdoerfer',
    author_email = 'michael@elsdoerfer.com',
    license='BSD',
    url = 'http://github.com/miracle2k/python-glob2/',
    classifiers = [
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: BSD License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Topic :: Software Development :: Libraries',
    ],
    packages = find_packages()
)
43 |
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import os
2 | from os import path
3 | import shutil
4 | import tempfile
5 |
6 | import glob2
7 | from glob2 import fnmatch
8 |
9 |
class TestFnmatch(object):
    """fnmatch.filter() returns matching names with their wildcard captures."""

    def test_filter_everything(self):
        candidates = ('fooABC', 'barABC', 'foo')

        trailing_star = fnmatch.filter(candidates, 'foo*')
        assert trailing_star == [('fooABC', ('ABC',)), ('foo', ('',))]

        surrounding_stars = fnmatch.filter(candidates, '*AB*')
        assert surrounding_stars == [
            ('fooABC', ('foo', 'C')),
            ('barABC', ('bar', 'C')),
        ]

    def test_filter_single_character(self):
        candidates = ('fooA', 'barA', 'foo')

        one_mark = fnmatch.filter(candidates, 'foo?')
        assert one_mark == [('fooA', ('A',))]

        three_marks = fnmatch.filter(candidates, '???A')
        assert three_marks == [
            ('fooA', ('f', 'o', 'o')),
            ('barA', ('b', 'a', 'r')),
        ]

    def test_sequence(self):
        candidates = ('fooA', 'fooB', 'fooC', 'foo')

        in_set = fnmatch.filter(candidates, 'foo[AB]')
        assert in_set == [('fooA', ('A',)), ('fooB', ('B',))]

        not_in_set = fnmatch.filter(candidates, 'foo[!AB]')
        assert not_in_set == [('fooC', ('C',))]
45 |
46 |
class BaseTest(object):
    """Base class that runs each test inside a fresh temporary directory.

    ``setup`` creates the directory, makes it the working directory and
    calls ``setup_files``; ``teardown`` restores the previous working
    directory and removes the temporary tree.
    """

    def setup(self):
        self.basedir = tempfile.mkdtemp()
        self._old_cwd = os.getcwd()
        os.chdir(self.basedir)

        self.setup_files()

    def setup_files(self):
        """Hook for subclasses to create their fixture files."""

    def teardown(self):
        os.chdir(self._old_cwd)
        shutil.rmtree(self.basedir)

    def makedirs(self, *names):
        """Create each named directory (with parents) below ``basedir``."""
        for relative in names:
            target = path.join(self.basedir, relative)
            os.makedirs(target)

    def touch(self, *names):
        """Create (or truncate) each named file below ``basedir``."""
        for relative in names:
            target = path.join(self.basedir, relative)
            with open(target, 'w'):
                pass
70 |
71 |
class TestPatterns(BaseTest):
    """Single-character and star wildcards combined across path elements."""

    def test(self):
        self.makedirs('dir1', 'dir22')
        self.touch('dir1/a-file', 'dir1/b-file',
                   'dir22/a-file', 'dir22/b-file')

        # ``dir?`` matches only the one-character suffix, so dir22 is out.
        found = glob2.glob('dir?/a-*', True)
        assert found == [('dir1/a-file', ('1', 'file'))]
81 |
82 |
class TestRecursive(BaseTest):
    """Behaviour of the recursive ``**`` wildcard."""

    def setup_files(self):
        self.makedirs('a', 'b', 'a/foo')
        self.touch('file.py', 'file.txt', 'a/bar.py', 'README', 'b/py',
                   'b/bar.py', 'a/foo/hello.py', 'a/foo/world.txt')

    def test_recursive(self):
        # ** includes the current directory
        assert sorted(glob2.glob('**/*.py', True)) == [
            ('a/bar.py', ('a', 'bar')),
            ('a/foo/hello.py', ('a/foo', 'hello')),
            ('b/bar.py', ('b', 'bar')),
            ('file.py', ('', 'file')),
        ]

    def test_exclude_root_directory(self):
        # If files from the root directory should not be included,
        # this is the syntax to use:
        assert sorted(glob2.glob('*/**/*.py', True)) == [
            ('a/bar.py', ('a', '', 'bar')),
            ('a/foo/hello.py', ('a', 'foo', 'hello')),
            ('b/bar.py', ('b', '', 'bar'))
        ]

    def test_only_directories(self):
        # Return directories only
        assert sorted(glob2.glob('**/', True)) == [
            ('a/', ('a',)),
            ('a/foo/', ('a/foo',)),
            ('b/', ('b',)),
        ]

    def test_parent_dir(self):
        # Make sure ".." can be used
        os.chdir(path.join(self.basedir, 'b'))
        # Compare with ``==``: ``assert x, [...]`` would only use the list
        # as the assertion message and never check the result.
        assert sorted(glob2.glob('../a/**/*.py', True)) == [
            ('../a/bar.py', ('', 'bar')),
            ('../a/foo/hello.py', ('foo', 'hello'))
        ]

    def test_fixed_basename(self):
        assert sorted(glob2.glob('**/bar.py', True)) == [
            ('a/bar.py', ('a',)),
            ('b/bar.py', ('b',)),
        ]

    def test_all_files(self):
        # Return all files
        os.chdir(path.join(self.basedir, 'a'))
        assert sorted(glob2.glob('**', True)) == [
            ('bar.py', ('bar.py',)),
            ('foo', ('foo',)),
            ('foo/hello.py', ('foo/hello.py',)),
            ('foo/world.txt', ('foo/world.txt',)),
        ]

    def test_root_directory_not_returned(self):
        # Ensure that a certain codepath (when the basename is globbed
        # with ** as opposed to the dirname) does not cause
        # the root directory to be part of the result.
        # -> b/ is NOT in the result!
        assert sorted(glob2.glob('b/**', True)) == [
            ('b/bar.py', ('bar.py',)),
            ('b/py', ('py',)),
        ]

    def test_non_glob(self):
        # Test without patterns.
        assert glob2.glob(__file__, True) == [
            (__file__, ())
        ]
        assert glob2.glob(__file__) == [
            (__file__)
        ]
158 |
159 |
class TestIncludeHidden(BaseTest):
    """Behaviour of the ``include_hidden`` option."""

    def setup_files(self):
        self.makedirs('a', 'b', 'a/.foo')
        self.touch('file.py', 'file.txt', 'a/.bar', 'README', 'b/py',
                   'b/.bar', 'a/.foo/hello.py', 'a/.foo/world.txt')

    def test_hidden(self):
        # With include_hidden=True, dot-prefixed entries are matched by '*'.
        # Compare with ``==``: ``assert x, [...]`` would only use the list
        # as the assertion message and never check the result.
        assert sorted(glob2.glob('*/*', True, include_hidden=True)) == [
            ('a/.bar', ('a', '.bar')),
            ('a/.foo', ('a', '.foo')),
            ('b/.bar', ('b', '.bar')),
            ('b/py', ('b', 'py')),
        ]
175 |
--------------------------------------------------------------------------------