├── .git-blame-ignore-revs
├── .github
    ├── SECURITY.md
    └── workflows
    │   └── test.yml
├── .gitignore
├── .pytest.ini
├── LICENSE
├── README.rst
├── parse.py
├── pyproject.toml
└── tests
    ├── requirements.txt
    ├── test_bugs.py
    ├── test_findall.py
    ├── test_parse.py
    ├── test_parsetype.py
    ├── test_pattern.py
    ├── test_result.py
    └── test_search.py


/.git-blame-ignore-revs:
--------------------------------------------------------------------------------
1 | 4cc9e9f398b8b80e6f0a68d25774cb1c0c32f2fb  # black string normalization
2 | 


--------------------------------------------------------------------------------
/.github/SECURITY.md:
--------------------------------------------------------------------------------
 1 | # Security Policy
 2 | 
 3 | ## Reporting a Vulnerability
 4 | 
 5 | Security concerns may be reported to **[wim.glenn@gmail.com](mailto:wim.glenn+parse@gmail.com)**.
 6 | 
 7 | Please provide the following information in your report:
 8 | 
 9 | - A description of the vulnerability and its impact
10 | - How to reproduce the issue
11 | - Which versions of the library are affected
12 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: parse
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - master
 6 |   pull_request:
 7 |     branches:
 8 |       - master
 9 |   workflow_dispatch:
10 | 
11 | jobs:
12 |   run-test:
13 |     runs-on: ${{ matrix.os }}
14 |     strategy:
15 |       fail-fast: false
16 |       matrix:
17 |         os: [ubuntu-latest, macos-latest, windows-latest]
18 |         python-version:
19 |         - "3.8"
20 |         - "3.9"
21 |         - "3.10"
22 |         - "3.11"
23 |         - "3.12"
24 |         - "pypy-3.9"
25 |     steps:
26 |       - uses: actions/checkout@v3
27 |       - name: Set up Python ${{ matrix.python-version }}
28 |         uses: actions/setup-python@v4
29 |         with:
30 |           python-version: ${{ matrix.python-version }}
31 |       - name: Run tests
32 |         run: |
33 |           pip install -r tests/requirements.txt --editable .
34 |           pytest
35 |       - name: Upload coverage to Codecov
36 |         uses: codecov/codecov-action@v3
37 |         with:
38 |           token: ${{ secrets.CODECOV_TOKEN }}
39 | 
40 |   tests-37:
41 |     name: Python 3.7 on ubuntu-20.04
42 |     runs-on: ubuntu-20.04
43 |     container:
44 |       image: python:3.7
45 |     steps:
46 |       - uses: actions/checkout@v3
47 |       - name: Run tests
48 |         run: |
49 |           pip install -r tests/requirements.txt
50 |           python -m pytest
51 |       - name: Upload coverage to Codecov
52 |         uses: codecov/codecov-action@v3
53 |         with:
54 |           token: ${{ secrets.CODECOV_TOKEN }}
55 | 
56 |   tests-27:
57 |     name: Python 2.7 on ubuntu-20.04
58 |     runs-on: ubuntu-20.04
59 |     container:
60 |       image: python:2.7-buster
61 |     steps:
62 |       - uses: actions/checkout@v3
63 |       - name: Run tests
64 |         run: |
65 |           pip install -r tests/requirements.txt
66 |           python -m pytest
67 |       - name: Upload coverage to Codecov
68 |         uses: codecov/codecov-action@v3
69 |         with:
70 |           token: ${{ secrets.CODECOV_TOKEN }}
71 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | venv
 2 | *.pyc
 3 | *.pyo
 4 | 
 5 | .vscode/
 6 | .cache/
 7 | .idea/
 8 | .tox/
 9 | __pycache__/
10 | .coverage
11 | .pytest_cache
12 | parse.egg-info
13 | .python-version
14 | 
15 | MANIFEST
16 | build
17 | dist
18 | .ropeproject
19 | 


--------------------------------------------------------------------------------
/.pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts = --cov=parse --cov-report=term-missing --cov-append --cov-branch --doctest-modules --doctest-glob=README.rst
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2012-2019 Richard Jones <richard@python.org>
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 |  The above copyright notice and this permission notice shall be included in
11 |  all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | Installation
  2 | ------------
  3 | 
  4 | .. code-block:: pycon
  5 | 
  6 |     pip install parse
  7 | 
  8 | Usage
  9 | -----
 10 | 
 11 | Parse strings using a specification based on the Python `format()`_ syntax.
 12 | 
 13 |    ``parse()`` is the opposite of ``format()``
 14 | 
 15 | The module is set up to only export ``parse()``, ``search()``, ``findall()``,
 16 | and ``with_pattern()`` when ``import *`` is used:
 17 | 
 18 | >>> from parse import *
 19 | 
 20 | From there it's a simple thing to parse a string:
 21 | 
 22 | .. code-block:: pycon
 23 | 
 24 |     >>> parse("It's {}, I love it!", "It's spam, I love it!")
 25 |     <Result ('spam',) {}>
 26 |     >>> _[0]
 27 |     'spam'
 28 | 
 29 | Or to search a string for some pattern:
 30 | 
 31 | .. code-block:: pycon
 32 | 
 33 |     >>> search('Age: {:d}\n', 'Name: Rufus\nAge: 42\nColor: red\n')
 34 |     <Result (42,) {}>
 35 | 
 36 | Or find all the occurrences of some pattern in a string:
 37 | 
 38 | .. code-block:: pycon
 39 | 
 40 |     >>> ''.join(r[0] for r in findall(">{}<", "<p>the <b>bold</b> text</p>"))
 41 |     'the bold text'
 42 | 
 43 | If you're going to use the same pattern to match lots of strings you can
 44 | compile it once:
 45 | 
 46 | .. code-block:: pycon
 47 | 
 48 |     >>> from parse import compile
 49 |     >>> p = compile("It's {}, I love it!")
 50 |     >>> print(p)
 51 |     <Parser "It's {}, I love it!">
 52 |     >>> p.parse("It's spam, I love it!")
 53 |     <Result ('spam',) {}>
 54 | 
 55 | ("compile" is not exported for ``import *`` usage as it would override the
 56 | built-in ``compile()`` function)
 57 | 
 58 | The default behaviour is to match strings case insensitively. You may match with
 59 | case by specifying `case_sensitive=True`:
 60 | 
 61 | .. code-block:: pycon
 62 | 
 63 |     >>> parse('SPAM', 'spam', case_sensitive=True) is None
 64 |     True
 65 | 
 66 | .. _format():
 67 |   https://docs.python.org/3/library/stdtypes.html#str.format
 68 | 
 69 | 
 70 | Format Syntax
 71 | -------------
 72 | 
 73 | A basic version of the `Format String Syntax`_ is supported with anonymous
 74 | (fixed-position), named and formatted fields::
 75 | 
 76 |    {[field name]:[format spec]}
 77 | 
 78 | Field names must be a valid Python identifiers, including dotted names;
 79 | element indexes imply dictionaries (see below for example).
 80 | 
 81 | Numbered fields are also not supported: the result of parsing will include
 82 | the parsed fields in the order they are parsed.
 83 | 
 84 | The conversion of fields to types other than strings is done based on the
 85 | type in the format specification, which mirrors the ``format()`` behaviour.
 86 | There are no "!" field conversions like ``format()`` has.
 87 | 
 88 | Some simple parse() format string examples:
 89 | 
 90 | .. code-block:: pycon
 91 | 
 92 |     >>> parse("Bring me a {}", "Bring me a shrubbery")
 93 |     <Result ('shrubbery',) {}>
 94 |     >>> r = parse("The {} who {} {}", "The knights who say Ni!")
 95 |     >>> print(r)
 96 |     <Result ('knights', 'say', 'Ni!') {}>
 97 |     >>> print(r.fixed)
 98 |     ('knights', 'say', 'Ni!')
 99 |     >>> print(r[0])
100 |     knights
101 |     >>> print(r[1:])
102 |     ('say', 'Ni!')
103 |     >>> r = parse("Bring out the holy {item}", "Bring out the holy hand grenade")
104 |     >>> print(r)
105 |     <Result () {'item': 'hand grenade'}>
106 |     >>> print(r.named)
107 |     {'item': 'hand grenade'}
108 |     >>> print(r['item'])
109 |     hand grenade
110 |     >>> 'item' in r
111 |     True
112 | 
113 | Note that `in` only works if you have named fields.
114 | 
115 | Dotted names and indexes are possible with some limits. Only word identifiers
116 | are supported (ie. no numeric indexes) and the application must make additional
117 | sense of the result:
118 | 
119 | .. code-block:: pycon
120 | 
121 |     >>> r = parse("Mmm, {food.type}, I love it!", "Mmm, spam, I love it!")
122 |     >>> print(r)
123 |     <Result () {'food.type': 'spam'}>
124 |     >>> print(r.named)
125 |     {'food.type': 'spam'}
126 |     >>> print(r['food.type'])
127 |     spam
128 |     >>> r = parse("My quest is {quest[name]}", "My quest is to seek the holy grail!")
129 |     >>> print(r)
130 |     <Result () {'quest': {'name': 'to seek the holy grail!'}}>
131 |     >>> print(r['quest'])
132 |     {'name': 'to seek the holy grail!'}
133 |     >>> print(r['quest']['name'])
134 |     to seek the holy grail!
135 | 
136 | If the text you're matching has braces in it you can match those by including
137 | a double-brace ``{{`` or ``}}`` in your format string, just like format() does.
138 | 
139 | 
140 | Format Specification
141 | --------------------
142 | 
143 | Most often a straight format-less ``{}`` will suffice where a more complex
144 | format specification might have been used.
145 | 
146 | Most of `format()`'s `Format Specification Mini-Language`_ is supported:
147 | 
148 |    [[fill]align][sign][0][width][.precision][type]
149 | 
150 | The differences between `parse()` and `format()` are:
151 | 
152 | - The align operators will cause spaces (or specified fill character) to be
153 |   stripped from the parsed value. The width is not enforced; it just indicates
154 |   there may be whitespace or "0"s to strip.
155 | - Numeric parsing will automatically handle a "0b", "0o" or "0x" prefix.
156 |   That is, the "#" format character is handled automatically by d, b, o
157 |   and x formats. For "d" any will be accepted, but for the others the correct
158 |   prefix must be present if at all.
159 | - Numeric sign is handled automatically.  A sign specifier can be given, but
160 |   has no effect.
161 | - The thousands separator is handled automatically if the "n" type is used.
162 | - The types supported are a slightly different mix to the format() types.  Some
163 |   format() types come directly over: "d", "n", "%", "f", "e", "b", "o" and "x".
164 |   In addition some regular expression character group types "D", "w", "W", "s"
165 |   and "S" are also available.
166 | - The "e" and "g" types are case-insensitive so there is not need for
167 |   the "E" or "G" types. The "e" type handles Fortran formatted numbers (no
168 |   leading 0 before the decimal point).
169 | 
170 | ===== =========================================== ========
171 | Type  Characters Matched                          Output
172 | ===== =========================================== ========
173 | l     Letters (ASCII)                             str
174 | w     Letters, numbers and underscore             str
175 | W     Not letters, numbers and underscore         str
176 | s     Whitespace                                  str
177 | S     Non-whitespace                              str
178 | d     Integer numbers (optional sign, digits)     int
179 | D     Non-digit                                   str
180 | n     Numbers with thousands separators (, or .)  int
181 | %     Percentage (converted to value/100.0)       float
182 | f     Fixed-point numbers                         float
183 | F     Decimal numbers                             Decimal
184 | e     Floating-point numbers with exponent        float
185 |       e.g. 1.1e-10, NAN (all case insensitive)
186 | g     General number format (either d, f or e)    float
187 | b     Binary numbers                              int
188 | o     Octal numbers                               int
189 | x     Hexadecimal numbers (lower and upper case)  int
190 | ti    ISO 8601 format date/time                   datetime
191 |       e.g. 1972-01-20T10:21:36Z ("T" and "Z"
192 |       optional)
193 | te    RFC2822 e-mail format date/time             datetime
194 |       e.g. Mon, 20 Jan 1972 10:21:36 +1000
195 | tg    Global (day/month) format date/time         datetime
196 |       e.g. 20/1/1972 10:21:36 AM +1:00
197 | ta    US (month/day) format date/time             datetime
198 |       e.g. 1/20/1972 10:21:36 PM +10:30
199 | tc    ctime() format date/time                    datetime
200 |       e.g. Sun Sep 16 01:03:52 1973
201 | th    HTTP log format date/time                   datetime
202 |       e.g. 21/Nov/2011:00:07:11 +0000
203 | ts    Linux system log format date/time           datetime
204 |       e.g. Nov  9 03:37:44
205 | tt    Time                                        time
206 |       e.g. 10:21:36 PM -5:30
207 | ===== =========================================== ========
208 | 
209 | The type can also be a datetime format string, following the
210 | `1989 C standard format codes`_, e.g. ``%Y-%m-%d``. Depending on the
211 | directives contained in the format string, parsed output may be an instance
212 | of ``datetime.datetime``, ``datetime.time``, or ``datetime.date``.
213 | 
214 | .. code-block:: pycon
215 | 
216 |     >>> parse("{:%Y-%m-%d %H:%M:%S}", "2023-11-23 12:56:47")
217 |     <Result (datetime.datetime(2023, 11, 23, 12, 56, 47),) {}>
218 |     >>> parse("{:%H:%M}", "10:26")
219 |     <Result (datetime.time(10, 26),) {}>
220 |     >>> parse("{:%Y/%m/%d}", "2023/11/25")
221 |     <Result (datetime.date(2023, 11, 25),) {}>
222 | 
223 | 
224 | Some examples of typed parsing with ``None`` returned if the typing
225 | does not match:
226 | 
227 | .. code-block:: pycon
228 | 
229 |     >>> parse('Our {:d} {:w} are...', 'Our 3 weapons are...')
230 |     <Result (3, 'weapons') {}>
231 |     >>> parse('Our {:d} {:w} are...', 'Our three weapons are...')
232 |     >>> parse('Meet at {:tg}', 'Meet at 1/2/2011 11:00 PM')
233 |     <Result (datetime.datetime(2011, 2, 1, 23, 0),) {}>
234 | 
235 | And messing about with alignment:
236 | 
237 | .. code-block:: pycon
238 | 
239 |     >>> parse('with {:>} herring', 'with     a herring')
240 |     <Result ('a',) {}>
241 |     >>> parse('spam {:^} spam', 'spam    lovely     spam')
242 |     <Result ('lovely',) {}>
243 | 
244 | Note that the "center" alignment does not test to make sure the value is
245 | centered - it just strips leading and trailing whitespace.
246 | 
247 | Width and precision may be used to restrict the size of matched text
248 | from the input. Width specifies a minimum size and precision specifies
249 | a maximum. For example:
250 | 
251 | .. code-block:: pycon
252 | 
253 |     >>> parse('{:.2}{:.2}', 'look')           # specifying precision
254 |     <Result ('lo', 'ok') {}>
255 |     >>> parse('{:4}{:4}', 'look at that')     # specifying width
256 |     <Result ('look', 'at that') {}>
257 |     >>> parse('{:4}{:.4}', 'look at that')    # specifying both
258 |     <Result ('look at ', 'that') {}>
259 |     >>> parse('{:2d}{:2d}', '0440')           # parsing two contiguous numbers
260 |     <Result (4, 40) {}>
261 | 
262 | Some notes for the special date and time types:
263 | 
264 | - the presence of the time part is optional (including ISO 8601, starting
265 |   at the "T"). A full datetime object will always be returned; the time
266 |   will be set to 00:00:00. You may also specify a time without seconds.
267 | - when a seconds amount is present in the input fractions will be parsed
268 |   to give microseconds.
269 | - except in ISO 8601 the day and month digits may be 0-padded.
270 | - the date separator for the tg and ta formats may be "-" or "/".
271 | - named months (abbreviations or full names) may be used in the ta and tg
272 |   formats in place of numeric months.
273 | - as per RFC 2822 the e-mail format may omit the day (and comma), and the
274 |   seconds but nothing else.
275 | - hours greater than 12 will be happily accepted.
276 | - the AM/PM are optional, and if PM is found then 12 hours will be added
277 |   to the datetime object's hours amount - even if the hour is greater
278 |   than 12 (for consistency.)
279 | - in ISO 8601 the "Z" (UTC) timezone part may be a numeric offset
280 | - timezones are specified as "+HH:MM" or "-HH:MM". The hour may be one or two
281 |   digits (0-padded is OK.) Also, the ":" is optional.
282 | - the timezone is optional in all except the e-mail format (it defaults to
283 |   UTC.)
284 | - named timezones are not handled yet.
285 | 
286 | Note: attempting to match too many datetime fields in a single parse() will
287 | currently result in a resource allocation issue. A TooManyFields exception
288 | will be raised in this instance. The current limit is about 15. It is hoped
289 | that this limit will be removed one day.
290 | 
291 | .. _`Format String Syntax`:
292 |   https://docs.python.org/3/library/string.html#format-string-syntax
293 | .. _`Format Specification Mini-Language`:
294 |   https://docs.python.org/3/library/string.html#format-specification-mini-language
295 | .. _`1989 C standard format codes`:
296 |   https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
297 | 
298 | 
299 | 
300 | Result and Match Objects
301 | ------------------------
302 | 
303 | The result of a ``parse()`` and ``search()`` operation is either ``None`` (no match), a
304 | ``Result`` instance or a ``Match`` instance if ``evaluate_result`` is False.
305 | 
306 | The ``Result`` instance has three attributes:
307 | 
308 | ``fixed``
309 |    A tuple of the fixed-position, anonymous fields extracted from the input.
310 | ``named``
311 |    A dictionary of the named fields extracted from the input.
312 | ``spans``
313 |    A dictionary mapping the names and fixed position indices matched to a
314 |    2-tuple slice range of where the match occurred in the input.
315 |    The span does not include any stripped padding (alignment or width).
316 | 
317 | The ``Match`` instance has one method:
318 | 
319 | ``evaluate_result()``
320 |    Generates and returns a ``Result`` instance for this ``Match`` object.
321 | 
322 | 
323 | 
324 | Custom Type Conversions
325 | -----------------------
326 | 
327 | If you wish to have matched fields automatically converted to your own type you
328 | may pass in a dictionary of type conversion information to ``parse()`` and
329 | ``compile()``.
330 | 
331 | The converter will be passed the field string matched. Whatever it returns
332 | will be substituted in the ``Result`` instance for that field.
333 | 
334 | Your custom type conversions may override the builtin types if you supply one
335 | with the same identifier:
336 | 
337 | .. code-block:: pycon
338 | 
339 |     >>> def shouty(string):
340 |     ...    return string.upper()
341 |     ...
342 |     >>> parse('{:shouty} world', 'hello world', {"shouty": shouty})
343 |     <Result ('HELLO',) {}>
344 | 
345 | If the type converter has the optional ``pattern`` attribute, it is used as
346 | regular expression for better pattern matching (instead of the default one):
347 | 
348 | .. code-block:: pycon
349 | 
350 |     >>> def parse_number(text):
351 |     ...    return int(text)
352 |     >>> parse_number.pattern = r'\d+'
353 |     >>> parse('Answer: {number:Number}', 'Answer: 42', {"Number": parse_number})
354 |     <Result () {'number': 42}>
355 |     >>> _ = parse('Answer: {:Number}', 'Answer: Alice', {"Number": parse_number})
356 |     >>> assert _ is None, "MISMATCH"
357 | 
358 | You can also use the ``with_pattern(pattern)`` decorator to add this
359 | information to a type converter function:
360 | 
361 | .. code-block:: pycon
362 | 
363 |     >>> from parse import with_pattern
364 |     >>> @with_pattern(r'\d+')
365 |     ... def parse_number(text):
366 |     ...    return int(text)
367 |     >>> parse('Answer: {number:Number}', 'Answer: 42', {"Number": parse_number})
368 |     <Result () {'number': 42}>
369 | 
370 | A more complete example of a custom type might be:
371 | 
372 | .. code-block:: pycon
373 | 
374 |     >>> yesno_mapping = {
375 |     ...     "yes":  True,   "no":    False,
376 |     ...     "on":   True,   "off":   False,
377 |     ...     "true": True,   "false": False,
378 |     ... }
379 |     >>> @with_pattern(r"|".join(yesno_mapping))
380 |     ... def parse_yesno(text):
381 |     ...     return yesno_mapping[text.lower()]
382 | 
383 | 
384 | If the type converter ``pattern`` uses regex-grouping (with parenthesis),
385 | you should indicate this by using the optional ``regex_group_count`` parameter
386 | in the ``with_pattern()`` decorator:
387 | 
388 | .. code-block:: pycon
389 | 
390 |     >>> @with_pattern(r'((\d+))', regex_group_count=2)
391 |     ... def parse_number2(text):
392 |     ...    return int(text)
393 |     >>> parse('Answer: {:Number2} {:Number2}', 'Answer: 42 43', {"Number2": parse_number2})
394 |     <Result (42, 43) {}>
395 | 
396 | Otherwise, this may cause parsing problems with unnamed/fixed parameters.
397 | 
398 | 
399 | Potential Gotchas
400 | -----------------
401 | 
402 | ``parse()`` will always match the shortest text necessary (from left to right)
403 | to fulfil the parse pattern, so for example:
404 | 
405 | 
406 | .. code-block:: pycon
407 | 
408 |     >>> pattern = '{dir1}/{dir2}'
409 |     >>> data = 'root/parent/subdir'
410 |     >>> sorted(parse(pattern, data).named.items())
411 |     [('dir1', 'root'), ('dir2', 'parent/subdir')]
412 | 
413 | So, even though `{'dir1': 'root/parent', 'dir2': 'subdir'}` would also fit
414 | the pattern, the actual match represents the shortest successful match for
415 | ``dir1``.
416 | 
417 | Developers
418 | ----------
419 | 
420 | Want to contribute to parse? Fork the repo to your own GitHub account, and create a pull-request.
421 | 
422 | .. code-block:: bash
423 | 
424 |    git clone git@github.com:r1chardj0n3s/parse.git
425 |    git remote rename origin upstream
426 |    git remote add origin git@github.com:YOURUSERNAME/parse.git
427 |    git checkout -b myfeature
428 | 
429 | To run the tests locally:
430 | 
431 | .. code-block:: bash
432 | 
433 |    python -m venv .venv
434 |    source .venv/bin/activate
435 |    pip install -r tests/requirements.txt
436 |    pip install -e .
437 |    pytest
438 | 
439 | ----
440 | 
441 | Changelog
442 | ---------
443 | 
444 | - 1.20.2 Template field names can now contain - character i.e. HYPHEN-MINUS, chr(0x2d)
445 | - 1.20.1 The `%f` directive accepts 1-6 digits, like strptime (thanks @bbertincourt)
446 | - 1.20.0 Added support for strptime codes (thanks @bendichter)
447 | - 1.19.1 Added support for sign specifiers in number formats (thanks @anntzer)
448 | - 1.19.0 Added slice access to fixed results (thanks @jonathangjertsen).
449 |   Also corrected matching of *full string* vs. *full line* (thanks @giladreti)
450 |   Fix issue with using digit field numbering and types
451 | - 1.18.0 Correct bug in int parsing introduced in 1.16.0 (thanks @maxxk)
452 | - 1.17.0 Make left- and center-aligned search consume up to next space
453 | - 1.16.0 Make compiled parse objects pickleable (thanks @martinResearch)
454 | - 1.15.0 Several fixes for parsing non-base 10 numbers (thanks @vladikcomper)
455 | - 1.14.0 More broad acceptance of Fortran number format (thanks @purpleskyfall)
456 | - 1.13.1 Project metadata correction.
457 | - 1.13.0 Handle Fortran formatted numbers with no leading 0 before decimal
458 |   point (thanks @purpleskyfall).
459 |   Handle comparison of FixedTzOffset with other types of object.
460 | - 1.12.1 Actually use the `case_sensitive` arg in compile (thanks @jacquev6)
461 | - 1.12.0 Do not assume closing brace when an opening one is found (thanks @mattsep)
462 | - 1.11.1 Revert having unicode char in docstring, it breaks Bamboo builds(?!)
463 | - 1.11.0 Implement `__contains__` for Result instances.
464 | - 1.10.0 Introduce a "letters" matcher, since "w" matches numbers
465 |   also.
466 | - 1.9.1 Fix deprecation warnings around backslashes in regex strings
467 |   (thanks Mickael Schoentgen). Also fix some documentation formatting
468 |   issues.
469 | - 1.9.0 We now honor precision and width specifiers when parsing numbers
470 |   and strings, allowing parsing of concatenated elements of fixed width
471 |   (thanks Julia Signell)
472 | - 1.8.4 Add LICENSE file at request of packagers.
473 |   Correct handling of AM/PM to follow most common interpretation.
474 |   Correct parsing of hexadecimal that looks like a binary prefix.
475 |   Add ability to parse case sensitively.
476 |   Add parsing of numbers to Decimal with "F" (thanks John Vandenberg)
477 | - 1.8.3 Add regex_group_count to with_pattern() decorator to support
478 |   user-defined types that contain brackets/parenthesis (thanks Jens Engel)
479 | - 1.8.2 add documentation for including braces in format string
480 | - 1.8.1 ensure bare hexadecimal digits are not matched
481 | - 1.8.0 support manual control over result evaluation (thanks Timo Furrer)
482 | - 1.7.0 parse dict fields (thanks Mark Visser) and adapted to allow
483 |   more than 100 re groups in Python 3.5+ (thanks David King)
484 | - 1.6.6 parse Linux system log dates (thanks Alex Cowan)
485 | - 1.6.5 handle precision in float format (thanks Levi Kilcher)
486 | - 1.6.4 handle pipe "|" characters in parse string (thanks Martijn Pieters)
487 | - 1.6.3 handle repeated instances of named fields, fix bug in PM time
488 |   overflow
489 | - 1.6.2 fix logging to use local, not root logger (thanks Necku)
490 | - 1.6.1 be more flexible regarding matched ISO datetimes and timezones in
491 |   general, fix bug in timezones without ":" and improve docs
492 | - 1.6.0 add support for optional ``pattern`` attribute in user-defined types
493 |   (thanks Jens Engel)
494 | - 1.5.3 fix handling of question marks
495 | - 1.5.2 fix type conversion error with dotted names (thanks Sebastian Thiel)
496 | - 1.5.1 implement handling of named datetime fields
497 | - 1.5 add handling of dotted field names (thanks Sebastian Thiel)
498 | - 1.4.1 fix parsing of "0" in int conversion (thanks James Rowe)
499 | - 1.4 add __getitem__ convenience access on Result.
500 | - 1.3.3 fix Python 2.5 setup.py issue.
501 | - 1.3.2 fix Python 3.2 setup.py issue.
502 | - 1.3.1 fix a couple of Python 3.2 compatibility issues.
503 | - 1.3 added search() and findall(); removed compile() from ``import *``
504 |   export as it overwrites builtin.
505 | - 1.2 added ability for custom and override type conversions to be
506 |   provided; some cleanup
507 | - 1.1.9 to keep things simpler number sign is handled automatically;
508 |   significant robustification in the face of edge-case input.
509 | - 1.1.8 allow "d" fields to have number base "0x" etc. prefixes;
510 |   fix up some field type interactions after stress-testing the parser;
511 |   implement "%" type.
512 | - 1.1.7 Python 3 compatibility tweaks (2.5 to 2.7 and 3.2 are supported).
513 | - 1.1.6 add "e" and "g" field types; removed redundant "h" and "X";
514 |   removed need for explicit "#".
515 | - 1.1.5 accept textual dates in more places; Result now holds match span
516 |   positions.
517 | - 1.1.4 fixes to some int type conversion; implemented "=" alignment; added
518 |   date/time parsing with a variety of formats handled.
519 | - 1.1.3 type conversion is automatic based on specified field types. Also added
520 |   "f" and "n" types.
521 | - 1.1.2 refactored, added compile() and limited ``from parse import *``
522 | - 1.1.1 documentation improvements
523 | - 1.1.0 implemented more of the `Format Specification Mini-Language`_
524 |   and removed the restriction on mixing fixed-position and named fields
525 | - 1.0.0 initial release
526 | 
527 | This code is copyright 2012-2021 Richard Jones <richard@python.org>
528 | See the end of the source file for the license of use.
529 | 


--------------------------------------------------------------------------------
/parse.py:
--------------------------------------------------------------------------------
   1 | from __future__ import absolute_import
   2 | 
   3 | import logging
   4 | import re
   5 | import sys
   6 | from datetime import datetime
   7 | from datetime import time
   8 | from datetime import timedelta
   9 | from datetime import tzinfo
  10 | from decimal import Decimal
  11 | from functools import partial
  12 | 
  13 | 
  14 | __version__ = "1.20.2"
  15 | __all__ = ["parse", "search", "findall", "with_pattern"]
  16 | 
  17 | log = logging.getLogger(__name__)
  18 | 
  19 | 
  20 | def with_pattern(pattern, regex_group_count=None):
  21 |     r"""Attach a regular expression pattern matcher to a custom type converter
  22 |     function.
  23 | 
  24 |     This annotates the type converter with the :attr:`pattern` attribute.
  25 | 
  26 |     EXAMPLE:
  27 |         >>> import parse
  28 |         >>> @parse.with_pattern(r"\d+")
  29 |         ... def parse_number(text):
  30 |         ...     return int(text)
  31 | 
  32 |     is equivalent to:
  33 | 
  34 |         >>> def parse_number(text):
  35 |         ...     return int(text)
  36 |         >>> parse_number.pattern = r"\d+"
  37 | 
  38 |     :param pattern: regular expression pattern (as text)
  39 |     :param regex_group_count: Indicates how many regex-groups are in pattern.
  40 |     :return: wrapped function
  41 |     """
  42 | 
  43 |     def decorator(func):
  44 |         func.pattern = pattern
  45 |         func.regex_group_count = regex_group_count
  46 |         return func
  47 | 
  48 |     return decorator
  49 | 
  50 | 
  51 | class int_convert:
  52 |     """Convert a string to an integer.
  53 | 
  54 |     The string may start with a sign.
  55 | 
  56 |     It may be of a base other than 2, 8, 10 or 16.
  57 | 
  58 |     If base isn't specified, it will be detected automatically based
  59 |     on a string format. When string starts with a base indicator, 0#nnnn,
  60 |     it overrides the default base of 10.
  61 | 
  62 |     It may also have other non-numeric characters that we can ignore.
  63 |     """
  64 | 
  65 |     CHARS = "0123456789abcdefghijklmnopqrstuvwxyz"
  66 | 
  67 |     def __init__(self, base=None):
  68 |         self.base = base
  69 | 
  70 |     def __call__(self, string, match):
  71 |         if string[0] == "-":
  72 |             sign = -1
  73 |             number_start = 1
  74 |         elif string[0] == "+":
  75 |             sign = 1
  76 |             number_start = 1
  77 |         else:
  78 |             sign = 1
  79 |             number_start = 0
  80 | 
  81 |         base = self.base
  82 |         # If base wasn't specified, detect it automatically
  83 |         if base is None:
  84 |             # Assume decimal number, unless different base is detected
  85 |             base = 10
  86 | 
  87 |             # For number formats starting with 0b, 0o, 0x, use corresponding base ...
  88 |             if string[number_start] == "0" and len(string) - number_start > 2:
  89 |                 if string[number_start + 1] in "bB":
  90 |                     base = 2
  91 |                 elif string[number_start + 1] in "oO":
  92 |                     base = 8
  93 |                 elif string[number_start + 1] in "xX":
  94 |                     base = 16
  95 | 
  96 |         chars = int_convert.CHARS[:base]
  97 |         string = re.sub("[^%s]" % chars, "", string.lower())
  98 |         return sign * int(string, base)
  99 | 
 100 | 
 101 | class convert_first:
 102 |     """Convert the first element of a pair.
 103 |     This equivalent to lambda s,m: converter(s). But unlike a lambda function, it can be pickled
 104 |     """
 105 | 
 106 |     def __init__(self, converter):
 107 |         self.converter = converter
 108 | 
 109 |     def __call__(self, string, match):
 110 |         return self.converter(string)
 111 | 
 112 | 
 113 | def percentage(string, match):
 114 |     return float(string[:-1]) / 100.0
 115 | 
 116 | 
 117 | class FixedTzOffset(tzinfo):
 118 |     """Fixed offset in minutes east from UTC."""
 119 | 
 120 |     ZERO = timedelta(0)
 121 | 
 122 |     def __init__(self, offset, name):
 123 |         self._offset = timedelta(minutes=offset)
 124 |         self._name = name
 125 | 
 126 |     def __repr__(self):
 127 |         return "<%s %s %s>" % (self.__class__.__name__, self._name, self._offset)
 128 | 
 129 |     def utcoffset(self, dt):
 130 |         return self._offset
 131 | 
 132 |     def tzname(self, dt):
 133 |         return self._name
 134 | 
 135 |     def dst(self, dt):
 136 |         return self.ZERO
 137 | 
 138 |     def __eq__(self, other):
 139 |         if not isinstance(other, FixedTzOffset):
 140 |             return NotImplemented
 141 |         return self._name == other._name and self._offset == other._offset
 142 | 
 143 | 
 144 | MONTHS_MAP = {
 145 |     "Jan": 1,
 146 |     "January": 1,
 147 |     "Feb": 2,
 148 |     "February": 2,
 149 |     "Mar": 3,
 150 |     "March": 3,
 151 |     "Apr": 4,
 152 |     "April": 4,
 153 |     "May": 5,
 154 |     "Jun": 6,
 155 |     "June": 6,
 156 |     "Jul": 7,
 157 |     "July": 7,
 158 |     "Aug": 8,
 159 |     "August": 8,
 160 |     "Sep": 9,
 161 |     "September": 9,
 162 |     "Oct": 10,
 163 |     "October": 10,
 164 |     "Nov": 11,
 165 |     "November": 11,
 166 |     "Dec": 12,
 167 |     "December": 12,
 168 | }
 169 | DAYS_PAT = r"(Mon|Tue|Wed|Thu|Fri|Sat|Sun)"
 170 | MONTHS_PAT = r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"
 171 | ALL_MONTHS_PAT = r"(%s)" % "|".join(MONTHS_MAP)
 172 | TIME_PAT = r"(\d{1,2}:\d{1,2}(:\d{1,2}(\.\d+)?)?)"
 173 | AM_PAT = r"(\s+[AP]M)"
 174 | TZ_PAT = r"(\s+[-+]\d\d?:?\d\d)"
 175 | 
 176 | 
 177 | def date_convert(
 178 |     string,
 179 |     match,
 180 |     ymd=None,
 181 |     mdy=None,
 182 |     dmy=None,
 183 |     d_m_y=None,
 184 |     hms=None,
 185 |     am=None,
 186 |     tz=None,
 187 |     mm=None,
 188 |     dd=None,
 189 | ):
 190 |     """Convert the incoming string containing some date / time info into a
 191 |     datetime instance.
 192 |     """
 193 |     groups = match.groups()
 194 |     time_only = False
 195 |     if mm and dd:
 196 |         y = datetime.today().year
 197 |         m = groups[mm]
 198 |         d = groups[dd]
 199 |     elif ymd is not None:
 200 |         y, m, d = re.split(r"[-/\s]", groups[ymd])
 201 |     elif mdy is not None:
 202 |         m, d, y = re.split(r"[-/\s]", groups[mdy])
 203 |     elif dmy is not None:
 204 |         d, m, y = re.split(r"[-/\s]", groups[dmy])
 205 |     elif d_m_y is not None:
 206 |         d, m, y = d_m_y
 207 |         d = groups[d]
 208 |         m = groups[m]
 209 |         y = groups[y]
 210 |     else:
 211 |         time_only = True
 212 | 
 213 |     H = M = S = u = 0
 214 |     if hms is not None and groups[hms]:
 215 |         t = groups[hms].split(":")
 216 |         if len(t) == 2:
 217 |             H, M = t
 218 |         else:
 219 |             H, M, S = t
 220 |             if "." in S:
 221 |                 S, u = S.split(".")
 222 |                 u = int(float("." + u) * 1000000)
 223 |             S = int(S)
 224 |         H = int(H)
 225 |         M = int(M)
 226 | 
 227 |     if am is not None:
 228 |         am = groups[am]
 229 |         if am:
 230 |             am = am.strip()
 231 |         if am == "AM" and H == 12:
 232 |             # correction for "12" hour functioning as "0" hour: 12:15 AM = 00:15 by 24 hr clock
 233 |             H -= 12
 234 |         elif am == "PM" and H == 12:
 235 |             # no correction needed: 12PM is midday, 12:00 by 24 hour clock
 236 |             pass
 237 |         elif am == "PM":
 238 |             H += 12
 239 | 
 240 |     if tz is not None:
 241 |         tz = groups[tz]
 242 |     if tz == "Z":
 243 |         tz = FixedTzOffset(0, "UTC")
 244 |     elif tz:
 245 |         tz = tz.strip()
 246 |         if tz.isupper():
 247 |             # TODO use the awesome python TZ module?
 248 |             pass
 249 |         else:
 250 |             sign = tz[0]
 251 |             if ":" in tz:
 252 |                 tzh, tzm = tz[1:].split(":")
 253 |             elif len(tz) == 4:  # 'snnn'
 254 |                 tzh, tzm = tz[1], tz[2:4]
 255 |             else:
 256 |                 tzh, tzm = tz[1:3], tz[3:5]
 257 |             offset = int(tzm) + int(tzh) * 60
 258 |             if sign == "-":
 259 |                 offset = -offset
 260 |             tz = FixedTzOffset(offset, tz)
 261 | 
 262 |     if time_only:
 263 |         d = time(H, M, S, u, tzinfo=tz)
 264 |     else:
 265 |         y = int(y)
 266 |         if m.isdigit():
 267 |             m = int(m)
 268 |         else:
 269 |             m = MONTHS_MAP[m]
 270 |         d = int(d)
 271 |         d = datetime(y, m, d, H, M, S, u, tzinfo=tz)
 272 | 
 273 |     return d
 274 | 
 275 | 
 276 | def strf_date_convert(x, _, type):
 277 |     is_date = any("%" + x in type for x in "aAwdbBmyYjUW")
 278 |     is_time = any("%" + x in type for x in "HIpMSfz")
 279 | 
 280 |     dt = datetime.strptime(x, type)
 281 |     if "%y" not in type and "%Y" not in type:  # year not specified
 282 |         dt = dt.replace(year=datetime.today().year)
 283 | 
 284 |     if is_date and is_time:
 285 |         return dt
 286 |     elif is_date:
 287 |         return dt.date()
 288 |     elif is_time:
 289 |         return dt.time()
 290 |     else:
 291 |         raise ValueError("Datetime not a date nor a time?")
 292 | 
 293 | 
 294 | # ref: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
 295 | dt_format_to_regex = {
 296 |     "%a": "(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)",
 297 |     "%A": "(?:Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)",
 298 |     "%w": "[0-6]",
 299 |     "%d": "[0-9]{1,2}",
 300 |     "%b": "(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)",
 301 |     "%B": "(?:January|February|March|April|May|June|July|August|September|October|November|December)",
 302 |     "%m": "[0-9]{1,2}",
 303 |     "%y": "[0-9]{2}",
 304 |     "%Y": "[0-9]{4}",
 305 |     "%H": "[0-9]{1,2}",
 306 |     "%I": "[0-9]{1,2}",
 307 |     "%p": "(?:AM|PM)",
 308 |     "%M": "[0-9]{2}",
 309 |     "%S": "[0-9]{2}",
 310 |     "%f": "[0-9]{1,6}",
 311 |     "%z": "[+|-][0-9]{2}(:?[0-9]{2})?(:?[0-9]{2})?",
 312 |     # "%Z": punt
 313 |     "%j": "[0-9]{1,3}",
 314 |     "%U": "[0-9]{1,2}",
 315 |     "%W": "[0-9]{1,2}",
 316 | }
 317 | 
 318 | # Compile a regular expression pattern that matches any date/time format symbol.
 319 | dt_format_symbols_re = re.compile("|".join(dt_format_to_regex))
 320 | 
 321 | 
 322 | def get_regex_for_datetime_format(format_):
 323 |     """
 324 |     Generate a regex pattern for a given datetime format string.
 325 | 
 326 |     Parameters:
 327 |         format_ (str): The datetime format string.
 328 | 
 329 |     Returns:
 330 |         str: A regex pattern corresponding to the datetime format string.
 331 |     """
 332 |     # Replace all format symbols with their regex patterns.
 333 |     return dt_format_symbols_re.sub(lambda m: dt_format_to_regex[m.group(0)], format_)
 334 | 
 335 | 
 336 | class TooManyFields(ValueError):
 337 |     pass
 338 | 
 339 | 
 340 | class RepeatedNameError(ValueError):
 341 |     pass
 342 | 
 343 | 
 344 | # note: {} are handled separately
 345 | REGEX_SAFETY = re.compile(r"([?\\.[\]()*+^$!|])")
 346 | 
 347 | # allowed field types
 348 | ALLOWED_TYPES = set(list("nbox%fFegwWdDsSl") + ["t" + c for c in "ieahgcts"])
 349 | 
 350 | 
 351 | def extract_format(format, extra_types):
 352 |     """Pull apart the format [[fill]align][sign][0][width][.precision][type]"""
 353 |     fill = align = None
 354 |     if format[0] in "<>=^":
 355 |         align = format[0]
 356 |         format = format[1:]
 357 |     elif len(format) > 1 and format[1] in "<>=^":
 358 |         fill = format[0]
 359 |         align = format[1]
 360 |         format = format[2:]
 361 | 
 362 |     if format.startswith(("+", "-", " ")):
 363 |         format = format[1:]
 364 | 
 365 |     zero = False
 366 |     if format and format[0] == "0":
 367 |         zero = True
 368 |         format = format[1:]
 369 | 
 370 |     width = ""
 371 |     while format:
 372 |         if not format[0].isdigit():
 373 |             break
 374 |         width += format[0]
 375 |         format = format[1:]
 376 | 
 377 |     if format.startswith("."):
 378 |         # Precision isn't needed but we need to capture it so that
 379 |         # the ValueError isn't raised.
 380 |         format = format[1:]  # drop the '.'
 381 |         precision = ""
 382 |         while format:
 383 |             if not format[0].isdigit():
 384 |                 break
 385 |             precision += format[0]
 386 |             format = format[1:]
 387 | 
 388 |     # the rest is the type, if present
 389 |     type = format
 390 |     if (
 391 |         type
 392 |         and type not in ALLOWED_TYPES
 393 |         and type not in extra_types
 394 |         and not any(k in type for k in dt_format_to_regex)
 395 |     ):
 396 |         raise ValueError("format spec %r not recognised" % type)
 397 | 
 398 |     return locals()
 399 | 
 400 | 
 401 | PARSE_RE = re.compile(r"({{|}}|{[\w-]*(?:\.[\w-]+|\[[^]]+])*(?::[^}]+)?})")
 402 | 
 403 | 
 404 | class Parser(object):
 405 |     """Encapsulate a format string that may be used to parse other strings."""
 406 | 
 407 |     def __init__(self, format, extra_types=None, case_sensitive=False):
 408 |         # a mapping of a name as in {hello.world} to a regex-group compatible
 409 |         # name, like hello__world. It's used to prevent the transformation of
 410 |         # name-to-group and group to name to fail subtly, such as in:
 411 |         # hello_.world-> hello___world->hello._world
 412 |         self._group_to_name_map = {}
 413 |         # also store the original field name to group name mapping to allow
 414 |         # multiple instances of a name in the format string
 415 |         self._name_to_group_map = {}
 416 |         # and to sanity check the repeated instances store away the first
 417 |         # field type specification for the named field
 418 |         self._name_types = {}
 419 | 
 420 |         self._format = format
 421 |         if extra_types is None:
 422 |             extra_types = {}
 423 |         self._extra_types = extra_types
 424 |         if case_sensitive:
 425 |             self._re_flags = re.DOTALL
 426 |         else:
 427 |             self._re_flags = re.IGNORECASE | re.DOTALL
 428 |         self._fixed_fields = []
 429 |         self._named_fields = []
 430 |         self._group_index = 0
 431 |         self._type_conversions = {}
 432 |         self._expression = self._generate_expression()
 433 |         self.__search_re = None
 434 |         self.__match_re = None
 435 | 
 436 |         log.debug("format %r -> %r", format, self._expression)
 437 | 
 438 |     def __repr__(self):
 439 |         if len(self._format) > 20:
 440 |             return "<%s %r>" % (self.__class__.__name__, self._format[:17] + "...")
 441 |         return "<%s %r>" % (self.__class__.__name__, self._format)
 442 | 
 443 |     @property
 444 |     def _search_re(self):
 445 |         if self.__search_re is None:
 446 |             try:
 447 |                 self.__search_re = re.compile(self._expression, self._re_flags)
 448 |             except AssertionError:
 449 |                 # access error through sys to keep py3k and backward compat
 450 |                 e = str(sys.exc_info()[1])
 451 |                 if e.endswith("this version only supports 100 named groups"):
 452 |                     raise TooManyFields(
 453 |                         "sorry, you are attempting to parse too many complex fields"
 454 |                     )
 455 |         return self.__search_re
 456 | 
 457 |     @property
 458 |     def _match_re(self):
 459 |         if self.__match_re is None:
 460 |             expression = r"\A%s\Z" % self._expression
 461 |             try:
 462 |                 self.__match_re = re.compile(expression, self._re_flags)
 463 |             except AssertionError:
 464 |                 # access error through sys to keep py3k and backward compat
 465 |                 e = str(sys.exc_info()[1])
 466 |                 if e.endswith("this version only supports 100 named groups"):
 467 |                     raise TooManyFields(
 468 |                         "sorry, you are attempting to parse too many complex fields"
 469 |                     )
 470 |             except re.error:
 471 |                 raise NotImplementedError(
 472 |                     "Group names (e.g. (?P<name>) can "
 473 |                     "cause failure, as they are not escaped properly: '%s'" % expression
 474 |                 )
 475 |         return self.__match_re
 476 | 
 477 |     @property
 478 |     def named_fields(self):
 479 |         return self._named_fields[:]
 480 | 
 481 |     @property
 482 |     def fixed_fields(self):
 483 |         return self._fixed_fields[:]
 484 | 
 485 |     @property
 486 |     def format(self):
 487 |         return self._format
 488 | 
 489 |     def parse(self, string, evaluate_result=True):
 490 |         """Match my format to the string exactly.
 491 | 
 492 |         Return a Result or Match instance or None if there's no match.
 493 |         """
 494 |         m = self._match_re.match(string)
 495 |         if m is None:
 496 |             return None
 497 | 
 498 |         if evaluate_result:
 499 |             return self.evaluate_result(m)
 500 |         else:
 501 |             return Match(self, m)
 502 | 
 503 |     def search(self, string, pos=0, endpos=None, evaluate_result=True):
 504 |         """Search the string for my format.
 505 | 
 506 |         Optionally start the search at "pos" character index and limit the
 507 |         search to a maximum index of endpos - equivalent to
 508 |         search(string[:endpos]).
 509 | 
 510 |         If the ``evaluate_result`` argument is set to ``False`` a
 511 |         Match instance is returned instead of the actual Result instance.
 512 | 
 513 |         Return either a Result instance or None if there's no match.
 514 |         """
 515 |         if endpos is None:
 516 |             endpos = len(string)
 517 |         m = self._search_re.search(string, pos, endpos)
 518 |         if m is None:
 519 |             return None
 520 | 
 521 |         if evaluate_result:
 522 |             return self.evaluate_result(m)
 523 |         else:
 524 |             return Match(self, m)
 525 | 
 526 |     def findall(
 527 |         self, string, pos=0, endpos=None, extra_types=None, evaluate_result=True
 528 |     ):
 529 |         """Search "string" for all occurrences of "format".
 530 | 
 531 |         Optionally start the search at "pos" character index and limit the
 532 |         search to a maximum index of endpos - equivalent to
 533 |         search(string[:endpos]).
 534 | 
 535 |         Returns an iterator that holds Result or Match instances for each format match
 536 |         found.
 537 |         """
 538 |         if endpos is None:
 539 |             endpos = len(string)
 540 |         return ResultIterator(
 541 |             self, string, pos, endpos, evaluate_result=evaluate_result
 542 |         )
 543 | 
 544 |     def _expand_named_fields(self, named_fields):
 545 |         result = {}
 546 |         for field, value in named_fields.items():
 547 |             # split 'aaa[bbb][ccc]...' into 'aaa' and '[bbb][ccc]...'
 548 |             n = field.find("[")
 549 |             if n == -1:
 550 |                 basename, subkeys = field, ""
 551 |             else:
 552 |                 basename, subkeys = field[:n], field[n:]
 553 | 
 554 |             # create nested dictionaries {'aaa': {'bbb': {'ccc': ...}}}
 555 |             d = result
 556 |             k = basename
 557 | 
 558 |             if subkeys:
 559 |                 for subkey in re.findall(r"\[[^]]+]", subkeys):
 560 |                     d = d.setdefault(k, {})
 561 |                     k = subkey[1:-1]
 562 | 
 563 |             # assign the value to the last key
 564 |             d[k] = value
 565 | 
 566 |         return result
 567 | 
 568 |     def evaluate_result(self, m):
 569 |         """Generate a Result instance for the given regex match object"""
 570 |         # ok, figure the fixed fields we've pulled out and type convert them
 571 |         fixed_fields = list(m.groups())
 572 |         for n in self._fixed_fields:
 573 |             if n in self._type_conversions:
 574 |                 fixed_fields[n] = self._type_conversions[n](fixed_fields[n], m)
 575 |         fixed_fields = tuple(fixed_fields[n] for n in self._fixed_fields)
 576 | 
 577 |         # grab the named fields, converting where requested
 578 |         groupdict = m.groupdict()
 579 |         named_fields = {}
 580 |         name_map = {}
 581 |         for k in self._named_fields:
 582 |             korig = self._group_to_name_map[k]
 583 |             name_map[korig] = k
 584 |             if k in self._type_conversions:
 585 |                 value = self._type_conversions[k](groupdict[k], m)
 586 |             else:
 587 |                 value = groupdict[k]
 588 | 
 589 |             named_fields[korig] = value
 590 | 
 591 |         # now figure the match spans
 592 |         spans = {n: m.span(name_map[n]) for n in named_fields}
 593 |         spans.update((i, m.span(n + 1)) for i, n in enumerate(self._fixed_fields))
 594 | 
 595 |         # and that's our result
 596 |         return Result(fixed_fields, self._expand_named_fields(named_fields), spans)
 597 | 
 598 |     def _regex_replace(self, match):
 599 |         return "\\" + match.group(1)
 600 | 
 601 |     def _generate_expression(self):
 602 |         # turn my _format attribute into the _expression attribute
 603 |         e = []
 604 |         for part in PARSE_RE.split(self._format):
 605 |             if not part:
 606 |                 continue
 607 |             elif part == "{{":
 608 |                 e.append(r"\{")
 609 |             elif part == "}}":
 610 |                 e.append(r"\}")
 611 |             elif part[0] == "{" and part[-1] == "}":
 612 |                 # this will be a braces-delimited field to handle
 613 |                 e.append(self._handle_field(part))
 614 |             else:
 615 |                 # just some text to match
 616 |                 e.append(REGEX_SAFETY.sub(self._regex_replace, part))
 617 |         return "".join(e)
 618 | 
 619 |     def _to_group_name(self, field):
 620 |         # return a version of field which can be used as capture group, even
 621 |         # though it might contain '.'
 622 |         group = field.replace(".", "_").replace("[", "_").replace("]", "_").replace("-", "_")
 623 | 
 624 |         # make sure we don't collide ("a.b" colliding with "a_b")
 625 |         n = 1
 626 |         while group in self._group_to_name_map:
 627 |             n += 1
 628 |             if "." in field:
 629 |                 group = field.replace(".", "_" * n)
 630 |             elif "_" in field:
 631 |                 group = field.replace("_", "_" * n)
 632 |             elif "-" in field:
 633 |                 group = field.replace("-", "_" * n)
 634 |             else:
 635 |                 raise KeyError("duplicated group name %r" % (field,))
 636 | 
 637 |         # save off the mapping
 638 |         self._group_to_name_map[group] = field
 639 |         self._name_to_group_map[field] = group
 640 |         return group
 641 | 
 642 |     def _handle_field(self, field):
 643 |         # first: lose the braces
 644 |         field = field[1:-1]
 645 | 
 646 |         # now figure whether this is an anonymous or named field, and whether
 647 |         # there's any format specification
 648 |         format = ""
 649 | 
 650 |         if ":" in field:
 651 |             name, format = field.split(":", 1)
 652 |         else:
 653 |             name = field
 654 | 
 655 |         # This *should* be more flexible, but parsing complicated structures
 656 |         # out of the string is hard (and not necessarily useful) ... and I'm
 657 |         # being lazy. So for now `identifier` is "anything starting with a
 658 |         # letter" and digit args don't get attribute or element stuff.
 659 |         if name and name[0].isalpha():
 660 |             if name in self._name_to_group_map:
 661 |                 if self._name_types[name] != format:
 662 |                     raise RepeatedNameError(
 663 |                         'field type %r for field "%s" '
 664 |                         "does not match previous seen type %r"
 665 |                         % (format, name, self._name_types[name])
 666 |                     )
 667 |                 group = self._name_to_group_map[name]
 668 |                 # match previously-seen value
 669 |                 return r"(?P=%s)" % group
 670 |             else:
 671 |                 group = self._to_group_name(name)
 672 |                 self._name_types[name] = format
 673 |             self._named_fields.append(group)
 674 |             # this will become a group, which must not contain dots
 675 |             wrap = r"(?P<%s>%%s)" % group
 676 |         else:
 677 |             self._fixed_fields.append(self._group_index)
 678 |             wrap = r"(%s)"
 679 |             group = self._group_index
 680 | 
 681 |         # simplest case: no type specifier ({} or {name})
 682 |         if not format:
 683 |             self._group_index += 1
 684 |             return wrap % r".+?"
 685 | 
 686 |         # decode the format specification
 687 |         format = extract_format(format, self._extra_types)
 688 | 
 689 |         # figure type conversions, if any
 690 |         type = format["type"]
 691 |         is_numeric = type and type in "n%fegdobx"
 692 |         conv = self._type_conversions
 693 |         if type in self._extra_types:
 694 |             type_converter = self._extra_types[type]
 695 |             s = getattr(type_converter, "pattern", r".+?")
 696 |             regex_group_count = getattr(type_converter, "regex_group_count", 0)
 697 |             if regex_group_count is None:
 698 |                 regex_group_count = 0
 699 |             self._group_index += regex_group_count
 700 |             conv[group] = convert_first(type_converter)
 701 |         elif type == "n":
 702 |             s = r"\d{1,3}([,.]\d{3})*"
 703 |             self._group_index += 1
 704 |             conv[group] = int_convert(10)
 705 |         elif type == "b":
 706 |             s = r"(0[bB])?[01]+"
 707 |             conv[group] = int_convert(2)
 708 |             self._group_index += 1
 709 |         elif type == "o":
 710 |             s = r"(0[oO])?[0-7]+"
 711 |             conv[group] = int_convert(8)
 712 |             self._group_index += 1
 713 |         elif type == "x":
 714 |             s = r"(0[xX])?[0-9a-fA-F]+"
 715 |             conv[group] = int_convert(16)
 716 |             self._group_index += 1
 717 |         elif type == "%":
 718 |             s = r"\d+(\.\d+)?%"
 719 |             self._group_index += 1
 720 |             conv[group] = percentage
 721 |         elif type == "f":
 722 |             s = r"\d*\.\d+"
 723 |             conv[group] = convert_first(float)
 724 |         elif type == "F":
 725 |             s = r"\d*\.\d+"
 726 |             conv[group] = convert_first(Decimal)
 727 |         elif type == "e":
 728 |             s = r"\d*\.\d+[eE][-+]?\d+|nan|NAN|[-+]?inf|[-+]?INF"
 729 |             conv[group] = convert_first(float)
 730 |         elif type == "g":
 731 |             s = r"\d+(\.\d+)?([eE][-+]?\d+)?|nan|NAN|[-+]?inf|[-+]?INF"
 732 |             self._group_index += 2
 733 |             conv[group] = convert_first(float)
 734 |         elif type == "d":
 735 |             if format.get("width"):
 736 |                 width = r"{1,%s}" % int(format["width"])
 737 |             else:
 738 |                 width = "+"
 739 |             s = r"\d{w}|[-+ ]?0[xX][0-9a-fA-F]{w}|[-+ ]?0[bB][01]{w}|[-+ ]?0[oO][0-7]{w}".format(
 740 |                 w=width
 741 |             )
 742 |             conv[group] = int_convert()
 743 |             # do not specify number base, determine it automatically
 744 |         elif any(k in type for k in dt_format_to_regex):
 745 |             s = get_regex_for_datetime_format(type)
 746 |             conv[group] = partial(strf_date_convert, type=type)
 747 |         elif type == "ti":
 748 |             s = r"(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?" % TIME_PAT
 749 |             n = self._group_index
 750 |             conv[group] = partial(date_convert, ymd=n + 1, hms=n + 4, tz=n + 7)
 751 |             self._group_index += 7
 752 |         elif type == "tg":
 753 |             s = r"(\d{1,2}[-/](\d{1,2}|%s)[-/]\d{4})(\s+%s)?%s?%s?"
 754 |             s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT)
 755 |             n = self._group_index
 756 |             conv[group] = partial(
 757 |                 date_convert, dmy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
 758 |             )
 759 |             self._group_index += 9
 760 |         elif type == "ta":
 761 |             s = r"((\d{1,2}|%s)[-/]\d{1,2}[-/]\d{4})(\s+%s)?%s?%s?"
 762 |             s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT)
 763 |             n = self._group_index
 764 |             conv[group] = partial(
 765 |                 date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
 766 |             )
 767 |             self._group_index += 9
 768 |         elif type == "te":
 769 |             # this will allow microseconds through if they're present, but meh
 770 |             s = r"(%s,\s+)?(\d{1,2}\s+%s\s+\d{4})\s+%s%s"
 771 |             s %= (DAYS_PAT, MONTHS_PAT, TIME_PAT, TZ_PAT)
 772 |             n = self._group_index
 773 |             conv[group] = partial(date_convert, dmy=n + 3, hms=n + 5, tz=n + 8)
 774 |             self._group_index += 8
 775 |         elif type == "th":
 776 |             # slight flexibility here from the stock Apache format
 777 |             s = r"(\d{1,2}[-/]%s[-/]\d{4}):%s%s" % (MONTHS_PAT, TIME_PAT, TZ_PAT)
 778 |             n = self._group_index
 779 |             conv[group] = partial(date_convert, dmy=n + 1, hms=n + 3, tz=n + 6)
 780 |             self._group_index += 6
 781 |         elif type == "tc":
 782 |             s = r"(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})"
 783 |             s %= (DAYS_PAT, MONTHS_PAT, TIME_PAT)
 784 |             n = self._group_index
 785 |             conv[group] = partial(date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5)
 786 |             self._group_index += 8
 787 |         elif type == "tt":
 788 |             s = r"%s?%s?%s?" % (TIME_PAT, AM_PAT, TZ_PAT)
 789 |             n = self._group_index
 790 |             conv[group] = partial(date_convert, hms=n + 1, am=n + 4, tz=n + 5)
 791 |             self._group_index += 5
 792 |         elif type == "ts":
 793 |             s = r"%s(\s+)(\d+)(\s+)(\d{1,2}:\d{1,2}:\d{1,2})?" % MONTHS_PAT
 794 |             n = self._group_index
 795 |             conv[group] = partial(date_convert, mm=n + 1, dd=n + 3, hms=n + 5)
 796 |             self._group_index += 5
 797 |         elif type == "l":
 798 |             s = r"[A-Za-z]+"
 799 |         elif type:
 800 |             s = r"\%s+" % type
 801 |         elif format.get("precision"):
 802 |             if format.get("width"):
 803 |                 s = r".{%s,%s}?" % (format["width"], format["precision"])
 804 |             else:
 805 |                 s = r".{1,%s}?" % format["precision"]
 806 |         elif format.get("width"):
 807 |             s = r".{%s,}?" % format["width"]
 808 |         else:
 809 |             s = r".+?"
 810 | 
 811 |         align = format["align"]
 812 |         fill = format["fill"]
 813 | 
 814 |         # handle some numeric-specific things like fill and sign
 815 |         if is_numeric:
 816 |             # prefix with something (align "=" trumps zero)
 817 |             if align == "=":
 818 |                 # special case - align "=" acts like the zero above but with
 819 |                 # configurable fill defaulting to "0"
 820 |                 if not fill:
 821 |                     fill = "0"
 822 |                 s = r"%s*" % fill + s
 823 | 
 824 |             # allow numbers to be prefixed with a sign
 825 |             s = r"[-+ ]?" + s
 826 | 
 827 |         if not fill:
 828 |             fill = " "
 829 | 
 830 |         # Place into a group now - this captures the value we want to keep.
 831 |         # Everything else from now is just padding to be stripped off
 832 |         if wrap:
 833 |             s = wrap % s
 834 |             self._group_index += 1
 835 | 
 836 |         if format["width"]:
 837 |             # all we really care about is that if the format originally
 838 |             # specified a width then there will probably be padding - without
 839 |             # an explicit alignment that'll mean right alignment with spaces
 840 |             # padding
 841 |             if not align:
 842 |                 align = ">"
 843 | 
 844 |         if fill in r".\+?*[](){}^$":
 845 |             fill = "\\" + fill
 846 | 
 847 |         # align "=" has been handled
 848 |         if align == "<":
 849 |             s = "%s%s*" % (s, fill)
 850 |         elif align == ">":
 851 |             s = "%s*%s" % (fill, s)
 852 |         elif align == "^":
 853 |             s = "%s*%s%s*" % (fill, s, fill)
 854 | 
 855 |         return s
 856 | 
 857 | 
 858 | class Result(object):
 859 |     """The result of a parse() or search().
 860 | 
 861 |     Fixed results may be looked up using `result[index]`.
 862 |     Slices of fixed results may also be looked up.
 863 | 
 864 |     Named results may be looked up using `result['name']`.
 865 | 
 866 |     Named results may be tested for existence using `'name' in result`.
 867 |     """
 868 | 
 869 |     def __init__(self, fixed, named, spans):
 870 |         self.fixed = fixed
 871 |         self.named = named
 872 |         self.spans = spans
 873 | 
 874 |     def __getitem__(self, item):
 875 |         if isinstance(item, (int, slice)):
 876 |             return self.fixed[item]
 877 |         return self.named[item]
 878 | 
 879 |     def __repr__(self):
 880 |         return "<%s %r %r>" % (self.__class__.__name__, self.fixed, self.named)
 881 | 
 882 |     def __contains__(self, name):
 883 |         return name in self.named
 884 | 
 885 | 
 886 | class Match(object):
 887 |     """The result of a parse() or search() if no results are generated.
 888 | 
 889 |     This class is only used to expose internal used regex match objects
 890 |     to the user and use them for external Parser.evaluate_result calls.
 891 |     """
 892 | 
 893 |     def __init__(self, parser, match):
 894 |         self.parser = parser
 895 |         self.match = match
 896 | 
 897 |     def evaluate_result(self):
 898 |         """Generate results for this Match"""
 899 |         return self.parser.evaluate_result(self.match)
 900 | 
 901 | 
 902 | class ResultIterator(object):
 903 |     """The result of a findall() operation.
 904 | 
 905 |     Each element is a Result instance.
 906 |     """
 907 | 
 908 |     def __init__(self, parser, string, pos, endpos, evaluate_result=True):
 909 |         self.parser = parser
 910 |         self.string = string
 911 |         self.pos = pos
 912 |         self.endpos = endpos
 913 |         self.evaluate_result = evaluate_result
 914 | 
 915 |     def __iter__(self):
 916 |         return self
 917 | 
 918 |     def __next__(self):
 919 |         m = self.parser._search_re.search(self.string, self.pos, self.endpos)
 920 |         if m is None:
 921 |             raise StopIteration()
 922 |         self.pos = m.end()
 923 | 
 924 |         if self.evaluate_result:
 925 |             return self.parser.evaluate_result(m)
 926 |         else:
 927 |             return Match(self.parser, m)
 928 | 
 929 |     # pre-py3k compat
 930 |     next = __next__
 931 | 
 932 | 
 933 | def parse(format, string, extra_types=None, evaluate_result=True, case_sensitive=False):
 934 |     """Using "format" attempt to pull values from "string".
 935 | 
 936 |     The format must match the string contents exactly. If the value
 937 |     you're looking for is instead just a part of the string use
 938 |     search().
 939 | 
 940 |     If ``evaluate_result`` is True the return value will be an Result instance with two attributes:
 941 | 
 942 |      .fixed - tuple of fixed-position values from the string
 943 |      .named - dict of named values from the string
 944 | 
 945 |     If ``evaluate_result`` is False the return value will be a Match instance with one method:
 946 | 
 947 |      .evaluate_result() - This will return a Result instance like you would get
 948 |                           with ``evaluate_result`` set to True
 949 | 
 950 |     The default behaviour is to match strings case insensitively. You may match with
 951 |     case by specifying case_sensitive=True.
 952 | 
 953 |     If the format is invalid a ValueError will be raised.
 954 | 
 955 |     See the module documentation for the use of "extra_types".
 956 | 
 957 |     In the case there is no match parse() will return None.
 958 |     """
 959 |     p = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
 960 |     return p.parse(string, evaluate_result=evaluate_result)
 961 | 
 962 | 
 963 | def search(
 964 |     format,
 965 |     string,
 966 |     pos=0,
 967 |     endpos=None,
 968 |     extra_types=None,
 969 |     evaluate_result=True,
 970 |     case_sensitive=False,
 971 | ):
 972 |     """Search "string" for the first occurrence of "format".
 973 | 
 974 |     The format may occur anywhere within the string. If
 975 |     instead you wish for the format to exactly match the string
 976 |     use parse().
 977 | 
 978 |     Optionally start the search at "pos" character index and limit the search
 979 |     to a maximum index of endpos - equivalent to search(string[:endpos]).
 980 | 
 981 |     If ``evaluate_result`` is True the return value will be an Result instance with two attributes:
 982 | 
 983 |      .fixed - tuple of fixed-position values from the string
 984 |      .named - dict of named values from the string
 985 | 
 986 |     If ``evaluate_result`` is False the return value will be a Match instance with one method:
 987 | 
 988 |      .evaluate_result() - This will return a Result instance like you would get
 989 |                           with ``evaluate_result`` set to True
 990 | 
 991 |     The default behaviour is to match strings case insensitively. You may match with
 992 |     case by specifying case_sensitive=True.
 993 | 
 994 |     If the format is invalid a ValueError will be raised.
 995 | 
 996 |     See the module documentation for the use of "extra_types".
 997 | 
 998 |     In the case there is no match parse() will return None.
 999 |     """
1000 |     p = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
1001 |     return p.search(string, pos, endpos, evaluate_result=evaluate_result)
1002 | 
1003 | 
1004 | def findall(
1005 |     format,
1006 |     string,
1007 |     pos=0,
1008 |     endpos=None,
1009 |     extra_types=None,
1010 |     evaluate_result=True,
1011 |     case_sensitive=False,
1012 | ):
1013 |     """Search "string" for all occurrences of "format".
1014 | 
1015 |     You will be returned an iterator that holds Result instances
1016 |     for each format match found.
1017 | 
1018 |     Optionally start the search at "pos" character index and limit the search
1019 |     to a maximum index of endpos - equivalent to search(string[:endpos]).
1020 | 
1021 |     If ``evaluate_result`` is True each returned Result instance has two attributes:
1022 | 
1023 |      .fixed - tuple of fixed-position values from the string
1024 |      .named - dict of named values from the string
1025 | 
1026 |     If ``evaluate_result`` is False each returned value is a Match instance with one method:
1027 | 
1028 |      .evaluate_result() - This will return a Result instance like you would get
1029 |                           with ``evaluate_result`` set to True
1030 | 
1031 |     The default behaviour is to match strings case insensitively. You may match with
1032 |     case by specifying case_sensitive=True.
1033 | 
1034 |     If the format is invalid a ValueError will be raised.
1035 | 
1036 |     See the module documentation for the use of "extra_types".
1037 |     """
1038 |     p = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
1039 |     return p.findall(string, pos, endpos, evaluate_result=evaluate_result)
1040 | 
1041 | 
1042 | def compile(format, extra_types=None, case_sensitive=False):
1043 |     """Create a Parser instance to parse "format".
1044 | 
1045 |     The resultant Parser has a method .parse(string) which
1046 |     behaves in the same manner as parse(format, string).
1047 | 
1048 |     The default behaviour is to match strings case insensitively. You may match with
1049 |     case by specifying case_sensitive=True.
1050 | 
1051 |     Use this function if you intend to parse many strings
1052 |     with the same format.
1053 | 
1054 |     See the module documentation for the use of "extra_types".
1055 | 
1056 |     Returns a Parser instance.
1057 |     """
1058 |     return Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
1059 | 
1060 | 
1061 | # Copyright (c) 2012-2020 Richard Jones <richard@python.org>
1062 | #
1063 | # Permission is hereby granted, free of charge, to any person obtaining a copy
1064 | # of this software and associated documentation files (the "Software"), to deal
1065 | # in the Software without restriction, including without limitation the rights
1066 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1067 | # copies of the Software, and to permit persons to whom the Software is
1068 | # furnished to do so, subject to the following conditions:
1069 | #
1070 | #  The above copyright notice and this permission notice shall be included in
1071 | #  all copies or substantial portions of the Software.
1072 | #
1073 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1074 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1075 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1076 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1077 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1078 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1079 | # SOFTWARE.
1080 | 
1081 | # vim: set filetype=python ts=4 sw=4 et si tw=75
1082 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=61.2"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "parse"
 7 | dynamic = ["version"]
 8 | readme = "README.rst"
 9 | description = "parse() is the opposite of format()"
10 | license = {file = "LICENSE"}
11 | classifiers = [
12 |     "Environment :: Web Environment",
13 |     "Intended Audience :: Developers",
14 |     "Programming Language :: Python :: 3",
15 |     "Topic :: Software Development :: Code Generators",
16 |     "Topic :: Software Development :: Libraries :: Python Modules",
17 |     "License :: OSI Approved :: MIT License",
18 | ]
19 | 
20 | [[project.authors]]
21 | name = "Richard Jones"
22 | email = "richard@python.org"
23 | 
24 | [[project.maintainers]]
25 | name = "Wim Glenn"
26 | email = "hey@wimglenn.com"
27 | 
28 | [project.urls]
29 | homepage = "https://github.com/r1chardj0n3s/parse"
30 | 
31 | [tool.setuptools]
32 | py-modules = ["parse"]
33 | 
34 | [tool.setuptools.dynamic]
35 | version = {attr = "parse.__version__"}
36 | 
37 | [tool.distutils.bdist_wheel]
38 | universal = true
39 | 


--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-cov
3 | 


--------------------------------------------------------------------------------
/tests/test_bugs.py:
--------------------------------------------------------------------------------
  1 | import pickle
  2 | from datetime import datetime
  3 | 
  4 | import parse
  5 | 
  6 | 
  7 | def test_tz_compare_to_None():
  8 |     utc = parse.FixedTzOffset(0, "UTC")
  9 |     assert utc is not None
 10 |     assert utc != "spam"
 11 | 
 12 | 
 13 | def test_named_date_issue7():
 14 |     r = parse.parse("on {date:ti}", "on 2012-09-17")
 15 |     assert r["date"] == datetime(2012, 9, 17, 0, 0, 0)
 16 | 
 17 |     # fix introduced regressions
 18 |     r = parse.parse("a {:ti} b", "a 1997-07-16T19:20 b")
 19 |     assert r[0] == datetime(1997, 7, 16, 19, 20, 0)
 20 |     r = parse.parse("a {:ti} b", "a 1997-07-16T19:20Z b")
 21 |     utc = parse.FixedTzOffset(0, "UTC")
 22 |     assert r[0] == datetime(1997, 7, 16, 19, 20, tzinfo=utc)
 23 |     r = parse.parse("a {date:ti} b", "a 1997-07-16T19:20Z b")
 24 |     assert r["date"] == datetime(1997, 7, 16, 19, 20, tzinfo=utc)
 25 | 
 26 | 
 27 | def test_dotted_type_conversion_pull_8():
 28 |     # test pull request 8 which fixes type conversion related to dotted
 29 |     # names being applied correctly
 30 |     r = parse.parse("{a.b:d}", "1")
 31 |     assert r["a.b"] == 1
 32 |     r = parse.parse("{a_b:w} {a.b:d}", "1 2")
 33 |     assert r["a_b"] == "1"
 34 |     assert r["a.b"] == 2
 35 | 
 36 | 
 37 | def test_pm_overflow_issue16():
 38 |     r = parse.parse("Meet at {:tg}", "Meet at 1/2/2011 12:45 PM")
 39 |     assert r[0] == datetime(2011, 2, 1, 12, 45)
 40 | 
 41 | 
 42 | def test_pm_handling_issue57():
 43 |     r = parse.parse("Meet at {:tg}", "Meet at 1/2/2011 12:15 PM")
 44 |     assert r[0] == datetime(2011, 2, 1, 12, 15)
 45 |     r = parse.parse("Meet at {:tg}", "Meet at 1/2/2011 12:15 AM")
 46 |     assert r[0] == datetime(2011, 2, 1, 0, 15)
 47 | 
 48 | 
 49 | def test_user_type_with_group_count_issue60():
 50 |     @parse.with_pattern(r"((\w+))", regex_group_count=2)
 51 |     def parse_word_and_covert_to_uppercase(text):
 52 |         return text.strip().upper()
 53 | 
 54 |     @parse.with_pattern(r"\d+")
 55 |     def parse_number(text):
 56 |         return int(text)
 57 | 
 58 |     # -- CASE: Use named (OK)
 59 |     type_map = {"Name": parse_word_and_covert_to_uppercase, "Number": parse_number}
 60 |     r = parse.parse(
 61 |         "Hello {name:Name} {number:Number}", "Hello Alice 42", extra_types=type_map
 62 |     )
 63 |     assert r.named == {"name": "ALICE", "number": 42}
 64 | 
 65 |     # -- CASE: Use unnamed/fixed (problematic)
 66 |     r = parse.parse("Hello {:Name} {:Number}", "Hello Alice 42", extra_types=type_map)
 67 |     assert r[0] == "ALICE"
 68 |     assert r[1] == 42
 69 | 
 70 | 
 71 | def test_unmatched_brace_doesnt_match():
 72 |     r = parse.parse("{who.txt", "hello")
 73 |     assert r is None
 74 | 
 75 | 
 76 | def test_pickling_bug_110():
 77 |     p = parse.compile("{a:d}")
 78 |     # prior to the fix, this would raise an AttributeError
 79 |     pickle.dumps(p)
 80 | 
 81 | 
 82 | def test_unused_centered_alignment_bug():
 83 |     r = parse.parse("{:^2S}", "foo")
 84 |     assert r[0] == "foo"
 85 |     r = parse.search("{:^2S}", "foo")
 86 |     assert r[0] == "foo"
 87 | 
 88 |     # specifically test for the case in issue #118 as well
 89 |     r = parse.parse("Column {:d}:{:^}", "Column 1: Timestep")
 90 |     assert r[0] == 1
 91 |     assert r[1] == "Timestep"
 92 | 
 93 | 
 94 | def test_unused_left_alignment_bug():
 95 |     r = parse.parse("{:<2S}", "foo")
 96 |     assert r[0] == "foo"
 97 |     r = parse.search("{:<2S}", "foo")
 98 |     assert r[0] == "foo"
 99 | 
100 | 
101 | def test_match_trailing_newline():
102 |     r = parse.parse("{}", "test\n")
103 |     assert r[0] == "test\n"
104 | 


--------------------------------------------------------------------------------
/tests/test_findall.py:
--------------------------------------------------------------------------------
 1 | import parse
 2 | 
 3 | 
 4 | def test_findall():
 5 |     s = "".join(
 6 |         r.fixed[0] for r in parse.findall(">{}<", "<p>some <b>bold</b> text</p>")
 7 |     )
 8 |     assert s == "some bold text"
 9 | 
10 | 
11 | def test_no_evaluate_result():
12 |     s = "".join(
13 |         m.evaluate_result().fixed[0]
14 |         for m in parse.findall(
15 |             ">{}<", "<p>some <b>bold</b> text</p>", evaluate_result=False
16 |         )
17 |     )
18 |     assert s == "some bold text"
19 | 
20 | 
21 | def test_case_sensitivity():
22 |     l = [r.fixed[0] for r in parse.findall("x({})x", "X(hi)X")]
23 |     assert l == ["hi"]
24 | 
25 |     l = [r.fixed[0] for r in parse.findall("x({})x", "X(hi)X", case_sensitive=True)]
26 |     assert l == []
27 | 


--------------------------------------------------------------------------------
/tests/test_parse.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | import sys
  3 | from datetime import date
  4 | from datetime import datetime
  5 | from datetime import time
  6 | 
  7 | import pytest
  8 | 
  9 | import parse
 10 | 
 11 | 
 12 | def test_no_match():
 13 |     # string does not match format
 14 |     assert parse.parse("{{hello}}", "hello") is None
 15 | 
 16 | 
 17 | def test_nothing():
 18 |     # do no actual parsing
 19 |     r = parse.parse("{{hello}}", "{hello}")
 20 |     assert r.fixed == ()
 21 |     assert r.named == {}
 22 | 
 23 | 
 24 | def test_no_evaluate_result():
 25 |     # pull a fixed value out of string
 26 |     match = parse.parse("hello {}", "hello world", evaluate_result=False)
 27 |     r = match.evaluate_result()
 28 |     assert r.fixed == ("world",)
 29 | 
 30 | 
 31 | def test_regular_expression():
 32 |     # match an actual regular expression
 33 |     s = r"^(hello\s[wW]{}!+.*)$"
 34 |     e = s.replace("{}", "orld")
 35 |     r = parse.parse(s, e)
 36 |     assert r.fixed == ("orld",)
 37 |     e = s.replace("{}", ".*?")
 38 |     r = parse.parse(s, e)
 39 |     assert r.fixed == (".*?",)
 40 | 
 41 | 
 42 | def test_question_mark():
 43 |     # issue9: make sure a ? in the parse string is handled correctly
 44 |     r = parse.parse('"{}"?', '"teststr"?')
 45 |     assert r[0] == "teststr"
 46 | 
 47 | 
 48 | def test_pipe():
 49 |     # issue22: make sure a | in the parse string is handled correctly
 50 |     r = parse.parse("| {}", "| teststr")
 51 |     assert r[0] == "teststr"
 52 | 
 53 | 
 54 | def test_unicode():
 55 |     # issue29: make sure unicode is parsable
 56 |     r = parse.parse("{}", "t€ststr")
 57 |     assert r[0] == "t€ststr"
 58 | 
 59 | 
 60 | def test_hexadecimal():
 61 |     # issue42: make sure bare hexadecimal isn't matched as "digits"
 62 |     r = parse.parse("{:d}", "abcdef")
 63 |     assert r is None
 64 | 
 65 | 
 66 | def test_fixed():
 67 |     # pull a fixed value out of string
 68 |     r = parse.parse("hello {}", "hello world")
 69 |     assert r.fixed == ("world",)
 70 | 
 71 | 
 72 | def test_left():
 73 |     # pull left-aligned text out of string
 74 |     r = parse.parse("{:<} world", "hello       world")
 75 |     assert r.fixed == ("hello",)
 76 | 
 77 | 
 78 | def test_right():
 79 |     # pull right-aligned text out of string
 80 |     r = parse.parse("hello {:>}", "hello       world")
 81 |     assert r.fixed == ("world",)
 82 | 
 83 | 
 84 | def test_center():
 85 |     # pull center-aligned text out of string
 86 |     r = parse.parse("hello {:^} world", "hello  there     world")
 87 |     assert r.fixed == ("there",)
 88 | 
 89 | 
 90 | def test_typed():
 91 |     # pull a named, typed values out of string
 92 |     r = parse.parse("hello {:d} {:w}", "hello 12 people")
 93 |     assert r.fixed == (12, "people")
 94 |     r = parse.parse("hello {:w} {:w}", "hello 12 people")
 95 |     assert r.fixed == ("12", "people")
 96 | 
 97 | 
 98 | def test_sign():
 99 |     # sign is ignored
100 |     r = parse.parse("Pi = {:.7f}", "Pi = 3.1415926")
101 |     assert r.fixed == (3.1415926,)
102 |     r = parse.parse("Pi = {:+.7f}", "Pi = 3.1415926")
103 |     assert r.fixed == (3.1415926,)
104 |     r = parse.parse("Pi = {:-.7f}", "Pi = 3.1415926")
105 |     assert r.fixed == (3.1415926,)
106 |     r = parse.parse("Pi = {: .7f}", "Pi = 3.1415926")
107 |     assert r.fixed == (3.1415926,)
108 | 
109 | 
110 | def test_precision():
111 |     # pull a float out of a string
112 |     r = parse.parse("Pi = {:.7f}", "Pi = 3.1415926")
113 |     assert r.fixed == (3.1415926,)
114 |     r = parse.parse("Pi/10 = {:8.5f}", "Pi/10 =  0.31415")
115 |     assert r.fixed == (0.31415,)
116 |     # float may have not leading zero
117 |     r = parse.parse("Pi/10 = {:8.5f}", "Pi/10 =  .31415")
118 |     assert r.fixed == (0.31415,)
119 |     r = parse.parse("Pi/10 = {:8.5f}", "Pi/10 = -.31415")
120 |     assert r.fixed == (-0.31415,)
121 | 
122 | 
123 | def test_custom_type():
124 |     # use a custom type
125 |     r = parse.parse(
126 |         "{:shouty} {:spam}",
127 |         "hello world",
128 |         {"shouty": lambda s: s.upper(), "spam": lambda s: "".join(reversed(s))},
129 |     )
130 |     assert r.fixed == ("HELLO", "dlrow")
131 |     r = parse.parse("{:d}", "12", {"d": lambda s: int(s) * 2})
132 |     assert r.fixed == (24,)
133 |     r = parse.parse("{:d}", "12")
134 |     assert r.fixed == (12,)
135 | 
136 | 
137 | def test_typed_fail():
138 |     # pull a named, typed values out of string
139 |     assert parse.parse("hello {:d} {:w}", "hello people 12") is None
140 | 
141 | 
142 | def test_named():
143 |     # pull a named value out of string
144 |     r = parse.parse("hello {name}", "hello world")
145 |     assert r.named == {"name": "world"}
146 | 
147 | 
148 | def test_named_repeated():
149 |     # test a name may be repeated
150 |     r = parse.parse("{n} {n}", "x x")
151 |     assert r.named == {"n": "x"}
152 | 
153 | 
154 | def test_named_repeated_type():
155 |     # test a name may be repeated with type conversion
156 |     r = parse.parse("{n:d} {n:d}", "1 1")
157 |     assert r.named == {"n": 1}
158 | 
159 | 
160 | def test_named_repeated_fail_value():
161 |     # test repeated name fails if value mismatches
162 |     r = parse.parse("{n} {n}", "x y")
163 |     assert r is None
164 | 
165 | 
166 | def test_named_repeated_type_fail_value():
167 |     # test repeated name with type conversion fails if value mismatches
168 |     r = parse.parse("{n:d} {n:d}", "1 2")
169 |     assert r is None
170 | 
171 | 
172 | def test_named_repeated_type_mismatch():
173 |     # test repeated name with mismatched type
174 |     with pytest.raises(parse.RepeatedNameError):
175 |         parse.compile("{n:d} {n:w}")
176 | 
177 | 
178 | def test_mixed():
179 |     # pull a fixed and named values out of string
180 |     r = parse.parse("hello {} {name} {} {spam}", "hello world and other beings")
181 |     assert r.fixed == ("world", "other")
182 |     assert r.named == {"name": "and", "spam": "beings"}
183 | 
184 | 
185 | def test_named_typed():
186 |     # pull a named, typed values out of string
187 |     r = parse.parse("hello {number:d} {things}", "hello 12 people")
188 |     assert r.named == {"number": 12, "things": "people"}
189 |     r = parse.parse("hello {number:w} {things}", "hello 12 people")
190 |     assert r.named == {"number": "12", "things": "people"}
191 | 
192 | 
193 | def test_named_aligned_typed():
194 |     # pull a named, typed values out of string
195 |     r = parse.parse("hello {number:<d} {things}", "hello 12      people")
196 |     assert r.named == {"number": 12, "things": "people"}
197 |     r = parse.parse("hello {number:>d} {things}", "hello      12 people")
198 |     assert r.named == {"number": 12, "things": "people"}
199 |     r = parse.parse("hello {number:^d} {things}", "hello      12      people")
200 |     assert r.named == {"number": 12, "things": "people"}
201 | 
202 | 
203 | def test_multiline():
204 |     r = parse.parse("hello\n{}\nworld", "hello\nthere\nworld")
205 |     assert r.fixed[0] == "there"
206 | 
207 | 
208 | def test_spans():
209 |     # test the string sections our fields come from
210 |     string = "hello world"
211 |     r = parse.parse("hello {}", string)
212 |     assert r.spans == {0: (6, 11)}
213 |     start, end = r.spans[0]
214 |     assert string[start:end] == r.fixed[0]
215 | 
216 |     string = "hello     world"
217 |     r = parse.parse("hello {:>}", string)
218 |     assert r.spans == {0: (10, 15)}
219 |     start, end = r.spans[0]
220 |     assert string[start:end] == r.fixed[0]
221 | 
222 |     string = "hello 0x12 world"
223 |     r = parse.parse("hello {val:x} world", string)
224 |     assert r.spans == {"val": (6, 10)}
225 |     start, end = r.spans["val"]
226 |     assert string[start:end] == "0x%x" % r.named["val"]
227 | 
228 |     string = "hello world and other beings"
229 |     r = parse.parse("hello {} {name} {} {spam}", string)
230 |     assert r.spans == {0: (6, 11), "name": (12, 15), 1: (16, 21), "spam": (22, 28)}
231 | 
232 | 
233 | def test_numbers():
234 |     # pull a numbers out of a string
235 |     def y(fmt, s, e, str_equals=False):
236 |         p = parse.compile(fmt)
237 |         r = p.parse(s)
238 |         assert r is not None
239 |         r = r.fixed[0]
240 |         if str_equals:
241 |             assert str(r) == str(e)
242 |         else:
243 |             assert r == e
244 | 
245 |     def n(fmt, s, e):
246 |         assert parse.parse(fmt, s) is None
247 | 
248 |     y("a {:d} b", "a 0 b", 0)
249 |     y("a {:d} b", "a 12 b", 12)
250 |     y("a {:5d} b", "a    12 b", 12)
251 |     y("a {:5d} b", "a   -12 b", -12)
252 |     y("a {:d} b", "a -12 b", -12)
253 |     y("a {:d} b", "a +12 b", 12)
254 |     y("a {:d} b", "a  12 b", 12)
255 |     y("a {:d} b", "a 0b1000 b", 8)
256 |     y("a {:d} b", "a 0o1000 b", 512)
257 |     y("a {:d} b", "a 0x1000 b", 4096)
258 |     y("a {:d} b", "a 0xabcdef b", 0xABCDEF)
259 | 
260 |     y("a {:%} b", "a 100% b", 1)
261 |     y("a {:%} b", "a 50% b", 0.5)
262 |     y("a {:%} b", "a 50.1% b", 0.501)
263 | 
264 |     y("a {:n} b", "a 100 b", 100)
265 |     y("a {:n} b", "a 1,000 b", 1000)
266 |     y("a {:n} b", "a 1.000 b", 1000)
267 |     y("a {:n} b", "a -1,000 b", -1000)
268 |     y("a {:n} b", "a 10,000 b", 10000)
269 |     y("a {:n} b", "a 100,000 b", 100000)
270 |     n("a {:n} b", "a 100,00 b", None)
271 |     y("a {:n} b", "a 100.000 b", 100000)
272 |     y("a {:n} b", "a 1.000.000 b", 1000000)
273 | 
274 |     y("a {:f} b", "a 12.0 b", 12.0)
275 |     y("a {:f} b", "a -12.1 b", -12.1)
276 |     y("a {:f} b", "a +12.1 b", 12.1)
277 |     y("a {:f} b", "a .121 b", 0.121)
278 |     y("a {:f} b", "a -.121 b", -0.121)
279 |     n("a {:f} b", "a 12 b", None)
280 | 
281 |     y("a {:e} b", "a 1.0e10 b", 1.0e10)
282 |     y("a {:e} b", "a .0e10 b", 0.0e10)
283 |     y("a {:e} b", "a 1.0E10 b", 1.0e10)
284 |     y("a {:e} b", "a 1.10000e10 b", 1.1e10)
285 |     y("a {:e} b", "a 1.0e-10 b", 1.0e-10)
286 |     y("a {:e} b", "a 1.0e+10 b", 1.0e10)
287 |     # can't actually test this one on values 'cos nan != nan
288 |     y("a {:e} b", "a nan b", float("nan"), str_equals=True)
289 |     y("a {:e} b", "a NAN b", float("nan"), str_equals=True)
290 |     y("a {:e} b", "a inf b", float("inf"))
291 |     y("a {:e} b", "a +inf b", float("inf"))
292 |     y("a {:e} b", "a -inf b", float("-inf"))
293 |     y("a {:e} b", "a INF b", float("inf"))
294 |     y("a {:e} b", "a +INF b", float("inf"))
295 |     y("a {:e} b", "a -INF b", float("-inf"))
296 | 
297 |     y("a {:g} b", "a 1 b", 1)
298 |     y("a {:g} b", "a 1e10 b", 1e10)
299 |     y("a {:g} b", "a 1.0e10 b", 1.0e10)
300 |     y("a {:g} b", "a 1.0E10 b", 1.0e10)
301 | 
302 |     y("a {:b} b", "a 1000 b", 8)
303 |     y("a {:b} b", "a 0b1000 b", 8)
304 |     y("a {:o} b", "a 12345670 b", int("12345670", 8))
305 |     y("a {:o} b", "a 0o12345670 b", int("12345670", 8))
306 |     y("a {:x} b", "a 1234567890abcdef b", 0x1234567890ABCDEF)
307 |     y("a {:x} b", "a 1234567890ABCDEF b", 0x1234567890ABCDEF)
308 |     y("a {:x} b", "a 0x1234567890abcdef b", 0x1234567890ABCDEF)
309 |     y("a {:x} b", "a 0x1234567890ABCDEF b", 0x1234567890ABCDEF)
310 | 
311 |     y("a {:05d} b", "a 00001 b", 1)
312 |     y("a {:05d} b", "a -00001 b", -1)
313 |     y("a {:05d} b", "a +00001 b", 1)
314 |     y("a {:02d} b", "a 10 b", 10)
315 | 
316 |     y("a {:=d} b", "a 000012 b", 12)
317 |     y("a {:x=5d} b", "a xxx12 b", 12)
318 |     y("a {:x=5d} b", "a -xxx12 b", -12)
319 | 
320 |     # Test that hex numbers that ambiguously start with 0b / 0B are parsed correctly
321 |     # See issue #65 (https://github.com/r1chardj0n3s/parse/issues/65)
322 |     y("a {:x} b", "a 0B b", 0xB)
323 |     y("a {:x} b", "a 0B1 b", 0xB1)
324 |     y("a {:x} b", "a 0b b", 0xB)
325 |     y("a {:x} b", "a 0b1 b", 0xB1)
326 | 
327 |     # Test that number signs are understood correctly
328 |     y("a {:d} b", "a -0o10 b", -8)
329 |     y("a {:d} b", "a -0b1010 b", -10)
330 |     y("a {:d} b", "a -0x1010 b", -0x1010)
331 |     y("a {:o} b", "a -10 b", -8)
332 |     y("a {:b} b", "a -1010 b", -10)
333 |     y("a {:x} b", "a -1010 b", -0x1010)
334 |     y("a {:d} b", "a +0o10 b", 8)
335 |     y("a {:d} b", "a +0b1010 b", 10)
336 |     y("a {:d} b", "a +0x1010 b", 0x1010)
337 |     y("a {:o} b", "a +10 b", 8)
338 |     y("a {:b} b", "a +1010 b", 10)
339 |     y("a {:x} b", "a +1010 b", 0x1010)
340 | 
341 | 
342 | def test_two_datetimes():
343 |     r = parse.parse("a {:ti} {:ti} b", "a 1997-07-16 2012-08-01 b")
344 |     assert len(r.fixed) == 2
345 |     assert r[0] == datetime(1997, 7, 16)
346 |     assert r[1] == datetime(2012, 8, 1)
347 | 
348 | 
349 | def test_flexible_datetimes():
350 |     r = parse.parse("a {:%Y-%m-%d} b", "a 1997-07-16 b")
351 |     assert len(r.fixed) == 1
352 |     assert r[0] == date(1997, 7, 16)
353 | 
354 |     r = parse.parse("a {:%Y-%b-%d} b", "a 1997-Feb-16 b")
355 |     assert len(r.fixed) == 1
356 |     assert r[0] == date(1997, 2, 16)
357 | 
358 |     r = parse.parse("a {:%Y-%b-%d} {:d} b", "a 1997-Feb-16 8 b")
359 |     assert len(r.fixed) == 2
360 |     assert r[0] == date(1997, 2, 16)
361 | 
362 |     r = parse.parse("a {my_date:%Y-%b-%d} {num:d} b", "a 1997-Feb-16 8 b")
363 |     assert (r.named["my_date"]) == date(1997, 2, 16)
364 |     assert (r.named["num"]) == 8
365 | 
366 |     r = parse.parse("a {:%Y-%B-%d} b", "a 1997-February-16 b")
367 |     assert r[0] == date(1997, 2, 16)
368 | 
369 |     r = parse.parse("a {:%Y%m%d} b", "a 19970716 b")
370 |     assert r[0] == date(1997, 7, 16)
371 | 
372 | 
373 | def test_flexible_datetime_with_colon():
374 |     r = parse.parse("{dt:%Y-%m-%d %H:%M:%S}", "2023-11-21 13:23:27")
375 |     assert r.named["dt"] == datetime(2023, 11, 21, 13, 23, 27)
376 | 
377 | 
378 | def test_datetime_with_various_subsecond_precision():
379 |     r = parse.parse("{dt:%Y-%m-%d %H:%M:%S.%f}", "2023-11-21 13:23:27.123456")
380 |     assert r.named["dt"] == datetime(2023, 11, 21, 13, 23, 27, 123456)
381 | 
382 |     r = parse.parse("{dt:%Y-%m-%d %H:%M:%S.%f}", "2023-11-21 13:23:27.12345")
383 |     assert r.named["dt"] == datetime(2023, 11, 21, 13, 23, 27, 123450)
384 | 
385 |     r = parse.parse("{dt:%Y-%m-%d %H:%M:%S.%f}", "2023-11-21 13:23:27.1234")
386 |     assert r.named["dt"] == datetime(2023, 11, 21, 13, 23, 27, 123400)
387 | 
388 |     r = parse.parse("{dt:%Y-%m-%d %H:%M:%S.%f}", "2023-11-21 13:23:27.123")
389 |     assert r.named["dt"] == datetime(2023, 11, 21, 13, 23, 27, 123000)
390 | 
391 |     r = parse.parse("{dt:%Y-%m-%d %H:%M:%S.%f}", "2023-11-21 13:23:27.0")
392 |     assert r.named["dt"] == datetime(2023, 11, 21, 13, 23, 27, 0)
393 | 
394 | 
395 | @pytest.mark.skipif(
396 |     sys.version_info[0] < 3, reason="Python 3+ required for timezone support"
397 | )
398 | def test_flexible_datetime_with_timezone():
399 |     from datetime import timezone
400 | 
401 |     r = parse.parse("{dt:%Y-%m-%d %H:%M:%S %z}", "2023-11-21 13:23:27 +0000")
402 |     assert r.named["dt"] == datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc)
403 | 
404 | 
405 | @pytest.mark.skipif(
406 |     sys.version_info[0] < 3, reason="Python 3+ required for timezone support"
407 | )
408 | def test_flexible_datetime_with_timezone_that_has_colons():
409 |     from datetime import timezone
410 | 
411 |     r = parse.parse("{dt:%Y-%m-%d %H:%M:%S %z}", "2023-11-21 13:23:27 +00:00:00")
412 |     assert r.named["dt"] == datetime(2023, 11, 21, 13, 23, 27, tzinfo=timezone.utc)
413 | 
414 | 
415 | def test_flexible_time():
416 |     r = parse.parse("a {time:%H:%M:%S} b", "a 13:23:27 b")
417 |     assert r.named["time"] == time(13, 23, 27)
418 | 
419 | 
420 | def test_flexible_time_no_hour():
421 |     r = parse.parse("a {time:%M:%S} b", "a 23:27 b")
422 |     assert r.named["time"] == time(0, 23, 27)
423 | 
424 | 
425 | def test_flexible_time_ms():
426 |     r = parse.parse("a {time:%M:%S:%f} b", "a 23:27:123456 b")
427 |     assert r.named["time"] == time(0, 23, 27, 123456)
428 | 
429 | 
430 | def test_flexible_dates_single_digit():
431 |     r = parse.parse("{dt:%Y/%m/%d}", "2023/1/1")
432 |     assert r.named["dt"] == date(2023, 1, 1)
433 | 
434 | 
435 | def test_flexible_dates_j():
436 |     r = parse.parse("{dt:%Y/%j}", "2023/9")
437 |     assert r.named["dt"] == date(2023, 1, 9)
438 | 
439 |     r = parse.parse("{dt:%Y/%j}", "2023/009")
440 |     assert r.named["dt"] == date(2023, 1, 9)
441 | 
442 | 
443 | def test_flexible_dates_year_current_year_inferred():
444 |     r = parse.parse("{dt:%j}", "9")
445 |     assert r.named["dt"] == date(datetime.today().year, 1, 9)
446 | 
447 | 
448 | def test_datetimes():
449 |     def y(fmt, s, e, tz=None):
450 |         p = parse.compile(fmt)
451 |         r = p.parse(s)
452 |         assert r is not None
453 |         r = r.fixed[0]
454 |         assert r == e
455 |         assert tz is None or r.tzinfo == tz
456 | 
457 |     utc = parse.FixedTzOffset(0, "UTC")
458 |     assert repr(utc) == "<FixedTzOffset UTC 0:00:00>"
459 |     aest = parse.FixedTzOffset(10 * 60, "+1000")
460 |     tz60 = parse.FixedTzOffset(60, "+01:00")
461 | 
462 |     # ISO 8660 variants
463 |     # YYYY-MM-DD (eg 1997-07-16)
464 |     y("a {:ti} b", "a 1997-07-16 b", datetime(1997, 7, 16))
465 | 
466 |     # YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
467 |     y("a {:ti} b", "a 1997-07-16 19:20 b", datetime(1997, 7, 16, 19, 20, 0))
468 |     y("a {:ti} b", "a 1997-07-16T19:20 b", datetime(1997, 7, 16, 19, 20, 0))
469 |     y(
470 |         "a {:ti} b",
471 |         "a 1997-07-16T19:20Z b",
472 |         datetime(1997, 7, 16, 19, 20, tzinfo=utc),
473 |     )
474 |     y(
475 |         "a {:ti} b",
476 |         "a 1997-07-16T19:20+0100 b",
477 |         datetime(1997, 7, 16, 19, 20, tzinfo=tz60),
478 |     )
479 |     y(
480 |         "a {:ti} b",
481 |         "a 1997-07-16T19:20+01:00 b",
482 |         datetime(1997, 7, 16, 19, 20, tzinfo=tz60),
483 |     )
484 |     y(
485 |         "a {:ti} b",
486 |         "a 1997-07-16T19:20 +01:00 b",
487 |         datetime(1997, 7, 16, 19, 20, tzinfo=tz60),
488 |     )
489 | 
490 |     # YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
491 |     y("a {:ti} b", "a 1997-07-16 19:20:30 b", datetime(1997, 7, 16, 19, 20, 30))
492 |     y("a {:ti} b", "a 1997-07-16T19:20:30 b", datetime(1997, 7, 16, 19, 20, 30))
493 |     y(
494 |         "a {:ti} b",
495 |         "a 1997-07-16T19:20:30Z b",
496 |         datetime(1997, 7, 16, 19, 20, 30, tzinfo=utc),
497 |     )
498 |     y(
499 |         "a {:ti} b",
500 |         "a 1997-07-16T19:20:30+01:00 b",
501 |         datetime(1997, 7, 16, 19, 20, 30, tzinfo=tz60),
502 |     )
503 |     y(
504 |         "a {:ti} b",
505 |         "a 1997-07-16T19:20:30 +01:00 b",
506 |         datetime(1997, 7, 16, 19, 20, 30, tzinfo=tz60),
507 |     )
508 | 
509 |     # YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
510 |     y(
511 |         "a {:ti} b",
512 |         "a 1997-07-16 19:20:30.500000 b",
513 |         datetime(1997, 7, 16, 19, 20, 30, 500000),
514 |     )
515 |     y(
516 |         "a {:ti} b",
517 |         "a 1997-07-16T19:20:30.500000 b",
518 |         datetime(1997, 7, 16, 19, 20, 30, 500000),
519 |     )
520 |     y(
521 |         "a {:ti} b",
522 |         "a 1997-07-16T19:20:30.5Z b",
523 |         datetime(1997, 7, 16, 19, 20, 30, 500000, tzinfo=utc),
524 |     )
525 |     y(
526 |         "a {:ti} b",
527 |         "a 1997-07-16T19:20:30.5+01:00 b",
528 |         datetime(1997, 7, 16, 19, 20, 30, 500000, tzinfo=tz60),
529 |     )
530 | 
531 |     aest_d = datetime(2011, 11, 21, 10, 21, 36, tzinfo=aest)
532 |     dt = datetime(2011, 11, 21, 10, 21, 36)
533 |     dt00 = datetime(2011, 11, 21, 10, 21)
534 |     d = datetime(2011, 11, 21)
535 | 
536 |     # te   RFC2822 e-mail format        datetime
537 |     y("a {:te} b", "a Mon, 21 Nov 2011 10:21:36 +1000 b", aest_d)
538 |     y("a {:te} b", "a Mon, 21 Nov 2011 10:21:36 +10:00 b", aest_d)
539 |     y("a {:te} b", "a 21 Nov 2011 10:21:36 +1000 b", aest_d)
540 | 
541 |     # tg   global (day/month) format datetime
542 |     y("a {:tg} b", "a 21/11/2011 10:21:36 AM +1000 b", aest_d)
543 |     y("a {:tg} b", "a 21/11/2011 10:21:36 AM +10:00 b", aest_d)
544 |     y("a {:tg} b", "a 21-11-2011 10:21:36 AM +1000 b", aest_d)
545 |     y("a {:tg} b", "a 21/11/2011 10:21:36 +1000 b", aest_d)
546 |     y("a {:tg} b", "a 21/11/2011 10:21:36 b", dt)
547 |     y("a {:tg} b", "a 21/11/2011 10:21 b", dt00)
548 |     y("a {:tg} b", "a 21-11-2011 b", d)
549 |     y("a {:tg} b", "a 21-Nov-2011 10:21:36 AM +1000 b", aest_d)
550 |     y("a {:tg} b", "a 21-November-2011 10:21:36 AM +1000 b", aest_d)
551 | 
552 |     # ta   US (month/day) format     datetime
553 |     y("a {:ta} b", "a 11/21/2011 10:21:36 AM +1000 b", aest_d)
554 |     y("a {:ta} b", "a 11/21/2011 10:21:36 AM +10:00 b", aest_d)
555 |     y("a {:ta} b", "a 11-21-2011 10:21:36 AM +1000 b", aest_d)
556 |     y("a {:ta} b", "a 11/21/2011 10:21:36 +1000 b", aest_d)
557 |     y("a {:ta} b", "a 11/21/2011 10:21:36 b", dt)
558 |     y("a {:ta} b", "a 11/21/2011 10:21 b", dt00)
559 |     y("a {:ta} b", "a 11-21-2011 b", d)
560 |     y("a {:ta} b", "a Nov-21-2011 10:21:36 AM +1000 b", aest_d)
561 |     y("a {:ta} b", "a November-21-2011 10:21:36 AM +1000 b", aest_d)
562 |     y("a {:ta} b", "a November-21-2011 b", d)
563 | 
564 |     # ts   Linux System log format        datetime
565 |     y(
566 |         "a {:ts} b",
567 |         "a Nov 21 10:21:36 b",
568 |         datetime(datetime.today().year, 11, 21, 10, 21, 36),
569 |     )
570 |     y(
571 |         "a {:ts} b",
572 |         "a Nov  1 10:21:36 b",
573 |         datetime(datetime.today().year, 11, 1, 10, 21, 36),
574 |     )
575 |     y(
576 |         "a {:ts} b",
577 |         "a Nov  1 03:21:36 b",
578 |         datetime(datetime.today().year, 11, 1, 3, 21, 36),
579 |     )
580 | 
581 |     # th   HTTP log format date/time                   datetime
582 |     y("a {:th} b", "a 21/Nov/2011:10:21:36 +1000 b", aest_d)
583 |     y("a {:th} b", "a 21/Nov/2011:10:21:36 +10:00 b", aest_d)
584 | 
585 |     d = datetime(2011, 11, 21, 10, 21, 36)
586 | 
587 |     # tc   ctime() format           datetime
588 |     y("a {:tc} b", "a Mon Nov 21 10:21:36 2011 b", d)
589 | 
590 |     t530 = parse.FixedTzOffset(-5 * 60 - 30, "-5:30")
591 |     t830 = parse.FixedTzOffset(-8 * 60 - 30, "-8:30")
592 | 
593 |     # tt   Time                                        time
594 |     y("a {:tt} b", "a 10:21:36 AM +1000 b", time(10, 21, 36, tzinfo=aest))
595 |     y("a {:tt} b", "a 10:21:36 AM +10:00 b", time(10, 21, 36, tzinfo=aest))
596 |     y("a {:tt} b", "a 10:21:36 AM b", time(10, 21, 36))
597 |     y("a {:tt} b", "a 10:21:36 PM b", time(22, 21, 36))
598 |     y("a {:tt} b", "a 10:21:36 b", time(10, 21, 36))
599 |     y("a {:tt} b", "a 10:21 b", time(10, 21))
600 |     y("a {:tt} b", "a 10:21:36 PM -5:30 b", time(22, 21, 36, tzinfo=t530))
601 |     y("a {:tt} b", "a 10:21:36 PM -530 b", time(22, 21, 36, tzinfo=t530))
602 |     y("a {:tt} b", "a 10:21:36 PM -05:30 b", time(22, 21, 36, tzinfo=t530))
603 |     y("a {:tt} b", "a 10:21:36 PM -0530 b", time(22, 21, 36, tzinfo=t530))
604 |     y("a {:tt} b", "a 10:21:36 PM -08:30 b", time(22, 21, 36, tzinfo=t830))
605 |     y("a {:tt} b", "a 10:21:36 PM -0830 b", time(22, 21, 36, tzinfo=t830))
606 | 
607 | 
608 | def test_datetime_group_count():
609 |     # test we increment the group count correctly for datetimes
610 |     r = parse.parse("{:ti} {}", "1972-01-01 spam")
611 |     assert r.fixed[1] == "spam"
612 |     r = parse.parse("{:tg} {}", "1-1-1972 spam")
613 |     assert r.fixed[1] == "spam"
614 |     r = parse.parse("{:ta} {}", "1-1-1972 spam")
615 |     assert r.fixed[1] == "spam"
616 |     r = parse.parse("{:th} {}", "21/Nov/2011:10:21:36 +1000 spam")
617 |     assert r.fixed[1] == "spam"
618 |     r = parse.parse("{:te} {}", "21 Nov 2011 10:21:36 +1000 spam")
619 |     assert r.fixed[1] == "spam"
620 |     r = parse.parse("{:tc} {}", "Mon Nov 21 10:21:36 2011 spam")
621 |     assert r.fixed[1] == "spam"
622 |     r = parse.parse("{:tt} {}", "10:21 spam")
623 |     assert r.fixed[1] == "spam"
624 | 
625 | 
626 | def test_mixed_types():
627 |     # stress-test: pull one of everything out of a string
628 |     r = parse.parse(
629 |         """
630 |         letters: {:w}
631 |         non-letters: {:W}
632 |         whitespace: "{:s}"
633 |         non-whitespace: \t{:S}\n
634 |         digits: {:d} {:d}
635 |         non-digits: {:D}
636 |         numbers with thousands: {:n}
637 |         fixed-point: {:f}
638 |         floating-point: {:e}
639 |         general numbers: {:g} {:g}
640 |         binary: {:b}
641 |         octal: {:o}
642 |         hex: {:x}
643 |         ISO 8601 e.g. {:ti}
644 |         RFC2822 e.g. {:te}
645 |         Global e.g. {:tg}
646 |         US e.g. {:ta}
647 |         ctime() e.g. {:tc}
648 |         HTTP e.g. {:th}
649 |         time: {:tt}
650 |         final value: {}
651 |     """,
652 |         """
653 |         letters: abcdef_GHIJLK
654 |         non-letters: !@#%$ *^%
655 |         whitespace: "   \t\n"
656 |         non-whitespace: \tabc\n
657 |         digits: 12345 0b1011011
658 |         non-digits: abcdef
659 |         numbers with thousands: 1,000
660 |         fixed-point: 100.2345
661 |         floating-point: 1.1e-10
662 |         general numbers: 1 1.1
663 |         binary: 0b1000
664 |         octal: 0o1000
665 |         hex: 0x1000
666 |         ISO 8601 e.g. 1972-01-20T10:21:36Z
667 |         RFC2822 e.g. Mon, 20 Jan 1972 10:21:36 +1000
668 |         Global e.g. 20/1/1972 10:21:36 AM +1:00
669 |         US e.g. 1/20/1972 10:21:36 PM +10:30
670 |         ctime() e.g. Sun Sep 16 01:03:52 1973
671 |         HTTP e.g. 21/Nov/2011:00:07:11 +0000
672 |         time: 10:21:36 PM -5:30
673 |         final value: spam
674 |     """,
675 |     )
676 |     assert r is not None
677 |     assert r.fixed[22] == "spam"
678 | 
679 | 
680 | def test_mixed_type_variant():
681 |     r = parse.parse(
682 |         """
683 |         letters: {:w}
684 |         non-letters: {:W}
685 |         whitespace: "{:s}"
686 |         non-whitespace: \t{:S}\n
687 |         digits: {:d}
688 |         non-digits: {:D}
689 |         numbers with thousands: {:n}
690 |         fixed-point: {:f}
691 |         floating-point: {:e}
692 |         general numbers: {:g} {:g}
693 |         binary: {:b}
694 |         octal: {:o}
695 |         hex: {:x}
696 |         ISO 8601 e.g. {:ti}
697 |         RFC2822 e.g. {:te}
698 |         Global e.g. {:tg}
699 |         US e.g. {:ta}
700 |         ctime() e.g. {:tc}
701 |         HTTP e.g. {:th}
702 |         time: {:tt}
703 |         final value: {}
704 |     """,
705 |         """
706 |         letters: abcdef_GHIJLK
707 |         non-letters: !@#%$ *^%
708 |         whitespace: "   \t\n"
709 |         non-whitespace: \tabc\n
710 |         digits: 0xabcdef
711 |         non-digits: abcdef
712 |         numbers with thousands: 1.000.000
713 |         fixed-point: 0.00001
714 |         floating-point: NAN
715 |         general numbers: 1.1e10 nan
716 |         binary: 0B1000
717 |         octal: 0O1000
718 |         hex: 0X1000
719 |         ISO 8601 e.g. 1972-01-20T10:21:36Z
720 |         RFC2822 e.g. Mon, 20 Jan 1972 10:21:36 +1000
721 |         Global e.g. 20/1/1972 10:21:36 AM +1:00
722 |         US e.g. 1/20/1972 10:21:36 PM +10:30
723 |         ctime() e.g. Sun Sep 16 01:03:52 1973
724 |         HTTP e.g. 21/Nov/2011:00:07:11 +0000
725 |         time: 10:21:36 PM -5:30
726 |         final value: spam
727 |     """,
728 |     )
729 |     assert r is not None
730 |     assert r.fixed[21] == "spam"
731 | 
732 | 
733 | @pytest.mark.skipif(sys.version_info >= (3, 5), reason="Python 3.5 removed the limit of 100 named groups in a regular expression")
734 | def test_too_many_fields():
735 |     # Python 3.5 removed the limit of 100 named groups in a regular expression,
736 |     # so only test for the exception if the limit exists.
737 |     p = parse.compile("{:ti}" * 15)
738 |     with pytest.raises(parse.TooManyFields):
739 |         p.parse("")
740 | 
741 | 
742 | def test_letters():
743 |     res = parse.parse("{:l}", "")
744 |     assert res is None
745 |     res = parse.parse("{:l}", "sPaM")
746 |     assert res.fixed == ("sPaM",)
747 |     res = parse.parse("{:l}", "sP4M")
748 |     assert res is None
749 |     res = parse.parse("{:l}", "sP_M")
750 |     assert res is None
751 | 
752 | 
753 | def test_strftime_strptime_roundtrip():
754 |     dt = datetime.now()
755 |     fmt = "_".join([k for k in parse.dt_format_to_regex if k != "%z"])
756 |     s = dt.strftime(fmt)
757 |     [res] = parse.parse("{:" + fmt + "}", s)
758 |     assert res == dt
759 | 
760 | 
761 | def test_parser_format():
762 |     parser = parse.compile("hello {}")
763 |     assert parser.format.format("world") == "hello world"
764 |     with pytest.raises(AttributeError):
765 |         parser.format = "hi {}"
766 | 
767 | 
768 | def test_hyphen_inside_field_name():
769 |     # https://github.com/r1chardj0n3s/parse/issues/86
770 |     # https://github.com/python-openapi/openapi-core/issues/672
771 |     template = "/local/sub/{user-id}/duration"
772 |     assert parse.Parser(template).named_fields == ["user_id"]
773 |     string = "https://dummy_server.com/local/sub/1647222638/duration"
774 |     result = parse.search(template, string)
775 |     assert result["user-id"] == "1647222638"
776 | 
777 | 
778 | def test_hyphen_inside_field_name_collision_handling():
779 |     template = "/foo/{user-id}/{user_id}/{user.id}/bar/"
780 |     assert parse.Parser(template).named_fields == ["user_id", "user__id", "user___id"]
781 |     string = "/foo/1/2/3/bar/"
782 |     result = parse.search(template, string)
783 |     assert result["user-id"] == "1"
784 |     assert result["user_id"] == "2"
785 |     assert result["user.id"] == "3"
786 | 


--------------------------------------------------------------------------------
/tests/test_parsetype.py:
--------------------------------------------------------------------------------
  1 | from decimal import Decimal
  2 | 
  3 | import pytest
  4 | 
  5 | import parse
  6 | 
  7 | 
  8 | def assert_match(parser, text, param_name, expected):
  9 |     result = parser.parse(text)
 10 |     assert result[param_name] == expected
 11 | 
 12 | 
 13 | def assert_mismatch(parser, text, param_name):
 14 |     result = parser.parse(text)
 15 |     assert result is None
 16 | 
 17 | 
 18 | def assert_fixed_match(parser, text, expected):
 19 |     result = parser.parse(text)
 20 |     assert result.fixed == expected
 21 | 
 22 | 
 23 | def assert_fixed_mismatch(parser, text):
 24 |     result = parser.parse(text)
 25 |     assert result is None
 26 | 
 27 | 
 28 | def test_pattern_should_be_used():
 29 |     def parse_number(text):
 30 |         return int(text)
 31 | 
 32 |     parse_number.pattern = r"\d+"
 33 |     parse_number.name = "Number"  # For testing only.
 34 | 
 35 |     extra_types = {parse_number.name: parse_number}
 36 |     format = "Value is {number:Number} and..."
 37 |     parser = parse.Parser(format, extra_types)
 38 | 
 39 |     assert_match(parser, "Value is 42 and...", "number", 42)
 40 |     assert_match(parser, "Value is 00123 and...", "number", 123)
 41 |     assert_mismatch(parser, "Value is ALICE and...", "number")
 42 |     assert_mismatch(parser, "Value is -123 and...", "number")
 43 | 
 44 | 
 45 | def test_pattern_should_be_used2():
 46 |     def parse_yesno(text):
 47 |         return parse_yesno.mapping[text.lower()]
 48 | 
 49 |     parse_yesno.mapping = {
 50 |         "yes": True,
 51 |         "no": False,
 52 |         "on": True,
 53 |         "off": False,
 54 |         "true": True,
 55 |         "false": False,
 56 |     }
 57 |     parse_yesno.pattern = r"|".join(parse_yesno.mapping.keys())
 58 |     parse_yesno.name = "YesNo"  # For testing only.
 59 | 
 60 |     extra_types = {parse_yesno.name: parse_yesno}
 61 |     format = "Answer: {answer:YesNo}"
 62 |     parser = parse.Parser(format, extra_types)
 63 | 
 64 |     # -- ENSURE: Known enum values are correctly extracted.
 65 |     for value_name, value in parse_yesno.mapping.items():
 66 |         text = "Answer: %s" % value_name
 67 |         assert_match(parser, text, "answer", value)
 68 | 
 69 |     # -- IGNORE-CASE: In parsing, calls type converter function !!!
 70 |     assert_match(parser, "Answer: YES", "answer", True)
 71 |     assert_mismatch(parser, "Answer: __YES__", "answer")
 72 | 
 73 | 
 74 | def test_with_pattern():
 75 |     ab_vals = {"a": 1, "b": 2}
 76 | 
 77 |     @parse.with_pattern(r"[ab]")
 78 |     def ab(text):
 79 |         return ab_vals[text]
 80 | 
 81 |     parser = parse.Parser("test {result:ab}", {"ab": ab})
 82 |     assert_match(parser, "test a", "result", 1)
 83 |     assert_match(parser, "test b", "result", 2)
 84 |     assert_mismatch(parser, "test c", "result")
 85 | 
 86 | 
 87 | def test_with_pattern_and_regex_group_count():
 88 |     # -- SPECIAL-CASE: Regex-grouping is used in user-defined type
 89 |     # NOTE: Missing or wroung regex_group_counts cause problems
 90 |     #       with parsing following params.
 91 |     @parse.with_pattern(r"(meter|kilometer)", regex_group_count=1)
 92 |     def parse_unit(text):
 93 |         return text.strip()
 94 | 
 95 |     @parse.with_pattern(r"\d+")
 96 |     def parse_number(text):
 97 |         return int(text)
 98 | 
 99 |     type_converters = {"Number": parse_number, "Unit": parse_unit}
100 |     # -- CASE: Unnamed-params (affected)
101 |     parser = parse.Parser("test {:Unit}-{:Number}", type_converters)
102 |     assert_fixed_match(parser, "test meter-10", ("meter", 10))
103 |     assert_fixed_match(parser, "test kilometer-20", ("kilometer", 20))
104 |     assert_fixed_mismatch(parser, "test liter-30")
105 | 
106 |     # -- CASE: Named-params (uncritical; should not be affected)
107 |     # REASON: Named-params have additional, own grouping.
108 |     parser2 = parse.Parser("test {unit:Unit}-{value:Number}", type_converters)
109 |     assert_match(parser2, "test meter-10", "unit", "meter")
110 |     assert_match(parser2, "test meter-10", "value", 10)
111 |     assert_match(parser2, "test kilometer-20", "unit", "kilometer")
112 |     assert_match(parser2, "test kilometer-20", "value", 20)
113 |     assert_mismatch(parser2, "test liter-30", "unit")
114 | 
115 | 
116 | def test_with_pattern_and_wrong_regex_group_count_raises_error():
117 |     # -- SPECIAL-CASE:
118 |     # Regex-grouping is used in user-defined type, but wrong value is provided.
119 |     @parse.with_pattern(r"(meter|kilometer)", regex_group_count=1)
120 |     def parse_unit(text):
121 |         return text.strip()
122 | 
123 |     @parse.with_pattern(r"\d+")
124 |     def parse_number(text):
125 |         return int(text)
126 | 
127 |     # -- CASE: Unnamed-params (affected)
128 |     BAD_REGEX_GROUP_COUNTS_AND_ERRORS = [
129 |         (None, ValueError),
130 |         (0, ValueError),
131 |         (2, IndexError),
132 |     ]
133 |     for bad_regex_group_count, error_class in BAD_REGEX_GROUP_COUNTS_AND_ERRORS:
134 |         parse_unit.regex_group_count = bad_regex_group_count  # -- OVERRIDE-HERE
135 |         type_converters = {"Number": parse_number, "Unit": parse_unit}
136 |         parser = parse.Parser("test {:Unit}-{:Number}", type_converters)
137 |         with pytest.raises(error_class):
138 |             parser.parse("test meter-10")
139 | 
140 | 
141 | def test_with_pattern_and_regex_group_count_is_none():
142 |     # -- CORNER-CASE: Increase code-coverage.
143 |     data_values = {"a": 1, "b": 2}
144 | 
145 |     @parse.with_pattern(r"[ab]")
146 |     def parse_data(text):
147 |         return data_values[text]
148 | 
149 |     parse_data.regex_group_count = None  # ENFORCE: None
150 | 
151 |     # -- CASE: Unnamed-params
152 |     parser = parse.Parser("test {:Data}", {"Data": parse_data})
153 |     assert_fixed_match(parser, "test a", (1,))
154 |     assert_fixed_match(parser, "test b", (2,))
155 |     assert_fixed_mismatch(parser, "test c")
156 | 
157 |     # -- CASE: Named-params
158 |     parser2 = parse.Parser("test {value:Data}", {"Data": parse_data})
159 |     assert_match(parser2, "test a", "value", 1)
160 |     assert_match(parser2, "test b", "value", 2)
161 |     assert_mismatch(parser2, "test c", "value")
162 | 
163 | 
164 | def test_case_sensitivity():
165 |     r = parse.parse("SPAM {} SPAM", "spam spam spam")
166 |     assert r[0] == "spam"
167 |     assert parse.parse("SPAM {} SPAM", "spam spam spam", case_sensitive=True) is None
168 | 
169 | 
170 | def test_decimal_value():
171 |     value = Decimal("5.5")
172 |     str_ = "test {}".format(value)
173 |     parser = parse.Parser("test {:F}")
174 |     assert parser.parse(str_)[0] == value
175 | 
176 | 
177 | def test_width_str():
178 |     res = parse.parse("{:.2}{:.2}", "look")
179 |     assert res.fixed == ("lo", "ok")
180 |     res = parse.parse("{:2}{:2}", "look")
181 |     assert res.fixed == ("lo", "ok")
182 |     res = parse.parse("{:4}{}", "look at that")
183 |     assert res.fixed == ("look", " at that")
184 | 
185 | 
186 | def test_width_constraints():
187 |     res = parse.parse("{:4}", "looky")
188 |     assert res.fixed == ("looky",)
189 |     res = parse.parse("{:4.4}", "looky")
190 |     assert res is None
191 |     res = parse.parse("{:4.4}", "ook")
192 |     assert res is None
193 |     res = parse.parse("{:4}{:.4}", "look at that")
194 |     assert res.fixed == ("look at ", "that")
195 | 
196 | 
197 | def test_width_multi_int():
198 |     res = parse.parse("{:02d}{:02d}", "0440")
199 |     assert res.fixed == (4, 40)
200 |     res = parse.parse("{:03d}{:d}", "04404")
201 |     assert res.fixed == (44, 4)
202 | 
203 | 
204 | def test_width_empty_input():
205 |     res = parse.parse("{:.2}", "")
206 |     assert res is None
207 |     res = parse.parse("{:2}", "l")
208 |     assert res is None
209 |     res = parse.parse("{:2d}", "")
210 |     assert res is None
211 | 
212 | 
213 | def test_int_convert_stateless_base():
214 |     parser = parse.Parser("{:d}")
215 |     assert parser.parse("1234")[0] == 1234
216 |     assert parser.parse("0b1011")[0] == 0b1011
217 | 


--------------------------------------------------------------------------------
/tests/test_pattern.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | import parse
 4 | 
 5 | 
 6 | def _test_expression(format, expression):
 7 |     assert parse.Parser(format)._expression == expression
 8 | 
 9 | 
10 | def test_braces():
11 |     # pull a simple string out of another string
12 |     _test_expression("{{ }}", r"\{ \}")
13 | 
14 | 
15 | def test_fixed():
16 |     # pull a simple string out of another string
17 |     _test_expression("{}", r"(.+?)")
18 |     _test_expression("{} {}", r"(.+?) (.+?)")
19 | 
20 | 
21 | def test_named():
22 |     # pull a named string out of another string
23 |     _test_expression("{name}", r"(?P<name>.+?)")
24 |     _test_expression("{name} {other}", r"(?P<name>.+?) (?P<other>.+?)")
25 | 
26 | 
27 | def test_named_typed():
28 |     # pull a named string out of another string
29 |     _test_expression("{name:w}", r"(?P<name>\w+)")
30 |     _test_expression("{name:w} {other:w}", r"(?P<name>\w+) (?P<other>\w+)")
31 | 
32 | 
33 | def test_numbered():
34 |     _test_expression("{0}", r"(.+?)")
35 |     _test_expression("{0} {1}", r"(.+?) (.+?)")
36 |     _test_expression("{0:f} {1:f}", r"([-+ ]?\d*\.\d+) ([-+ ]?\d*\.\d+)")
37 | 
38 | 
39 | def test_bird():
40 |     # skip some trailing whitespace
41 |     _test_expression("{:>}", r" *(.+?)")
42 | 
43 | 
44 | def test_format_variety():
45 |     def _(fmt, matches):
46 |         d = parse.extract_format(fmt, {"spam": "spam"})
47 |         for k in matches:
48 |             assert d.get(k) == matches[k]
49 | 
50 |     for t in "%obxegfdDwWsS":
51 |         _(t, {"type": t})
52 |         _("10" + t, {"type": t, "width": "10"})
53 |     _("05d", {"type": "d", "width": "5", "zero": True})
54 |     _("<", {"align": "<"})
55 |     _(".<", {"align": "<", "fill": "."})
56 |     _(">", {"align": ">"})
57 |     _(".>", {"align": ">", "fill": "."})
58 |     _("^", {"align": "^"})
59 |     _(".^", {"align": "^", "fill": "."})
60 |     _("x=d", {"type": "d", "align": "=", "fill": "x"})
61 |     _("d", {"type": "d"})
62 |     _("ti", {"type": "ti"})
63 |     _("spam", {"type": "spam"})
64 | 
65 |     _(".^010d", {"type": "d", "width": "10", "align": "^", "fill": ".", "zero": True})
66 |     _(".2f", {"type": "f", "precision": "2"})
67 |     _("10.2f", {"type": "f", "width": "10", "precision": "2"})
68 | 
69 | 
70 | def test_dot_separated_fields():
71 |     # this should just work and provide the named value
72 |     res = parse.parse("{hello.world}_{jojo.foo.baz}_{simple}", "a_b_c")
73 |     assert res.named["hello.world"] == "a"
74 |     assert res.named["jojo.foo.baz"] == "b"
75 |     assert res.named["simple"] == "c"
76 | 
77 | 
78 | def test_dict_style_fields():
79 |     res = parse.parse("{hello[world]}_{hello[foo][baz]}_{simple}", "a_b_c")
80 |     assert res.named["hello"]["world"] == "a"
81 |     assert res.named["hello"]["foo"]["baz"] == "b"
82 |     assert res.named["simple"] == "c"
83 | 
84 | 
85 | def test_dot_separated_fields_name_collisions():
86 |     # this should just work and provide the named value
87 |     res = parse.parse("{a_.b}_{a__b}_{a._b}_{a___b}", "a_b_c_d")
88 |     assert res.named["a_.b"] == "a"
89 |     assert res.named["a__b"] == "b"
90 |     assert res.named["a._b"] == "c"
91 |     assert res.named["a___b"] == "d"
92 | 
93 | 
94 | def test_invalid_groupnames_are_handled_gracefully():
95 |     with pytest.raises(NotImplementedError):
96 |         parse.parse("{hello['world']}", "doesn't work")
97 | 


--------------------------------------------------------------------------------
/tests/test_result.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | import parse
 4 | 
 5 | 
 6 | def test_fixed_access():
 7 |     r = parse.Result((1, 2), {}, None)
 8 |     assert r[0] == 1
 9 |     assert r[1] == 2
10 |     with pytest.raises(IndexError):
11 |         r[2]
12 |     with pytest.raises(KeyError):
13 |         r["spam"]
14 | 
15 | 
16 | def test_slice_access():
17 |     r = parse.Result((1, 2, 3, 4), {}, None)
18 |     assert r[1:3] == (2, 3)
19 |     assert r[-5:5] == (1, 2, 3, 4)
20 |     assert r[:4:2] == (1, 3)
21 |     assert r[::-2] == (4, 2)
22 |     assert r[5:10] == ()
23 | 
24 | 
25 | def test_named_access():
26 |     r = parse.Result((), {"spam": "ham"}, None)
27 |     assert r["spam"] == "ham"
28 |     with pytest.raises(KeyError):
29 |         r["ham"]
30 |     with pytest.raises(IndexError):
31 |         r[0]
32 | 
33 | 
34 | def test_contains():
35 |     r = parse.Result(("cat",), {"spam": "ham"}, None)
36 |     assert "spam" in r
37 |     assert "cat" not in r
38 |     assert "ham" not in r
39 | 


--------------------------------------------------------------------------------
/tests/test_search.py:
--------------------------------------------------------------------------------
 1 | import parse
 2 | 
 3 | 
 4 | def test_basic():
 5 |     r = parse.search("a {} c", " a b c ")
 6 |     assert r.fixed == ("b",)
 7 | 
 8 | 
 9 | def test_multiline():
10 |     r = parse.search("age: {:d}\n", "name: Rufus\nage: 42\ncolor: red\n")
11 |     assert r.fixed == (42,)
12 | 
13 | 
14 | def test_pos():
15 |     r = parse.search("a {} c", " a b c ", 2)
16 |     assert r is None
17 | 
18 | 
19 | def test_no_evaluate_result():
20 |     match = parse.search(
21 |         "age: {:d}\n", "name: Rufus\nage: 42\ncolor: red\n", evaluate_result=False
22 |     )
23 |     r = match.evaluate_result()
24 |     assert r.fixed == (42,)
25 | 


--------------------------------------------------------------------------------