├── .github
    ├── FUNDING.yml
    └── workflows
    │   ├── .pypi_upload.yml
    │   └── ci.yml
├── .gitignore
├── .readthedocs.yml
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── docs
    ├── Makefile
    ├── make.bat
    └── source
    │   ├── conf.py
    │   ├── documentation
    │       ├── best-practices.rst
    │       ├── covering-the-basics.rst
    │       ├── modules
    │       │   ├── core
    │       │   │   ├── assertions.rst
    │       │   │   ├── classes.rst
    │       │   │   ├── groups.rst
    │       │   │   ├── operators.rst
    │       │   │   ├── pre.rst
    │       │   │   ├── quantifiers.rst
    │       │   │   └── tokens.rst
    │       │   └── meta
    │       │   │   └── essentials.rst
    │       └── subpackages.rst
    │   ├── index.rst
    │   ├── introduction.rst
    │   ├── logo.png
    │   └── requirements.txt
├── pyproject.toml
├── src
    └── pregex
    │   ├── __init__.py
    │   ├── core
    │       ├── __init__.py
    │       ├── assertions.py
    │       ├── classes.py
    │       ├── exceptions.py
    │       ├── groups.py
    │       ├── operators.py
    │       ├── pre.py
    │       ├── quantifiers.py
    │       └── tokens.py
    │   └── meta
    │       ├── __init__.py
    │       └── essentials.py
└── tests
    ├── test_core_assertions.py
    ├── test_core_classes.py
    ├── test_core_groups.py
    ├── test_core_operators.py
    ├── test_core_pre.py
    ├── test_core_quantifiers.py
    ├── test_core_tokens.py
    └── test_meta_essentials.py


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [manoss96]
2 | 


--------------------------------------------------------------------------------
/.github/workflows/.pypi_upload.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to PyPI
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - "*"  # triggered by pushed tags whose name matches this glob
 7 | 
 8 | jobs:
 9 |   publish:
10 |     # Single job: build the distributions, then upload them to PyPI.
11 |     runs-on: ubuntu-latest
12 | 
13 |     steps:
14 |       - uses: actions/checkout@v3
15 | 
16 |       - name: Set up Python
17 |         uses: actions/setup-python@v4
18 |         with:
19 |           python-version: "3.9"
20 | 
21 |       # The "build" front end is the only tool the packaging step needs.
22 |       - name: Install dependencies
23 |         run: pip install build
24 | 
25 |       # Artifacts are written to dist/ for the publish action to upload.
26 |       - name: Build dist
27 |         run: python -m build --outdir dist/
28 | 
29 |       - name: Publish to PyPI
30 |         uses: pypa/gh-action-pypi-publish@release/v1
31 |         with:
32 |           password: ${{ secrets.PYPI_API_TOKEN }}


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: Main CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - '**'  # every branch, including names that contain '/'
 7 |   pull_request:
 8 |     branches:  # filters on the pull request's base branch
 9 |       - '**'
10 | 
11 | jobs:
12 |   test-and-coverage:
13 |     # Runs the unittest suite under coverage once per Python version in the matrix.
14 |     runs-on: ubuntu-latest
15 |     strategy:
16 |       matrix:
17 |         python-version: ["3.9", "3.10", "3.11"]
18 | 
19 |     steps:
20 |       - uses: actions/checkout@v3
21 | 
22 |       - name: Set up Python ${{ matrix.python-version }}
23 |         uses: actions/setup-python@v4
24 |         with:
25 |           python-version: ${{ matrix.python-version }}
26 |       # Appends PYTHONPATH=<workspace>/src to $GITHUB_ENV so later steps can see it.
27 |       - name: Set PYTHONPATH
28 |         run: |
29 |           echo "PYTHONPATH=${GITHUB_WORKSPACE}/src" >> $GITHUB_ENV
30 | 
31 |       - name: Install dependencies
32 |         run: |
33 |           python -m pip install --upgrade pip
34 |           python -m pip install coverage
35 |       # Commands run from inside tests/, hence the tests/coverage.lcov path used below.
36 |       - name: Run tests
37 |         run: |
38 |           cd tests
39 |           python -m coverage run -m unittest
40 |           python -m coverage lcov
41 |       # One upload per matrix entry, tagged with a per-version flag name.
42 |       - name: Python ${{ matrix.python-version }} Coveralls
43 |         uses: coverallsapp/github-action@master
44 |         with:
45 |           github-token: ${{ secrets.GITHUB_TOKEN }}
46 |           path-to-lcov: tests/coverage.lcov
47 |           flag-name: python-${{ matrix.python-version }}-run
48 |           parallel: true
49 | 
50 | 
51 |   finish:
52 |     # Runs after every matrix entry has uploaded; sends parallel-finished to Coveralls.
53 |     needs: test-and-coverage
54 |     runs-on: ubuntu-latest
55 | 
56 |     steps:
57 |       - name: Update Coveralls
58 |         uses: coverallsapp/github-action@master
59 |         with:
60 |           github-token: ${{ secrets.GITHUB_TOKEN }}
61 |           parallel-finished: true


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | # Configuration schema version (mandatory key).
 2 | version: 2
 3 | 
 4 | # Build image and toolchain used for the documentation build.
 5 | build:
 6 |   os: ubuntu-20.04
 7 |   tools:
 8 |     python: '3.9'
 9 | 
10 | # Sphinx is configured by the conf.py that lives under docs/source.
11 | sphinx:
12 |   configuration: docs/source/conf.py
13 | 
14 | python:
15 |   install:  # requirements file to install for the build
16 |     - requirements: docs/source/requirements.txt


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Contributing to pregex
 3 | ============================
 4 | 
 5 | There are two main ways to contribute to pregex:
 6 | 
 7 | 1. **Bug Hunting**: It is more probable than not that there are currently 
 8 |    a number of bugs silently waiting to be discovered! If you happen to stumble
 9 |    upon one of them while using pregex, please raise an issue labeled as **bug**,
10 |    in which you report your findings as well as explain how one can reproduce
11 |    the bug. Furthermore, if you're up for a challenge you can even create a
12 |    new branch just for the issue and try to tackle the problem yourself!
13 | 
14 | 2. **Propose an addition/modification**: Everything good can be even better!
15 |    If you have an idea that you think might improve pregex, you can raise an
16 |    issue labeled as **enhancement**, in which you discuss your idea.
17 | 
18 | You can raise an issue by visiting the [Issues Page][issues-page].
19 | 
20 | Setting up a development environment
21 | -------------------------------------
22 | Regardless of whether you want to work on fixing a bug or implementing a new feature,
23 | you should be able to set up a separate development environment just for pregex. The
24 | fastest way to do this would be the following:
25 | 
26 | 1. Either clone or download the "pregex" repository to your local machine.
 27 | 2. Add the path pointing to the project's "src" directory on your local machine to the "PYTHONPATH" environment variable.
28 | 	- Make sure that "PYTHONPATH" is included in "PATH" as well.
29 | 3. Create and activate a new Python 3.9 environment that you will use solely for development purposes regarding pregex.
30 | 	- Make sure that you don't pip install pregex on this environment.
31 | 
32 | After doing the above, you should be good to go!
33 | 
34 | Running the tests
35 | -------------------------------------
36 | For a pull request to be merged, it is important that it passes all tests defined
37 | within the project. In order to ensure that, you can run the tests yourself by
38 | simply going into the project's "tests" directory and executing the following
39 | command:
40 | ```
41 | python3 -m unittest
42 | ```
43 | Make sure that you've set up your development environment as explained in the
44 | corresponding section or else it is very likely that the above command will fail.
45 | 
46 | 
47 | Code of Conduct
48 | ---------------
49 | 
50 | Please be nice to each other and abide by the principles of the [Python Software Foundation][psf-coc].
51 | 
52 | <!-- MARKDOWN LINKS & IMAGES -->
53 | [issues-page]: https://github.com/manoss96/pregex/issues
54 | [psf-coc]: https://www.python.org/psf/codeofconduct/


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Emmanouil Stoumpos
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <!-- PROJECT BADGES -->
  2 | [![Python Version][python-shield]][python-url]
  3 | [![MIT License][license-shield]][license-url]
  4 | [![Coverage][coverage-shield]][coverage-url]
  5 | 
  6 | ![PRegEx Logo](docs/source/logo.png)
  7 | 
  8 | <!-- What is PRegEx? -->
  9 | ## What is PRegEx?
 10 | 
 11 | Let's face it, although RegEx is without a doubt an extremely useful tool, its syntax has been repeatedly proven to be quite hard for people to read and to memorize. This is mainly due to RegEx's declarative nature, which many programmers are not familiar with, as well as its extensive use of symbols that do not inherently relate to their functionality within a RegEx pattern, thus rendering them easy to forget. To make matters even worse, RegEx patterns are more often than not tightly packed with large amounts of information, which our brains just seem to be struggling to break down in order to analyze effectively. For these reasons, building even a simple RegEx pattern for matching URLs can prove to be quite a painful task.
 12 | 
 13 | This is where PRegEx comes in! PRegEx, which stands for Programmable Regular Expressions, is a Python package that can be used in order to construct Regular Expression patterns in a more human-friendly way. Through the use of PRegEx, one is able to fully utilize the powerful tool that is RegEx without having to deal with any of its nuisances that seem to drive people crazy! PRegEx achieves that by offering the following:
 14 | 
 15 | 1. An easy-to-remember syntax that resembles the good ol' imperative way of programming!
 16 | 2. No longer having to group patterns or escape meta characters, as both are handled internally by PRegEx!
 17 | 3. Modularity to building RegEx patterns, as one can easily break down a complex pattern into multiple simpler ones which can then be combined together.
 18 | 4. A higher-level API on top of Python's built-in "re" module, providing access to its core functionality and more, while saving you the trouble of having to deal with "re.Match" instances.
 19 | 
 20 | And remember, no matter how complex the abstraction, it's always just a pure RegEx pattern that sits underneath which you can fetch and use any way you like!
 21 | 
 22 | 
 23 | <!-- Installation -->
 24 | ## Installation
 25 | 
 26 | You can start using PRegEx by installing it via pip. Note that "pregex" requires Python >= 3.9.
 27 | 
 28 | ```sh
 29 | pip install pregex
 30 | ```
 31 | 
 32 | 
 33 | <!-- Usage example -->
 34 | ## Usage Example
 35 | 
 36 | In PRegEx, everything is a Programmable Regular Expression, or "Pregex" for short. This makes it easy for simple Pregex instances to be combined into more complex ones! Within the code snippet below, we construct a Pregex instance that will match any URL that ends with either ".com" or ".org" as well as any IP address for which a 4-digit port number is specified. Furthermore, in the case of a URL, we would like for its domain name to be separately captured as well.
 37 | 
 38 | ```python
 39 | from pregex.core.classes import AnyLetter, AnyDigit, AnyFrom
 40 | from pregex.core.quantifiers import Optional, AtLeastAtMost
 41 | from pregex.core.operators import Either
 42 | from pregex.core.groups import Capture
 43 | from pregex.core.pre import Pregex
 44 | 
 45 | # Define main sub-patterns.
 46 | http_protocol = Optional('http' + Optional('s') + '://')
 47 | 
 48 | www = Optional('www.')
 49 | 
 50 | alphanum = AnyLetter() | AnyDigit()
 51 | 
 52 | domain_name = \
 53 |     alphanum + \
 54 |     AtLeastAtMost(alphanum | AnyFrom('-', '.'), n=1, m=61) + \
 55 |     alphanum
 56 | 
 57 | tld = '.' + Either('com', 'org')
 58 | 
 59 | ip_octet = AnyDigit().at_least_at_most(n=1, m=3)
 60 | 
 61 | port_number = (AnyDigit() - '0') + 3 * AnyDigit()
 62 | 
 63 | # Combine sub-patterns together.
 64 | pre: Pregex = \
 65 |     http_protocol + \
 66 |     Either(
 67 |         www + Capture(domain_name) + tld,
 68 |         3 * (ip_octet + '.') + ip_octet + ':' + port_number
 69 |     )
 70 | ```
 71 | 
 72 | We can then easily fetch the resulting Pregex instance's underlying RegEx pattern.
 73 | ```python
 74 | regex = pre.get_pattern()
 75 | ```
 76 | 
 77 | This is the pattern that we just built. Yikes!
 78 | ```
 79 | (?:https?:\/\/)?(?:(?:www\.)?([A-Za-z\d][A-Za-z\d\-.]{1,61}[A-Za-z\d])\.(?:com|org)|(?:\d{1,3}\.){3}\d{1,3}:[1-9]\d{3})
 80 | ```
 81 | 
 82 | Besides having access to its underlying pattern, we can use a Pregex instance to find matches within a piece of text. Consider for example the following string:
 83 | ```python
 84 | text = "text--192.168.1.1:8000--text--http://www.wikipedia.org--text--https://youtube.com--text"
 85 | ```
 86 | By invoking the instance's "get_matches" method, we are able to scan the above string for any possible matches:
 87 | ```python
 88 | matches = pre.get_matches(text)
 89 | ```
 90 | 
 91 | Looks like there were three matches:
 92 | ```python
 93 | ['192.168.1.1:8000', 'http://www.wikipedia.org', 'https://youtube.com']
 94 | ```
 95 | 
 96 | Likewise, we can invoke the instance's "get_captures" method to get any captured groups.
 97 | ```python
 98 | groups = pre.get_captures(text)
 99 | ```
100 | As expected, there were only two captured groups since the first match is not a URL and therefore it does not contain a domain name to be captured.
101 | ```python
102 | [(None,), ('wikipedia',), ('youtube',)]
103 | ```
104 | 
105 | Finally, you might have noticed that we built our pattern by utilizing
106 | various classes that were imported from modules under *pregex.core*. These
107 | modules contain classes through which the RegEx syntax is essentially replaced.
108 | However, PRegEx also includes another set of modules, namely those under
109 | subpackage *pregex.meta*, whose classes build upon those in *pregex.core* so
110 | as to provide numerous pre-built patterns that you can just import and use
111 | right away!
112 | 
113 | ```python
114 | 
115 | from pregex.core.pre import Pregex
116 | from pregex.core.classes import AnyDigit
117 | from pregex.core.operators import Either
118 | from pregex.meta.essentials import HttpUrl, IPv4
119 | 
120 | port_number = (AnyDigit() - '0') + 3 * AnyDigit()
121 | 
122 | pre: Pregex = Either(
123 |     HttpUrl(capture_domain=True, is_extensible=True),
124 |     IPv4(is_extensible=True) + ':' + port_number
125 | )
126 | ```
127 | 
128 | By using classes found within the *pregex.meta* subpackage, we were able to
129 | construct more or less the same pattern as before only much more easily!
130 | 
131 | ## Solving Wordle with PRegEx
132 | 
133 | We are now going to see another example that better exhibits the *programmable* nature of PRegEx.
134 | More specifically, we will be creating a Wordle solver function that, given all currently known
135 | information as well as access to a 5-letter word dictionary, utilizes PRegEx in order to return
136 | a list of candidate words to choose from as a possible solution to the problem.
137 | 
138 | ### Formulating what is known
139 | 
140 | First things first, we must think of a way to represent what is known so far regarding the
141 | word that we're trying to guess. This information can be encapsulated into three distinct
142 | sets of letters:
143 | 
144 | 1. **Green letters**: Letters that are included in the word, whose position within it is known.
145 | 2. **Yellow letters**: Letters that are included in the word, and while their exact position is
146 |    unknown, there is one or more positions which we can rule out. 
147 | 3. **Gray letters**: Letters that are not included in the word.
148 | 
149 | Green letters can be represented by using a dictionary that maps integers (positions) to strings (letters).
150 | For example, ``{4 : 'T'}`` indicates that the word we are looking for contains the letter ``T`` in its
151 | fourth position. Yellow letters can also be represented as a dictionary with integer keys, whose values
152 | however are going to be lists of strings instead of regular strings, as a position might have been ruled
153 | out for more than a single letter. For example, ``{1 : ['A', 'R'], 3 : ['P']}`` indicates that even though
154 | the word contains letters ``A``, ``R`` and ``P``, it cannot start with either an ``A`` or an ``R`` as
155 | well as it cannot have the letter ``P`` occupying its third position. Finally, gray letters can be simply
156 | stored in a list.
157 | 
158 | In order to have a concrete example to work with, we will be assuming that our current
159 | information about the problem is expressed by the following three data structures:
160 | 
161 | ```python
162 | green: dict[int, str] = {4 : 'T'}
163 | yellow: dict[int, list[str]] = {1 : ['A', 'R'], 3 : ['P']}
164 | gray: list[str] = ['C', 'D', 'L', 'M', 'N', 'Q', 'U']
165 | ```
166 | 
167 | ### Initializing a Pregex class instance
168 | 
169 | Having come up with a way of programmatically formulating the problem, the first step towards
170 | actually solving it would be to create a ``Pregex`` class instance:
171 | ```python
172 | wordle = Pregex()
173 | ```
174 | 
175 | Since we aren't providing a ``pattern`` parameter to the class's constructor, it automatically
176 | defaults to the empty string ``''``. Thus, through this instance we now have access to all methods
177 | of the ``Pregex`` class, though we are not really able to match anything with it yet.
178 | 
179 | ### Yellow letter assertions
180 | 
181 | Before we go on to dictate what the valid letters for each position within the word
182 | are, we are first going to deal with yellow letters, that is, letters which we know are
183 | included in the word that we are looking for, though their position is still uncertain.
184 | Since we know for a fact that the sought out word contains these letters, we have to
185 | somehow make sure that any candidate word includes them as well. This can easily be
186 | done by using what is known in RegEx lingo as a *positive lookahead assertion*,
187 | represented in PRegEx by the less intimidating *FollowedBy*! Assertions are used in
188 | order to *assert* something about a pattern without really having to *match* any additional
189 | characters. A positive lookahead assertion, in particular, dictates that the pattern to which
190 | it is applied must be followed by some other pattern in order for the former to constitute
191 | a valid match.
192 | 
193 | In PRegEx, one is able to create a ``Pregex`` instance out of applying a positive
194 | lookahead assertion to some pattern ``p1`` by doing the following:
195 | 
196 | ```python
197 | from pregex.core.assertions import FollowedBy
198 | 
199 | pre = FollowedBy(p1, p2)
200 | ```
201 | 
202 | where both ``p1`` and ``p2`` are either strings or ``Pregex`` instances. Furthermore, in the
203 | case that ``p1`` already is a ``Pregex`` class instance, one can achieve the same result with:
204 | 
205 | ```python
206 | pre = p1.followed_by(p2)
207 | ```
208 | 
209 | Having initialized ``wordle`` as a ``Pregex`` instance, we can simply do
210 | ``wordle.followed_by(some_pattern)`` so as to indicate that any potential match
211 | with ``wordle`` must be followed by ``some_pattern``. Recall that ``wordle`` merely
212 | represents the empty string, so we are not really matching anything at this point.
213 | Applying an assertion to the empty string pattern is just a neat little trick one
214 | can use in order to validate something about their pattern before they even begin
215 | to build it.
216 | 
217 | Now it's just a matter of figuring out what the value of ``some_pattern`` is.
218 | Surely we can't just do ``wordle = wordle.followed_by(letter)``, as this results
219 | in ``letter`` always having to be at the beginning of the word. Here's however what
220 | we can do: It follows from the rules of Wordle that all words must be comprised of five
221 | letters, any of which is potentially a yellow letter. Thus, every yellow letter is certain
222 | to be preceded by up to four other letters, but no more than that. Therefore, we need a
223 | pattern that represents just that, namely *four letters at most*. By applying quantifier
224 | ``at_most(n=4)`` to an instance of ``AnyUppercaseLetter()``, we are able to create such
225 | a pattern. Add a yellow letter to its right and we have our ``some_pattern``. Since there
226 | may be more than one yellow letter, we make sure that we iterate over them all one by one so
227 | as to enforce a separate assertion for each:
228 | 
229 | ```python
230 | from pregex.core.classes import AnyUppercaseLetter
231 | 
232 | yellow_letters_list: list[str] = [l for letter_list in yellow.values() for l in letter_list]
233 | 
234 | at_most_four_letters = AnyUppercaseLetter().at_most(n=4)
235 | 
236 | for letter in yellow_letters_list:
237 |     wordle = wordle.followed_by(at_most_four_letters + letter)
238 | ```
239 | 
240 | By executing the above code snippet we get a ``Pregex`` instance which
241 | represents the following RegEx pattern:
242 | 
243 | ```
244 | (?=[A-Z]{,4}A)(?=[A-Z]{,4}R)(?=[A-Z]{,4}P)
245 | ```
246 | 
247 | ### Building valid character classes
248 | 
249 | After we have made sure that our pattern will reject any words that do not contain
250 | all the yellow letters, we can finally start building the part of the pattern that
251 | will handle the actual matching. This can easily be achieved by performing five
252 | iterations, one for each letter of the word, where at each iteration ``i`` we
253 | construct a new character class, which is then appended to our pattern based
254 | on the following logic:
255 | 
256 | * If the letter that corresponds to the word's i-th position is known, then
257 |   make it so that the pattern only matches that letter at that position.
258 | 
259 | * If the letter that corresponds to the word's i-th position is not known,
260 |   then make it so that the pattern matches any letter except for gray letters,
261 |   green letters, as well as any yellow letters that may have been ruled out for
262 |   that exact position.
263 | 
264 | The following code snippet does just that:
265 | 
266 | ```python
267 | from pregex.core.classes import AnyFrom
268 | 
269 | for i in range(1, 6):
270 |     if i in green:
271 |         wordle += green[i]
272 |     else:
273 |         invalid_chars_at_pos_i = gray + list(green.values())
274 |         if i in yellow:
275 |             invalid_chars_at_pos_i += yellow[i]
276 |         wordle += AnyUppercaseLetter() - AnyFrom(*invalid_chars_at_pos_i)
277 | ```
278 | 
279 | After executing the above code, ``wordle`` will contain the following
280 | RegEx pattern:
281 | 
282 | ```
283 | (?=[A-Z]{,4}A)(?=[A-Z]{,4}R)(?=[A-Z]{,4}P)[BE-KOPSV-Z][ABE-KOPRSV-Z][ABE-KORSV-Z]T[ABE-KOPRSV-Z]
284 | ```
285 | 
286 | ### Matching from a dictionary
287 | 
288 | Having built our pattern, the only thing left to do is to actually use it to
289 | match candidate words. Provided that we have access to a text file containing
290 | all possible Wordle words, we are able to invoke our ``Pregex`` instance's
291 | ``get_matches`` method in order to scan said text file for any potential matches. 
292 | 
293 | ```python
294 | words = wordle.get_matches('word_dictionary.txt', is_path=True)
295 | ```
296 | 
297 | ### Putting it all together
298 | 
299 | Finally, we combine together everything we discussed into a single function that
300 | spews out a list of words which satisfy all necessary conditions so that they
301 | constitute possible solutions to the problem.
302 | 
303 | ```python
304 | def wordle_solver(green: dict[int, str], yellow: dict[int, list[str]], gray: list[str]) -> list[str]:
305 | 
306 |     from pregex.core.pre import Pregex
307 |     from pregex.core.classes import AnyUpperCaseLetter, AnyFrom
308 | 
309 |     # Initialize pattern as the empty string pattern.
310 |     wordle = Pregex()
311 | 
312 |     # This part ensures that yellow letters
313 |     # will appear at least once within the word.
314 |     yellow_letters_list = [l for letter_list in yellow.values() for l in letter_list]
315 |     at_most_four_letters = AnyUppercaseLetter().at_most(n=4)
316 |     for letter in yellow_letters_list:
317 |         wordle = wordle.followed_by(at_most_four_letters + letter)
318 | 
319 |     # This part actually dictates the set of valid letters
320 |     # for each position within the word.
321 |     for i in range(1, 6):
322 |         if i in green:
323 |             wordle += green[i]
324 |         else:
325 |             invalid_chars_at_pos_i = gray + list(green.values())
326 |             if i in yellow:
327 |                 invalid_chars_at_pos_i += yellow[i]
328 |             wordle += AnyUppercaseLetter() - AnyFrom(*invalid_chars_at_pos_i)
329 | 
330 |     # Match candidate words from dictionary and return them in a list.
331 |     return wordle.get_matches('word_dictionary.txt', is_path=True)
332 | ```
333 | 
334 | By invoking the above function we get the following list of words:
335 | 
336 | ```python
337 | word_candidates = wordle_solver(green, yellow, gray)
338 | 
339 | print(word_candidates) # This prints ['PARTY']
340 | ```
341 | 
342 | Looks like there is only one candidate word, which means that we
343 | can consider our problem solved!
344 | 
345 | You can learn more about PRegEx by visiting the [PRegEx Documentation Page][docs-url].
346 | 
347 | 
348 | <!-- MARKDOWN LINKS & IMAGES -->
349 | [python-shield]: https://img.shields.io/badge/python-3.9+-blue
350 | [python-url]: https://www.python.org/downloads/release/python-390/
351 | [license-shield]: https://img.shields.io/badge/license-MIT-red
352 | [license-url]: https://github.com/manoss96/pregex/blob/main/LICENSE
353 | [coverage-shield]: https://coveralls.io/repos/github/manoss96/pregex/badge.svg?branch=main&service=github
354 | [coverage-url]: https://coveralls.io/github/manoss96/pregex?branch=main
355 | [docs-url]: https://pregex.readthedocs.io/en/latest/


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | # Every target is forwarded to $(SPHINXBUILD) using its -M option.
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | # "Makefile" is declared phony, presumably so the catch-all rule below is never applied to it.
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | REM Switch to the directory holding this script so the relative paths below resolve.
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | REM Fall back to plain sphinx-build when SPHINXBUILD is not set by the caller.
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | REM Probe the command once; errorlevel 9009 is reported below as command not found.
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | 	echo.If you don't have Sphinx installed, grab it from
22 | 	echo.https://www.sphinx-doc.org/
23 | 	exit /b 1
24 | )
25 | REM Without an argument show the Sphinx help, otherwise forward the argument via -M.
26 | if "%1" == "" goto help
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | # The entry below makes the package under src/ importable for autodoc.
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath('../../src/'))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | 
20 | project = 'pregex'
21 | copyright = '2022, Manos Stoumpos'
22 | author = 'Manos Stoumpos'
23 | 
24 | # The full version, including alpha/beta/rc tags
25 | release = '2.3.3'
26 | 
27 | 
28 | # -- General configuration ---------------------------------------------------
29 | 
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones. All three extensions enabled below ship with Sphinx itself.
33 | extensions = ['sphinx.ext.todo', 'sphinx.ext.viewcode', 'sphinx.ext.autodoc']
34 | 
35 | # Supported file suffixes: only reStructuredText sources are built.
36 | source_suffix = ['.rst']
37 | 
38 | # Add any paths that contain templates here, relative to this directory.
39 | templates_path = ['_templates']
40 | 
41 | # List of patterns, relative to source directory, that match files and
42 | # directories to ignore when looking for source files.
43 | # This pattern also affects html_static_path and html_extra_path.
44 | exclude_patterns = []
45 | 
46 | 
47 | # -- Options for HTML output -------------------------------------------------
48 | 
49 | # The theme to use for HTML and HTML Help pages.  See the documentation for
50 | # a list of builtin themes. 'sphinx_rtd_theme' is the Read the Docs theme.
51 | html_theme = 'sphinx_rtd_theme'
52 | 
53 | # Add any paths that contain custom static files (such as style sheets) here,
54 | # relative to this directory. They are copied after the builtin static files,
55 | # so a file named "default.css" will overwrite the builtin "default.css".
56 | html_static_path = ['_static']


--------------------------------------------------------------------------------
/docs/source/documentation/best-practices.rst:
--------------------------------------------------------------------------------
  1 | ###############
  2 | Best Practices
  3 | ###############
  4 | 
  5 | This page discusses the best practices that one should
  6 | consider following when it comes to using pregex.
  7 | 
  8 | Importing
  9 | ==========
 10 | 
 11 | Due to the relatively large number of modules contained within pregex,
 12 | having to import each class individually can quickly become extremely annoying.
 13 | For this reason, it is suggested that one handles their imports by
 14 | including the following statements at the top of their Python script:
 15 | 
 16 | * ``from pregex.core import *`` - Imports all core modules by using short aliases.
 17 |   More specifically:
 18 | 
 19 |   * Module :py:mod:`pregex.core.assertions` is imported as ``asr``
 20 |   * Module :py:mod:`pregex.core.classes` is imported as ``cl``
 21 |   * Module :py:mod:`pregex.core.groups` is imported as ``gr``
 22 |   * Module :py:mod:`pregex.core.operators` is imported as ``op``
 23 |   * Module :py:mod:`pregex.core.quantifiers` is imported as ``qu``
 24 |   * Module :py:mod:`pregex.core.tokens` is imported as ``tk``
 25 |   * Class :class:`pregex.core.pre.Pregex` is imported as is.
 26 | 
 27 |   Take a look at the example below to better understand how this works:
 28 | 
 29 |   .. code-block:: python
 30 | 
 31 | 	from pregex.core import *
 32 | 
 33 | 	pre = op.Either("Hello", "Bye") + " World" + qu.Optional("!")
 34 | 
 35 | 	pre.print_pattern() # This prints "(?:Hello|Bye) World!?"
 36 | 
 37 |   It is recommended that you follow this practice as besides the fact that
 38 |   it saves you the trouble of having to import from each module separately,
 39 |   it also ensures that you are aware of the module that each class belongs in,
 40 |   which in turn reveals a lot in regards to the class's functionality and how
 41 |   it can be used.
 42 | 
 43 | * ``from pregex.meta import *`` - Directly imports every class defined within any
 44 |   one of the *meta* modules.
 45 | 
 46 | 
 47 | Finally, one is also able to replace both of the above import statements
 48 | with a single statement, namely ``from pregex import *``.
 49 | 
 50 | 
 51 | Maintaining readability
 52 | =========================
 53 | 
 54 | One of the primary benefits of using PRegEx is being able to construct patterns
 55 | that are readable, and therefore easier to maintain and update than their
 56 | raw RegEx counterparts. This is mainly made possible through PRegEx's human-friendly
 57 | syntax. Nevertheless, there exist certain cases where the syntax on its own is not
 58 | enough to achieve readability, especially when it comes to building
 59 | complex patterns. Consider for example the following Pregex instance:
 60 | 
 61 | .. code-block:: python
 62 | 
 63 |   from pregex.core import *
 64 | 
 65 |   pre: Pregex = \
 66 |       op.Enclose(
 67 |           op.Either(
 68 |               asr.FollowedBy(
 69 |                   asr.PrecededBy(
 70 |                       qu.OneOrMore(op.Either('+', '-')),
 71 |                       2 * (cl.AnyLetter() | (cl.AnyPunctuation() - cl.AnyFrom('+', '-')))
 72 |                   ),
 73 |                   2 * (cl.AnyLetter() | (cl.AnyPunctuation() - cl.AnyFrom('+', '-')))
 74 |               ),
 75 |               asr.NotPrecededBy(
 76 |                   asr.FollowedBy(
 77 |                       qu.OneOrMore(op.Either('+', '-')),
 78 |                       2 * cl.AnyDigit()
 79 |                   ),
 80 |                   op.Either(cl.Any() + cl.AnyDigit(), cl.AnyDigit() + cl.Any())
 81 |               ),
 82 |               asr.NotFollowedBy(
 83 |                   asr.PrecededBy(
 84 |                       qu.OneOrMore(op.Either('+', '-')),
 85 |                       2 * cl.AnyDigit()
 86 |                   ),
 87 |                   op.Either(cl.Any() + cl.AnyDigit(), cl.AnyDigit() + cl.Any())
 88 |               )
 89 |           ),
 90 |           2 * (cl.AnyDigit() | cl.AnyLetter() | (cl.AnyPunctuation() - cl.AnyFrom('+', '-')))
 91 |       )
 92 | 
 93 | And this is the RegEx pattern to which the above Pregex instance compiles:
 94 | 
 95 | .. code-block::
 96 | 
 97 |   [,.-~!-*]{2}(?:(?<=[.-\/,!-*:-~]{2})(?:\+|-)+(?=[.-\/,!-*:-~]{2})|(?<!.\d|\d.)(?:\+|-)+(?=\d{2})|(?<=\d{2})(?:\+|-)+(?!.\d|\d.))[,.-~!-*]{2}
 98 | 
 99 | Although it could be argued that this pattern can be more easily
100 | studied while in its Pregex form, at least by people who are not entirely
101 | familiar with RegEx's syntax, it is still not quite clear what its purpose
102 | is. By following a different pattern-building approach, we are going to
103 | slightly modify the above Pregex instance so that it is a lot easier
104 | to read, without messing with the underlying RegEx pattern.
105 | 
106 | 
107 | Breaking down a pattern
108 | --------------------------
109 | If a pattern is overly complex, one might like to try breaking it down
110 | into simpler subpatterns which can then be stored in variables with
111 | meaningful names. Considering our example above, we can search for any
112 | repeated subpatterns throughout the main pattern and substitute them
113 | with such variables. Furthermore, we are going to replace operator 
114 | :class:`~pregex.core.operators.Enclose` by simply concatenating
115 | the *enclosing* pattern at both the start and the end of the *enclosed* pattern,
116 | as, despite the operator succeeding in making the pattern shorter to write,
117 | it adds an additional layer of nestedness, which we would like to eliminate:
118 | 
119 | .. code-block:: python
120 | 
121 |   from pregex.core import *
122 | 
123 |   one_or_more_signs = qu.OneOrMore(op.Either('+', '-'))
124 | 
125 |   any_punct_but_signs = cl.AnyPunctuation() - cl.AnyFrom('+', '-')
126 | 
127 |   any_two_letters_or_punct_but_signs = 2 * (cl.AnyLetter() | any_punct_but_signs)
128 | 
129 |   any_two_digits = 2 * cl.AnyDigit()
130 | 
131 |   any_two_char_sequence_containing_digits = op.Either(cl.Any() + cl.AnyDigit(), cl.AnyDigit() + cl.Any())
132 | 
133 |   any_two_alphanums_or_punct_but_signs = 2 * (cl.AnyDigit() | cl.AnyLetter() | any_punct_but_signs)
134 | 
135 | 
136 |   pre: Pregex = \
137 |       any_two_alphanums_or_punct_but_signs + \
138 |       op.Either(
139 |           asr.PrecededBy(
140 |               asr.FollowedBy(
141 |                   one_or_more_signs,
142 |                   any_two_letters_or_punct_but_signs
143 |               ),
144 |               any_two_letters_or_punct_but_signs
145 |           ),
146 |           asr.NotPrecededBy(
147 |               asr.FollowedBy(
148 |                   one_or_more_signs,
149 |                   any_two_digits
150 |               ),
151 |               any_two_char_sequence_containing_digits
152 |           ),
153 |           asr.NotFollowedBy(
154 |               asr.PrecededBy(
155 |                   one_or_more_signs,
156 |                   any_two_digits
157 |                 ),
158 |               any_two_char_sequence_containing_digits
159 |           )
160 |       ) + \
161 |       any_two_alphanums_or_punct_but_signs
162 | 
163 | This new form certainly looks less overwhelming than it did before,
164 | though there is still room for improvement.
165 | 
166 | Utilizing pattern chaining
167 | --------------------------
168 | In `Pattern chaining <covering-the-basics.html#pattern-chaining>`_ we saw an alternative way
169 | of building patterns, which in certain cases is to be preferred over the standard API,
170 | and it just so happens that lookarounds constitute one of these cases. Here's what our pattern
171 | looks like when we apply the pattern chaining technique in order to impose any lookaround
172 | assertions:
173 | 
174 | 
175 | .. code-block:: python
176 | 
177 |   from pregex.core import *
178 | 
179 |   one_or_more_signs = qu.OneOrMore(op.Either('+', '-'))
180 | 
181 |   any_punct_but_signs = cl.AnyPunctuation() - cl.AnyFrom('+', '-')
182 | 
183 |   any_two_letters_or_punct_but_signs = 2 * (cl.AnyLetter() | any_punct_but_signs)
184 | 
185 |   any_two_digits = 2 * cl.AnyDigit()
186 | 
187 |   any_two_char_sequence_containing_digits = op.Either(cl.Any() + cl.AnyDigit(), cl.AnyDigit() + cl.Any())
188 | 
189 |   any_two_alphanums_or_punct_but_signs = 2 * (cl.AnyDigit() | cl.AnyLetter() | any_punct_but_signs)
190 | 
191 | 
192 |   pre: Pregex = \
193 |       any_two_alphanums_or_punct_but_signs + \
194 |       op.Either(
195 |           one_or_more_signs \
196 |               .preceded_by(any_two_letters_or_punct_but_signs) \
197 |               .followed_by(any_two_letters_or_punct_but_signs),
198 |           one_or_more_signs \
199 |               .followed_by(any_two_digits) \
200 |               .not_preceded_by(any_two_char_sequence_containing_digits),
201 |           one_or_more_signs  \
202 |               .preceded_by(any_two_digits) \
203 |               .not_followed_by(any_two_char_sequence_containing_digits)
204 |       ) + \
205 |       any_two_alphanums_or_punct_but_signs
206 | 
207 | Having tinkered with the pattern-building process by incorporating what was discussed,
208 | it is now a lot more clear what this pattern is trying to match, which is any sequence
209 | of signs ``+`` and ``-`` that is both preceded and followed by any two-character sequence
210 | of letters, digits and punctuation marks except for ``+`` and ``-``, as long as any digits
211 | that appear within a possible match are:
212 | 
213 | 1. Found exclusively either to the left or to the right of the sign sequence.
214 | 2. Occupy the whole two-character sequence.
215 | 
216 | To give a concrete example, this pattern will match strings like ``a!+#c``, ``a!--12``
217 | and ``12+-+a#``, but it won't work for strings like ``a!#$f``, ``a!+#3`` and ``1!-a#``.
218 | 
219 | Having read all the above, try adopting these practices yourself when building
220 | patterns with PRegEx so you make the most out of it!
221 | 


--------------------------------------------------------------------------------
/docs/source/documentation/covering-the-basics.rst:
--------------------------------------------------------------------------------
  1 | ###################
  2 | Covering the Basics
  3 | ###################
  4 | 
  5 | In this section you will be learning about the :class:`~pregex.core.pre.Pregex`
  6 | class, and how instances of this class can be effectively combined together in
  7 | order to construct complex RegEx patterns.
  8 | 
  9 | The Pregex class
 10 | ============================================
 11 | 
 12 | The basic idea behind PRegEx is to provide higher-level abstractions
 13 | of RegEx patterns that are easier to read and to work with.
 14 | 
 15 | .. code-block:: python
 16 | 
 17 |    from pregex.core.quantifiers import Optional
 18 |    from pregex.core.groups import Capture
 19 |    from pregex.core.operators import Either
 20 | 
 21 |    Optional('a') # Stands for quantifier 'a?'
 22 |    Capture('a') # Stands for capturing group '(a)'
 23 |    Either('a', 'b') # Stands for alternation 'a|b'
 24 | 
 25 | Besides representing RegEx patterns, these abstractions must also be able to
 26 | serve as individual units that can be built upon. This is made possible by
 27 | having a single base class, namely :class:`~pregex.core.pre.Pregex`, from which
 28 | all other classes inherit.
 29 | 
 30 | .. code-block:: python
 31 | 
 32 |    from pregex.core.pre import Pregex
 33 |    from pregex.core.classes import AnyDigit
 34 |    from pregex.core.operators import Either
 35 |    from pregex.core.assertions import FollowedBy
 36 | 
 37 |    # These are both Pregex instances.
 38 |    digit: Pregex = AnyDigit()
 39 |    either_a_or_b: Pregex = Either('a', 'b')
 40 | 
 41 |    # This is a Pregex instance as well!
 42 |    digit_followed_by_either_a_or_b: Pregex = FollowedBy(digit, either_a_or_b)
 43 | 
 44 | Being wrapped within instances of the same type allows for these Pregex
 45 | patterns to be easily combined together into even more complex patterns.
 46 | Consider for example the code snippet below where we construct a Pregex
 47 | pattern that will match either any word that starts with "ST" or "st",
 48 | or any three-digit integer:
 49 | 
 50 | .. code-block:: python
 51 | 
 52 |    from pregex.core.operators import Either
 53 |    from pregex.core.quantifiers import OneOrMore
 54 |    from pregex.core.assertions import WordBoundary
 55 |    from pregex.core.classes import AnyLetter, AnyDigit
 56 | 
 57 |    starts_with_st = Either('ST', 'st') + OneOrMore(AnyLetter())
 58 | 
 59 |    three_digit_integer = (AnyDigit() - '0') + (2 * AnyDigit())
 60 | 
 61 |    pre = WordBoundary() + Either(starts_with_st, three_digit_integer) + WordBoundary()
 62 | 
 63 | By both using PRegEx's human-friendly syntax and breaking down the pattern into simpler
 64 | subpatterns, it is not hard to follow this pattern's construction process, as well as
 65 | what its purpose is. Furthermore, the resulting pattern is a Pregex instance itself,
 66 | and as such, it has access to all of the class's methods:
 67 | 
 68 | .. code-block:: python
 69 | 
 70 |    pre.print_pattern() # This prints '\b(?:(?:ST|st)[A-Za-z]+|[1-9]\d{2})\b'
 71 |    print(pre.get_matches('STACK station pastry must 012 446 3462')) # This prints "['STACK', 'station', '446']"
 72 | 
 73 | 
 74 | Converting a string into a Pregex instance
 75 | ============================================
 76 | In general, one can wrap any string within a Pregex instance by passing it as a 
 77 | parameter to the class's constructor. By doing this, any characters of the provided
 78 | string that require escaping are automatically escaped.
 79 | 
 80 | .. code-block:: python
 81 | 
 82 |    from pregex.core.pre import Pregex
 83 | 
 84 |    pre = Pregex('Hello.')
 85 | 
 86 |    pre.print_pattern() # This prints 'Hello\.'
 87 | 
 88 | Nevertheless, you probably won't need to do this often since any string that interacts
 89 | with a Pregex instance in any way is automatically converted into a Pregex instance itself:
 90 | 
 91 | .. code-block:: python
 92 | 
 93 |    from pregex.core.pre import Pregex
 94 |    from pregex.core.quantifiers import Optional
 95 | 
 96 |    # These two statements are equivalent.
 97 |    pre1 = Optional(Pregex('Hello.'))
 98 |    pre2 = Optional('Hello.')
 99 | 
100 | Manually wrapping strings within Pregex instances can however be of use when one wishes
101 | to explicitly define their own RegEx pattern. In that case, one must also not forget
102 | to set the class's constructor ``escape`` parameter to ``False``, in order to disable
103 | character-escaping:
104 | 
105 | .. code-block:: python
106 | 
107 |    from pregex.core.pre import Pregex
108 | 
109 |    pre = Pregex('[a-z].?', escape=False)
110 | 
111 |    pre.print_pattern() # This prints '[a-z].?'   
112 | 
113 | Concatenating patterns with "+"
114 | ============================================
115 | There exists a separate :class:`~pregex.core.operators.Concat` class,
116 | which is specifically used to concatenate two or more patterns together.
117 | However, one can also achieve the same result by making use of Pregex's
118 | overloaded addition operator ``+``.
119 | 
120 | .. code-block:: python
121 | 
122 |    from pregex.core.pre import Pregex
123 |    from pregex.core.quantifiers import Optional
124 | 
125 |    pre = Pregex('a') + Pregex('b') + Optional('c')
126 | 
127 |    pre.print_pattern() # This prints 'abc?'
128 | 
129 | This of course works with simple strings as well, as long as there
130 | is at least one Pregex instance involved in the operation:
131 | 
132 | .. code-block:: python
133 | 
134 |    from pregex.core.quantifiers import Optional
135 | 
136 |    pre = 'a' + 'b' + Optional('c')
137 | 
138 |    pre.print_pattern() # This prints 'abc?'
139 | 
140 | Concatenating patterns this way is encouraged as it leads to much more
141 | easy-to-read code.
142 | 
143 | Repeating patterns with "*"
144 | ============================================
145 | :class:`Pregex` has one more overloaded operator, namely the multiplication operator
146 | ``*``, which essentially replaces class :class:`~pregex.core.quantifiers.Exactly`.
147 | By using this operator on a Pregex instance, one indicates that a pattern is to be
148 | repeated an exact number of times:
149 | 
150 | .. code-block:: python
151 | 
152 |    from pregex.core.pre import Pregex
153 | 
154 |    pre = 3 * Pregex('a')
155 | 
156 |    pre.print_pattern() # This prints 'a{3}'
157 | 
158 | As is the case with the addition operator ``+``, it is recommended
159 | that one also makes use of the multiplication operator ``*`` whenever
160 | possible.
161 | 
162 | 
163 | The "empty string" pattern
164 | ================================
165 | 
166 | Invoking the ``Pregex`` class's constructor without supplying it with a
167 | value for parameter ``pattern``, causes said parameter to take its default
168 | value, that is, the empty string ``''``. This is a good starting point
169 | to begin constructing your pattern:
170 | 
171 | .. code-block:: python
172 | 
173 |    from pregex.core.pre import Pregex
174 | 
175 |    # Initialize your pattern as the empty string pattern.
176 |    pre = Pregex()
177 | 
178 |    # Start building your pattern...
179 |    for subpattern in subpatterns:
180 |       if '!' in subpattern.get_pattern():
181 |          pre = pre.concat(subpattern + '?')
182 |       else:
183 |          pre = pre.concat(subpattern + '!')
184 | 
185 | On top of that, any ``Pregex`` instance whose underlying pattern
186 | is the empty string pattern, has the following properties:
187 | 
188 | 1. Applying a quantifier to the empty string pattern results in itself:
189 | 
190 |   .. code-block:: python
191 | 
192 |     from pregex.core.pre import Pregex
193 |     from pregex.core.quantifiers import OneOrMore
194 | 
195 |     pre = OneOrMore(Pregex())
196 |     pre.print_pattern() # This prints ''
197 | 
198 | 2. Creating a group out of the empty string pattern results in itself:
199 | 
200 |   .. code-block:: python
201 | 
202 |     from pregex.core.pre import Pregex
203 |     from pregex.core.groups import Group
204 | 
205 |     pre = Group(Pregex())
206 |     pre.print_pattern() # This prints ''
207 | 
208 | 3. Applying the alternation operation between the empty string
209 |    pattern and an ordinary pattern results in the latter:
210 | 
211 |   .. code-block:: python
212 | 
213 |     from pregex.core.pre import Pregex
214 |     from pregex.core.operators import Either
215 | 
216 |     pre = Either(Pregex(), 'a')
217 |     pre.print_pattern() # This prints 'a'
218 | 
219 | 4. Applying a positive lookahead assertion based on the empty
220 |    string pattern to any pattern results in that pattern:
221 | 
222 |   .. code-block:: python
223 | 
224 |     from pregex.core.pre import Pregex
225 |     from pregex.core.assertions import FollowedBy
226 | 
227 |     pre = FollowedBy('a', Pregex())
228 |     pre.print_pattern() # This prints 'a'
229 | 
230 | The above properties make it easy to write concise code
231 | like the following, without compromising your pattern:
232 | 
233 | .. code-block:: python
234 | 
235 |    from pregex.core.pre import Pregex
236 |    from pregex.core.groups import Capture
237 |    from pregex.core.operators import Either
238 |    from pregex.core.quantifiers import OneOrMore
239 | 
240 |    pre = Either(
241 |       'a',
242 |       'b' if i > 5 else Pregex(),
243 |       OneOrMore('c' if i > 10 else Pregex())
244 |    ) + Capture('d' if i > 15 else Pregex())
245 | 
246 | This is the underlying pattern of instance ``pre`` when
247 | executing the above code snippet for various values of ``i``:
248 | 
249 | * For ``i`` equal to ``1`` the resulting pattern is ``a``
250 | * For ``i`` equal to ``6`` the resulting pattern is ``a|b``
251 | * For ``i`` equal to ``11`` the resulting pattern is ``a|b|c+``
252 | * For ``i`` equal to ``16`` the resulting pattern is ``(?:a|b|c+)(d)``
253 |    
254 | 
255 | Pattern chaining
256 | ==================
257 | Apart from PRegEx's standard pattern-building API which involves
258 | wrapping strings and/or Pregex instances within other Pregex instances,
259 | there also exists a more functional-like approach to constructing patterns.
260 | More specifically, every Pregex instance has access to a number of methods
261 | that can be used so as to apply basic RegEx operators to its underlying
262 | pattern, through which process a brand new Pregex instance is generated.
263 | 
264 | .. code-block:: python
265 | 
266 |   from pregex.core.classes import AnyLetter
267 |   from pregex.core.quantifiers import Optional
268 | 
269 |   letter = AnyLetter()
270 | 
271 |   # Both statements are equivalent.
272 |   optional_letter_1 = Optional(letter)
273 |   optional_letter_2 = letter.optional()
274 | 
275 | By chaining many of these methods together, it is also possible
276 | to construct more complex patterns. This technique is called
277 | *pattern chaining*:
278 | 
279 | .. code-block:: python
280 | 
281 |   from pregex.core.pre import Pregex
282 | 
283 |   pre = Pregex() \
284 |       .concat('a') \
285 |       .either('b') \
286 |       .one_or_more() \
287 |       .concat('c') \
288 |       .optional() \
289 |       .concat('d') \
290 |       .match_at_line_start() \
291 |       .match_at_line_end()
292 | 
293 |   pre.print_pattern() # This prints '^(?:(?:a|b)+c)?d$'
294 | 
295 | It is generally recommended that you use the standard API when dealing
296 | with larger patterns, as it provides a way of building patterns that is
297 | usually easier to read. Be that as it may, there do exist several cases
298 | where pattern chaining is the better choice of the two. In the end, it's
299 | just a matter of choice!
300 | 
301 | Check out :class:`~pregex.core.pre.Pregex` to learn what other methods this class
302 | has to offer.


--------------------------------------------------------------------------------
/docs/source/documentation/modules/core/assertions.rst:
--------------------------------------------------------------------------------
1 | ***********************
2 | pregex.core.assertions
3 | ***********************
4 | .. automodule:: pregex.core.assertions
5 |    :members:
6 |    :undoc-members:


--------------------------------------------------------------------------------
/docs/source/documentation/modules/core/classes.rst:
--------------------------------------------------------------------------------
1 | 
2 | *********************
3 | pregex.core.classes
4 | *********************
5 | .. automodule:: pregex.core.classes
6 |    :members:
7 |    :undoc-members:


--------------------------------------------------------------------------------
/docs/source/documentation/modules/core/groups.rst:
--------------------------------------------------------------------------------
1 | *********************
2 | pregex.core.groups
3 | *********************
4 | .. automodule:: pregex.core.groups
5 |    :members:
6 |    :undoc-members:


--------------------------------------------------------------------------------
/docs/source/documentation/modules/core/operators.rst:
--------------------------------------------------------------------------------
1 | *********************
2 | pregex.core.operators
3 | *********************
4 | .. automodule:: pregex.core.operators
5 |    :members:
6 |    :undoc-members:


--------------------------------------------------------------------------------
/docs/source/documentation/modules/core/pre.rst:
--------------------------------------------------------------------------------
1 | *********************
2 | pregex.core.pre
3 | *********************
4 | .. automodule:: pregex.core.pre
5 |    :members:
6 |    :undoc-members:


--------------------------------------------------------------------------------
/docs/source/documentation/modules/core/quantifiers.rst:
--------------------------------------------------------------------------------
1 | *************************
2 | pregex.core.quantifiers
3 | *************************
4 | .. automodule:: pregex.core.quantifiers
5 |    :members:
6 |    :undoc-members:


--------------------------------------------------------------------------------
/docs/source/documentation/modules/core/tokens.rst:
--------------------------------------------------------------------------------
1 | *********************
2 | pregex.core.tokens
3 | *********************
4 | .. automodule:: pregex.core.tokens
5 |    :members:
6 |    :undoc-members:


--------------------------------------------------------------------------------
/docs/source/documentation/modules/meta/essentials.rst:
--------------------------------------------------------------------------------
1 | ***********************
2 | pregex.meta.essentials
3 | ***********************
4 | .. automodule:: pregex.meta.essentials
5 |    :members:
6 |    :undoc-members:


--------------------------------------------------------------------------------
/docs/source/documentation/subpackages.rst:
--------------------------------------------------------------------------------
  1 | #############
  2 | Subpackages
  3 | #############
  4 | 
  5 | PRegEx's modules are divided into two subpackages, namely ``pregex.core`` and
  6 | ``pregex.meta``, the former of which predominantly contains modules whose classes
  7 | represent some fundamental RegEx operator, whereas the latter acts as a collection
  8 | of various classes that build upon those within the core modules in order to provide
  9 | ready-made patterns that can be used "straight out of the box".
 10 | 
 11 | pregex.core
 12 | =================
 13 | 
 14 | In order to better understand *core* modules, consider for example
 15 | :py:mod:`pregex.core.quantifiers`, all classes of which correspond
 16 | to a unique RegEx quantifier:
 17 | 
 18 | .. code-block:: python
 19 | 
 20 |    from pregex.core.quantifiers import *
 21 | 
 22 |    Optional # Represents quantifier '?'
 23 |    Indefinite # Represents quantifier '*'
 24 |    OneOrMore # Represents quantifier '+'
 25 |    Exactly # Represents quantifier '{n}'
 26 |    AtLeast # Represents quantifier '{n,}'
 27 |    AtMost # Represents quantifier '{,n}'
 28 |    AtLeastAtMost # Represents quantifier '{n,m}'
 29 | 
 30 | However, not all core modules contain classes that represent some specific
 31 | RegEx operator. There is the :py:mod:`pregex.core.tokens` module, whose
 32 | classes act as wrappers for various single-character patterns. That is, either
 33 | to protect you from any character-escape-related issues that may arise due
 34 | to using raw strings containing backslashes, or to save you the trouble of looking
 35 | for a specific symbol's Unicode code point, provided of course that there is a
 36 | corresponding *Token* class for that symbol.
 37 | 
 38 | .. code-block:: python
 39 | 
 40 |    from pregex.core.tokens import Newline, Copyright
 41 | 
 42 |    # Both of these statements are 'True'.
 43 |    Newline().is_exact_match('\n')
 44 |    Copyright().is_exact_match('©')
 45 | 
 46 | 
 47 | Lastly, there is module :py:mod:`pregex.core.classes` which offers not only
 48 | a number of commonly used RegEx character classes, but also a complete
 49 | framework for working on these classes as if they were regular sets.
 50 | 
 51 | .. code-block:: python
 52 | 
 53 |    from pregex.core.classes import AnyLetter, AnyDigit
 54 | 
 55 |    letter = AnyLetter() # Represents '[A-Za-z]'
 56 |    digit_but_five = AnyDigit() - '5' # Represents '[0-46-9]'
 57 |    letter_or_digit_but_five = letter | digit_but_five # Represents '[A-Za-z0-46-9]'
 58 |    any_but_letter_or_digit_but_five = ~ letter_or_digit_but_five # Represents '[^A-Za-z0-46-9]'
 59 | 
 60 | Click on any one of pregex's *core* modules below to check out its classes:
 61 | 
 62 | .. toctree::
 63 |    :maxdepth: 1
 64 | 
 65 |    modules/core/assertions
 66 |    modules/core/classes
 67 |    modules/core/groups
 68 |    modules/core/operators
 69 |    modules/core/pre
 70 |    modules/core/quantifiers
 71 |    modules/core/tokens
 72 | 
 73 | pregex.meta
 74 | =================
 75 | 
 76 | Unlike *core* modules, whose classes are all independent from each other,
 77 | *meta* modules contain classes that effectively combine various
 78 | :class:`~pregex.core.pre.Pregex` instances together in order to form
 79 | complex patterns that you can then use. Consider for example
 80 | :class:`~pregex.meta.essentials.Integer` which enables you to
 81 | match any integer within a specified range.
 82 | 
 83 | .. code-block:: python
 84 | 
 85 |    from pregex.meta.essentials import Integer
 86 | 
 87 |    text = "1 5 11 23 77 117 512 789 1011"
 88 | 
 89 |    pre = Integer(start=50, end=1000)
 90 |    
 91 |    print(pre.get_matches(text)) # This prints "['77', '117', '512', '789']"
 92 | 
 93 | Classes in *meta* modules therefore offer various patterns that can be useful,
 94 | but at the same time hard to build. And remember, no matter the complexity of
 95 | a pattern, it remains a Pregex instance, and as such, it can always be
 96 | extended even further!
 97 | 
 98 | .. code-block:: python
 99 | 
100 |    from pregex.core.classes import AnyLetter
101 |    from pregex.meta.essentials import Integer
102 | 
103 |    pre = AnyLetter() + Integer(start=50, end=1000, is_extensible=True)
104 |    text = "a1 b5 c11 d23 e77 f117 g512 h789 i1011"
105 | 
106 |    print(pre.get_matches(text)) # This prints "['e77', 'f117', 'g512', 'h789']"
107 | 
108 | Just don't forget to set parameter ``is_extensible`` to ``True``, as
109 | this prevents some additional assertions from being applied to the
110 | pattern. Even though these assertions are essential in order for the
111 | pattern to match what it is supposed to, they might also introduce
112 | certain complications when the pattern serves as a building
113 | block of a larger pattern.
114 | 
115 | Click on any one of pregex's *meta* modules below to check out its classes:
116 | 
117 | .. toctree::
118 |    :maxdepth: 1
119 | 
120 |    modules/meta/essentials


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | ******************************************
 2 | PRegEx - Programmable Regular Expressions
 3 | ******************************************
 4 | 
 5 | Welcome to pregex's documentation page! You can start by going through the
 6 | `Introduction <introduction.html>`_ section in order to get a first look at the
 7 | pregex package, as well as receive instructions on how to install it. After that,
 8 | you are free to explore the `Documentation <documentation/covering-the-basics.html>`_
 9 | section so that you learn more about building RegEx patterns with pregex,
10 | or you can even check out the source code itself by visiting pregex on
11 | `Github <https://github.com/manoss96/pregex>`_.
12 | 
13 | ==================================
14 | 
15 | .. toctree::
16 |    :maxdepth: 1
17 |    :caption: Introduction
18 | 
19 |    introduction
20 | 
21 | .. toctree::
22 |    :maxdepth: 1
23 |    :caption: Documentation
24 | 
25 |    documentation/covering-the-basics
26 |    documentation/subpackages
27 |    documentation/best-practices


--------------------------------------------------------------------------------
/docs/source/introduction.rst:
--------------------------------------------------------------------------------
  1 | .. _introduction:
  2 | 
  3 | *******************
  4 | What is PRegEx?
  5 | *******************
  6 | 
  7 | Let's face it, although RegEx is without a doubt an extremely useful tool, its syntax has been repeatedly proven to be quite hard for people to read and to memorize. This is mainly due to RegEx's declarative nature, which many programmers are not familiar with, as well as its extensive use of symbols that do not inherently relate to their functionality within a RegEx pattern, thus rendering them easy to forget. To make matters even worse, RegEx patterns are more often than not tightly packed with large amounts of information, which our brains just seem to be struggling to break down in order to analyze effectively. For these reasons, building even a simple RegEx pattern for matching URLs can prove to be quite a painful task.
  8 | 
  9 | This is where PRegEx comes in! PRegEx, which stands for Programmable Regular Expressions, is a Python package that can be used in order to construct Regular Expression patterns in a more human-friendly way. Through the use of PRegEx, one is able to fully utilize the powerful tool that is RegEx without having to deal with any of its nuisances that seem to drive people crazy! PRegEx achieves that by offering the following:
 10 | 
 11 | 1. An easy-to-remember syntax that resembles the good ol' imperative way of programming!
 12 | 2. No longer having to group patterns or escape meta characters, as both are handled internally by PRegEx!
 13 | 3. Modularity to building RegEx patterns, as one can easily break down a complex pattern into multiple simpler ones which can then be combined together.
 14 | 4. A higher-level API on top of Python's built-in "re" module, providing access to its core functionality and more, while saving you the trouble of having to deal with "re.Match" instances.
 15 | 
 16 | And remember, no matter how complex the abstraction, it's always just a pure
 17 | RegEx pattern that sits underneath which you can fetch and use any way you like!
 18 | 
 19 | *******************
 20 | Installation
 21 | *******************
 22 | 
 23 | You can start using PRegEx by installing it via pip. Note that "pregex" requires Python >= 3.9.
 24 | 
 25 | .. code-block::
 26 | 
 27 |     pip install pregex
 28 | 
 29 | *******************
 30 | Usage Example
 31 | *******************
 32 | 
 33 | In PRegEx, everything is a Programmable Regular Expression, or `Pregex` for short. This makes it easy for simple :class:`~pregex.core.pre.Pregex` instances to be combined into more complex ones! Within the code snippet below, we construct a Pregex instance that will match any URL that ends with either ".com" or ".org" as well as any IP address for which a 4-digit port number is specified. Furthermore, in the case of a URL, we would like for its domain name to be separately captured as well.
 34 | 
 35 | .. code-block:: python
 36 | 
 37 |   from pregex.core.classes import AnyLetter, AnyDigit, AnyFrom
 38 |   from pregex.core.quantifiers import Optional, AtLeastAtMost
 39 |   from pregex.core.operators import Either
 40 |   from pregex.core.groups import Capture
 41 |   from pregex.core.pre import Pregex
 42 | 
 43 |   http_protocol = Optional('http' + Optional('s') + '://')
 44 | 
 45 |   www = Optional('www.')
 46 | 
 47 |   alphanum = AnyLetter() | AnyDigit()
 48 | 
 49 |   domain_name = \
 50 |     alphanum + \
 51 |     AtLeastAtMost(alphanum | AnyFrom('-', '.'), n=1, m=61) + \
 52 |     alphanum
 53 | 
 54 |   tld = '.' + Either('com', 'org')
 55 | 
 56 |   ip_octet = AnyDigit().at_least_at_most(n=1, m=3)
 57 | 
 58 |   port_number = (AnyDigit() - '0') + 3 * AnyDigit()
 59 | 
 60 |   # Combine sub-patterns together.
 61 |   pre: Pregex = \
 62 |       http_protocol + \
 63 |       Either(
 64 |           www + Capture(domain_name) + tld,
 65 |           3 * (ip_octet + '.') + ip_octet + ':' + port_number
 66 |       )
 67 | 
 68 | We can then easily fetch the resulting Pregex instance's underlying RegEx pattern.
 69 | 
 70 | .. code-block:: python
 71 | 
 72 |     regex = pre.get_pattern()
 73 | 
 74 | 
 75 | This is the pattern that we just built. Yikes!
 76 | 
 77 | .. code-block::
 78 | 
 79 |     (?:https?:\/\/)?(?:(?:www\.)?([A-Za-z\d][A-Za-z\d\-.]{1,61}[A-Za-z\d])\.(?:com|org)|(?:\d{1,3}\.){3}\d{1,3}:[1-9]\d{3})
 80 | 
 81 | 
 82 | Besides having access to its underlying pattern, we can use a Pregex instance to find matches within a piece of text. Consider for example the following string:
 83 | 
 84 | .. code-block:: python
 85 | 
 86 |     text = "text--192.168.1.1:8000--text--http://www.wikipedia.org--text--https://youtube.com--text"
 87 | 
 88 | By invoking the instance's :py:meth:`~pregex.core.pre.Pregex.get_matches` method, we are able to scan the above string for any possible matches:
 89 | 
 90 | .. code-block:: python
 91 | 
 92 |     matches = pre.get_matches(text)
 93 | 
 94 | 
 95 | Looks like there were three matches:
 96 | 
 97 | .. code-block:: python
 98 | 
 99 |     ['192.168.1.1:8000', 'http://www.wikipedia.org', 'https://youtube.com']
100 | 
101 | 
102 | Likewise, we can invoke the instance's :py:meth:`~pregex.core.pre.Pregex.get_captures` method to get any captured groups.
103 | 
104 | .. code-block:: python
105 | 
106 |     groups = pre.get_captures(text)
107 | 
108 | As expected, there were only two captured groups since the first match is not a URL and therefore it does not
109 | contain a domain name to be captured.
110 | 
111 | .. code-block:: python
112 | 
113 |     [(None,), ('wikipedia',), ('youtube',)]
114 | 
115 | Finally, you might have noticed that we built our pattern by utilizing
116 | various classes that were imported from modules under *pregex.core*. These
117 | modules contain classes through which the RegEx syntax is essentially replaced.
118 | However, PRegEx also includes another set of modules, namely those under
119 | subpackage *pregex.meta*, whose classes build upon those in *pregex.core* so
120 | as to provide numerous pre-built patterns that you can just import and use
121 | right away!
122 | 
123 | .. code-block:: python
124 | 
125 |   from pregex.core.pre import Pregex
126 |   from pregex.core.classes import AnyDigit
127 |   from pregex.core.operators import Either
128 |   from pregex.meta.essentials import HttpUrl, IPv4
129 | 
130 |   port_number = (AnyDigit() - '0') + 3 * AnyDigit()
131 | 
132 |   pre: Pregex = Either(
133 |       HttpUrl(capture_domain=True, is_extensible=True),
134 |       IPv4(is_extensible=True) + ':' + port_number
135 |   )
136 | 
137 | By using classes found within the *pregex.meta* subpackage, we were able to
138 | construct more or less the same pattern as before only much more easily!
139 | 
140 | 
141 | ***************************
142 | Solving Wordle with PRegEx
143 | ***************************
144 | 
145 | We are now going to see another example that better exhibits the *programmable* nature of PRegEx.
146 | More specifically, we will be creating a Wordle solver function that, given all currently known
147 | information as well as access to a 5-letter word dictionary, utilizes PRegEx in order to return
148 | a list of candidate words to choose from as a possible solution to the problem.
149 | 
150 | Formulating what is known
151 | ------------------------------
152 | 
153 | First things first, we must think of a way to represent what is known so far regarding the
154 | word that we're trying to guess. This information can be encapsulated into three distinct
155 | sets of letters:
156 | 
157 | 1. **Green letters**: Letters that are included in the word, whose position within it is known.
158 | 2. **Yellow letters**: Letters that are included in the word, and while their exact position is
159 |    unknown, there is one or more positions which we can rule out. 
160 | 3. **Gray letters**: Letters that are not included in the word.
161 | 
162 | Green letters can be represented by using a dictionary that maps integers (positions) to strings (letters).
163 | For example, ``{4 : 'T'}`` indicates that the word we are looking for contains the letter ``T`` in its
164 | fourth position. Yellow letters can also be represented as a dictionary with integer keys, whose values
165 | however are going to be lists of strings instead of regular strings, as a position might have been ruled
166 | out for more than a single letter. For example, ``{1 : ['A', 'R'], 3 : ['P']}`` indicates that even though
167 | the word contains letters ``A``, ``R`` and ``P``, it cannot start with either an ``A`` or an ``R``, nor
168 | can it have the letter ``P`` occupying its third position. Finally, gray letters can be simply
169 | stored in a list.
170 | 
171 | In order to have a concrete example to work with, we will be assuming that our current
172 | information about the problem is expressed by the following three data structures:
173 | 
174 | .. code-block:: python
175 | 
176 |   green: dict[int, str] = {4 : 'T'}
177 |   yellow: dict[int, list[str]] = {1 : ['A', 'R'], 3 : ['P']}
178 |   gray: list[str] = ['C', 'D', 'L', 'M', 'N', 'Q', 'U']
179 | 
180 | 
181 | Initializing a Pregex class instance
182 | ----------------------------------------
183 | 
184 | Having come up with a way of programmatically formulating the problem, the first step towards
185 | actually solving it would be to create a ``Pregex`` class instance:
186 | 
187 | .. code-block:: python
188 | 
189 |   wordle = Pregex()
190 | 
191 | Since we aren't providing a ``pattern`` parameter to the class's constructor, it automatically
192 | defaults to the empty string ``''``. Thus, through this instance we now have access to all methods
193 | of the ``Pregex`` class, though we are not really able to match anything with it yet.
194 | 
195 | 
196 | Yellow letter assertions
197 | ----------------------------------------
198 | 
199 | Before we go on to dictate what the valid letters for each position within the word
200 | are, we are first going to deal with yellow letters, that is, letters which we know are
201 | included in the word that we are looking for, though their position is still uncertain.
202 | Since we know for a fact that the sought out word contains these letters, we have to
203 | somehow make sure that any candidate word includes them as well. This can easily be
204 | done by using what is known in RegEx lingo as a *positive lookahead assertion*,
205 | represented in PRegEx by the less intimidating *FollowedBy*! Assertions are used in
206 | order to *assert* something about a pattern without really having to *match* any additional
207 | characters. A positive lookahead assertion, in particular, dictates that the pattern to which
208 | it is applied must be followed by some other pattern in order for the former to constitute
209 | a valid match.
210 | 
211 | In PRegEx, one is able to create a ``Pregex`` instance out of applying a positive
212 | lookahead assertion to some pattern ``p1`` by doing the following:
213 | 
214 | .. code-block:: python
215 | 
216 |   from pregex.core.assertions import FollowedBy
217 | 
218 |   pre = FollowedBy(p1, p2)
219 | 
220 | where both ``p1`` and ``p2`` are either strings or ``Pregex`` instances. Furthermore, in the
221 | case that ``p1`` already is a ``Pregex`` class instance, one can achieve the same result with:
222 | 
223 | .. code-block:: python
224 | 
225 |   pre = p1.followed_by(p2)
226 | 
227 | 
228 | Having initialized ``wordle`` as a ``Pregex`` instance, we can simply do
229 | ``wordle.followed_by(some_pattern)`` so as to indicate that any potential match
230 | with ``wordle`` must be followed by ``some_pattern``. Recall that ``wordle`` merely
231 | represents the empty string, so we are not really matching anything at this point.
232 | Applying an assertion to the empty string pattern is just a neat little trick one
233 | can use in order to validate something about their pattern before they even begin
234 | to build it.
235 | 
236 | Now it's just a matter of figuring out what the value of ``some_pattern`` is.
237 | Surely we can't just do ``wordle = wordle.followed_by(letter)``, as this results
238 | in ``letter`` always having to be at the beginning of the word. Here's however what
239 | we can do: It follows from the rules of Wordle that all words must be comprised of five
240 | letters, any of which is potentially a yellow letter. Thus, every yellow letter is certain
241 | to be preceded by up to four other letters, but no more than that. Therefore, we need a
242 | pattern that represents just that, namely *four letters at most*. By applying quantifier
243 | ``at_most(n=4)`` to an instance of ``AnyUppercaseLetter()``, we are able to create such
244 | a pattern. Add a yellow letter to its right and we have our ``some_pattern``. Since there
245 | may be more than one yellow letter, we make sure that we iterate over them all one by one so
246 | as to enforce a separate assertion for each:
247 | 
248 | .. code-block:: python
249 | 
250 |   from pregex.core.classes import AnyUppercaseLetter
251 | 
252 |   yellow_letters_list: list[str] = [l for letter_list in yellow.values() for l in letter_list]
253 | 
254 |   at_most_four_letters = AnyUppercaseLetter().at_most(n=4)
255 | 
256 |   for letter in yellow_letters_list:
257 |       wordle = wordle.followed_by(at_most_four_letters + letter)
258 | 
259 | By executing the above code snippet we get a ``Pregex`` instance which
260 | represents the following RegEx pattern:
261 | 
262 | .. code-block::
263 | 
264 |   (?=[A-Z]{,4}A)(?=[A-Z]{,4}R)(?=[A-Z]{,4}P)
265 | 
266 | Building valid character classes
267 | ----------------------------------------
268 | 
269 | After we have made sure that our pattern will reject any words that do not contain
270 | all the yellow letters, we can finally start building the part of the pattern that
271 | will handle the actual matching. This can easily be achieved by performing five
272 | iterations, one for each letter of the word, where at each iteration ``i`` we
273 | construct a new character class, which is then appended to our pattern based
274 | on the following logic:
275 | 
276 | * If the letter that corresponds to the word's i-th position is known, then
277 |   make it so that the pattern only matches that letter at that position.
278 | 
279 | * If the letter that corresponds to the word's i-th position is not known,
280 |   then make it so that the pattern matches any letter except for gray letters,
281 |   green letters, as well as any yellow letters that may have been ruled out for
282 |   that exact position.
283 | 
284 | The following code snippet does just that:
285 | 
286 | .. code-block:: python
287 | 
288 |   from pregex.core.classes import AnyFrom
289 | 
290 |   for i in range(1, 6):
291 |       if i in green:
292 |           wordle += green[i]
293 |       else:
294 |           invalid_chars_at_pos_i = gray + list(green.values())
295 |           if i in yellow:
296 |               invalid_chars_at_pos_i += yellow[i]
297 |           wordle += AnyUppercaseLetter() - AnyFrom(*invalid_chars_at_pos_i)
298 | 
299 | After executing the above code, ``wordle`` will contain the following
300 | RegEx pattern:
301 | 
302 | .. code-block::
303 | 
304 |   (?=[A-Z]{,4}A)(?=[A-Z]{,4}R)(?=[A-Z]{,4}P)[BE-KOPSV-Z][ABE-KOPRSV-Z][ABE-KORSV-Z]T[ABE-KOPRSV-Z]
305 | 
306 | Matching from a dictionary
307 | ---------------------------
308 | 
309 | Having built our pattern, the only thing left to do is to actually use it to
310 | match candidate words. Provided that we have access to a text file containing
311 | all possible Wordle words, we are able to invoke our ``Pregex`` instance's
312 | ``get_matches`` method in order to scan said text file for any potential matches. 
313 | 
314 | .. code-block:: python
315 | 
316 |   words = wordle.get_matches('word_dictionary.txt', is_path=True)
317 | 
318 | Putting it all together
319 | ----------------------------------------
320 | 
321 | Finally, we combine together everything we discussed into a single function that
322 | spews out a list of words which satisfy all necessary conditions so that they
323 | constitute possible solutions to the problem.
324 | 
325 | .. code-block:: python
326 | 
327 |   def wordle_solver(green: dict[int, str], yellow: dict[int, list[str]], gray: list[str]) -> list[str]:
328 | 
329 |       from pregex.core.pre import Pregex
330 |       from pregex.core.classes import AnyUppercaseLetter, AnyFrom
331 | 
332 |       # Initialize pattern as the empty string pattern.
333 |       wordle = Pregex()
334 | 
335 |       # This part ensures that yellow letters
336 |       # will appear at least once within the word.
337 |       yellow_letters_list = [l for letter_list in yellow.values() for l in letter_list]
338 |       at_most_four_letters = AnyUppercaseLetter().at_most(n=4)
339 |       for letter in yellow_letters_list:
340 |           wordle = wordle.followed_by(at_most_four_letters + letter)
341 | 
342 |       # This part actually dictates the set of valid letters
343 |       # for each position within the word.
344 |       for i in range(1, 6):
345 |           if i in green:
346 |               wordle += green[i]
347 |           else:
348 |               invalid_chars_at_pos_i = gray + list(green.values())
349 |               if i in yellow:
350 |                   invalid_chars_at_pos_i += yellow[i]
351 |               wordle += AnyUppercaseLetter() - AnyFrom(*invalid_chars_at_pos_i)
352 | 
353 |       # Match candidate words from dictionary and return them in a list.
354 |       return wordle.get_matches('word_dictionary.txt', is_path=True)
355 | 
356 | 
357 | By invoking the above function we get the following list of words:
358 | 
359 | .. code-block:: python
360 | 
361 |   word_candidates = wordle_solver(green, yellow, gray)
362 | 
363 |   print(word_candidates) # This prints ['PARTY']
364 | 
365 | Looks like there is only one candidate word, which means that we
366 | can consider our problem solved!
367 | 
368 | You can learn more about PRegEx by going through the
369 | `Documentation <documentation/covering-the-basics.html>`_
370 | section or by directly visiting PRegEx on
371 | `Github <https://github.com/manoss96/pregex>`_
372 | in order to check out the source code itself.


--------------------------------------------------------------------------------
/docs/source/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manoss96/pregex/33861d8e141116c670d4765ed2fdbc0eaf077114/docs/source/logo.png


--------------------------------------------------------------------------------
/docs/source/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx==5.3.0
2 | sphinx-rtd-theme==1.1.1
3 | readthedocs-sphinx-ext==2.2.0
4 | sphinxcontrib-applehelp==1.0.2
5 | sphinxcontrib-devhelp==1.0.2
6 | sphinxcontrib-htmlhelp==2.0.0
7 | sphinxcontrib-jsmath==1.0.1
8 | sphinxcontrib-qthelp==1.0.3
9 | sphinxcontrib-serializinghtml==1.1.5


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "pregex"
 3 | version = "2.3.3"
 4 | authors = [
 5 |   {email = "manosstoumpos@gmail.com"},
 6 |   {name = "Manos Stoumpos"}
 7 | ]
 8 | description = "PRegEx - Programmable Regular Expressions"
 9 | keywords = ["regex"]
10 | readme = "README.md"
11 | license = {text = "MIT"}
12 | requires-python = ">=3.9"
13 | classifiers = [
14 |     "Programming Language :: Python :: 3",
15 |     "License :: OSI Approved :: MIT License",
16 |     "Operating System :: OS Independent",
17 | ]
18 | 
19 | [project.urls]
20 | "Homepage" = "https://github.com/manoss96/pregex"
21 | "Bug Tracker" = "https://github.com/manoss96/pregex/issues"
22 | "Documentation" = "https://pregex.rtfd.io"


--------------------------------------------------------------------------------
/src/pregex/__init__.py:
--------------------------------------------------------------------------------
1 | from pregex.core import *
2 | from pregex.meta import *


--------------------------------------------------------------------------------
/src/pregex/core/__init__.py:
--------------------------------------------------------------------------------
1 | from pregex.core import assertions as asr
2 | from pregex.core import classes as cl
3 | from pregex.core import groups as gr
4 | from pregex.core import operators as op
5 | from pregex.core import quantifiers as qu
6 | from pregex.core import tokens as tk
7 | from pregex.core.pre import Pregex


--------------------------------------------------------------------------------
/src/pregex/core/assertions.py:
--------------------------------------------------------------------------------
  1 | __doc__ = """
  2 | All classes within this module "assert" something about the provided pattern
  3 | without having to match any additional characters. For example, :class:`MatchAtStart`
  4 | ensures that the provided pattern matches only when it is found at the start of the string,
  5 | while :class:`NotFollowedBy` asserts that a match must not be followed by one or more
  6 | specified patterns. Another thing you should keep in mind is that many of these assertions
  7 | cannot be repeated, as attempting that will cause a ``CannotBeRepeatedException`` exception
  8 | to be thrown.
  9 | 
 10 | Classes & methods
 11 | -------------------------------------------
 12 | 
 13 | Below are listed all classes within :py:mod:`pregex.core.assertions`
 14 | along with any possible methods they may possess.
 15 | """
 16 | 
 17 | 
 18 | import pregex.core.pre as _pre
 19 | import pregex.core.exceptions as _ex
 20 | from typing import Union as _Union
 21 | 
 22 | 
 23 | class __Assertion(_pre.Pregex):
 24 |     '''
 25 |     Shared parent of the `__Anchor` and `__Lookaround` base classes.
 26 | 
 27 |     :param str pattern: The RegEx pattern that expresses the assertion.
 28 |     '''
 29 |     def __init__(self, pattern: str):
 30 |         '''
 31 |         Forwards the given RegEx pattern to the ``Pregex`` constructor.
 32 | 
 33 |         :param str pattern: The RegEx pattern that expresses the assertion.
 34 |         '''
 35 |         super().__init__(pattern, escape=False)  # NOTE(review): presumably keeps the pattern verbatim - confirm in Pregex
 36 | 
 37 | 
 38 | class __Anchor(__Assertion):
 39 |     '''
 40 |     Constitutes the base class for all `anchor` classes that are part of this module.
 41 | 
 42 |     :param Pregex | str pre: A Pregex instance or string representing the `anchor` pattern.
 43 |     :param (Pregex => str) transform: A `transform` function for the provided pattern.
 44 | 
 45 |     :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a ``Pregex`` instance \
 46 |         nor a string.
 47 |     '''
 48 |     def __init__(self, pre: _Union[_pre.Pregex, str], transform):
 49 |         '''
 50 |         Constitutes the base class for all `anchor` classes that are part of this module.
 51 | 
 52 |         :param Pregex | str pre: A Pregex instance or string representing the `anchor` pattern.
 53 |         :param (Pregex => str) transform: A `transform` function for the provided pattern.
 54 | 
 55 |         :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a ``Pregex`` instance \
 56 |             nor a string.
 57 |         '''
 58 |         super().__init__(str(transform(__class__._to_pregex(pre))))
 59 | 
 60 | 
 61 | class __Lookaround(__Assertion):
 62 |     '''
 63 |     Constitutes the base class for all "Lookaround" classes.
 64 | 
 65 |     :param Pregex | str pres: Two or more Pregex instances, the first of which always \
 66 |         represents the `match` pattern, while the rest constitute `assertion` patterns.
 67 |     :param (tuple[Pregex | str] => str) transform: A `transform` function for the provided patterns.
 68 | 
 69 |     :raises NotEnoughArgumentsException: No assertion patterns were provided.
 70 |     :raises EmptyNegativeAssertionException: The empty string is provided \
 71 |         as one of the assertion patterns.
 72 |     '''
 73 |     def __init__(self, pres: tuple[_Union[_pre.Pregex, str]], transform) -> _pre.Pregex:
 74 |         '''
 75 |         Constitutes the base class for all "Lookaround" classes.
 76 | 
 77 |         :param Pregex | str pres: Two or more Pregex instances, the first of which always \
 78 |             represents the `match` pattern, while the rest constitute `assertion` patterns.
 79 |         :param (tuple[Pregex | str] => str) transform: A `transform` function for the provided patterns.
 80 | 
 81 |         :raises NotEnoughArgumentsException: No assertion patterns were provided.
 82 |         :raises EmptyNegativeAssertionException: The empty string is provided \
 83 |             as one of the assertion patterns.
 84 |         '''
 85 |         if len(pres) < 2:
 86 |             message = "At least one assertion pattern is required."
 87 |             raise _ex.NotEnoughArgumentsException(message)
 88 |         result = __class__._to_pregex(pres[0])
 89 |         for pre in pres[1:]:
 90 |             result = transform(result, pre)
 91 |         super().__init__(str(result))
 92 | 
 93 | 
 94 | class MatchAtStart(__Anchor):
 95 |     '''
 96 |     Matches the provided pattern only if it is at the start of the string.
 97 | 
 98 |     :param Pregex | str pre: The pattern that is to be matched.
 99 | 
100 |     :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
101 |         ``Pregex`` instance nor a string.
102 | 
103 |     :note: The resulting pattern cannot have a repeating quantifier applied to it.
104 |     '''
105 | 
106 |     def __init__(self, pre: _Union[_pre.Pregex, str]):
107 |         '''
108 |         Matches the provided pattern only if it is at the start of the string.
109 | 
110 |         :param Pregex | str pre: The pattern that is to be matched.
111 | 
112 |         :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
113 |             ``Pregex`` instance nor a string.
114 | 
115 |         :note: The resulting pattern cannot have a repeating quantifier applied to it.
116 |         '''
117 |         super().__init__(pre, lambda pre: pre.match_at_start())
118 | 
119 | 
120 | class MatchAtEnd(__Anchor):
121 |     '''
122 |     Matches the provided pattern only if it is at the end of the string.
123 | 
124 |     :param Pregex | str pre: The pattern that is to be matched.
125 | 
126 |     :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
127 |         ``Pregex`` instance nor a string.
128 | 
129 |     :note: The resulting pattern cannot have a repeating quantifier applied to it.
130 |     '''
131 | 
132 |     def __init__(self, pre: _Union[_pre.Pregex, str]):
133 |         '''
134 |         Matches the provided pattern only if it is at the end of the string.
135 | 
136 |         :param Pregex | str pre: The pattern that is to be matched.
137 | 
138 |         :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
139 |             ``Pregex`` instance nor a string.
140 | 
141 |         :note: The resulting pattern cannot have a repeating quantifier applied to it.
142 |         '''
143 |         super().__init__(pre, lambda pre: pre.match_at_end())
144 | 
145 | 
146 | class MatchAtLineStart(__Anchor):
147 |     '''
148 |     Matches the provided pattern only if it is at the start of a line.
149 | 
150 |     :param Pregex | str pre: The pattern that is to be matched.
151 | 
152 |     :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
153 |         ``Pregex`` instance nor a string.
154 | 
155 |     :note:
156 |         - The resulting pattern cannot have a repeating quantifier applied to it.
157 |         - Uses meta character ``^`` since the `MULTILINE` flag is considered on.
158 |     '''
159 | 
160 |     def __init__(self, pre: _Union[_pre.Pregex, str]):
161 |         '''
162 |         Matches the provided pattern only if it is at the start of a line.
163 | 
164 |         :param Pregex | str pre: The pattern that is to be matched.
165 | 
166 |         :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
167 |             ``Pregex`` instance nor a string.
168 | 
169 |         :note:
170 |             - The resulting pattern cannot have a repeating quantifier applied to it.
171 |             - Uses meta character ``^`` since the `MULTILINE` flag is considered on.
172 |         '''
173 |         super().__init__(pre, lambda pre: pre.match_at_line_start())
174 | 
175 | 
176 | class MatchAtLineEnd(__Anchor):
177 |     '''
178 |     Matches the provided pattern only if it is at the end of a line.
179 | 
180 |     :param Pregex | str pre: The pattern that is to be matched.
181 | 
182 |     :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
183 |         ``Pregex`` instance nor a string.
184 | 
185 |     :note:
186 |         - The resulting pattern cannot have a repeating quantifier applied to it.
187 |         - Uses meta character ``$`` since the `MULTILINE` flag is considered on.
188 |     '''
189 | 
190 |     def __init__(self, pre: _Union[_pre.Pregex, str]):
191 |         '''
192 |         Matches the provided pattern only if it is at the end of a line.
193 | 
194 |         :param Pregex | str pre: The pattern that is to be matched.
195 | 
196 |         :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
197 |             ``Pregex`` instance nor a string.
198 | 
199 |         :note:
200 |             - The resulting pattern cannot have a repeating quantifier applied to it.
201 |             - Uses meta character ``$`` since the `MULTILINE` flag is considered on.
202 |         '''
203 |         super().__init__(pre, lambda pre: pre.match_at_line_end())
204 | 
205 | 
206 | class WordBoundary(__Anchor):
207 |     '''
208 |     Asserts that the position, at which an instance of this class is placed, \
209 |     must constitute a word boundary.
210 |     '''
211 | 
212 |     def __init__(self):
213 |         '''
214 |         Asserts that the position, at which an instance of this class is placed, \
215 |         must constitute a word boundary.
216 |         '''
217 |         super().__init__(_pre.Pregex(), lambda pre: pre.concat(_pre.Pregex("\\b", escape=False)))
218 | 
219 | 
220 | class NonWordBoundary(__Anchor):
221 |     '''
222 |     Asserts that the position, at which an instance of this class is placed, \
223 |     must not constitute a word boundary.
224 |     '''
225 | 
226 |     def __init__(self):
227 |         '''
228 |         Asserts that the position, at which an instance of this class is placed, \
229 |         must not constitute a word boundary.
230 |         '''
231 |         super().__init__(_pre.Pregex(), lambda pre: pre.concat(_pre.Pregex("\\B", escape=False)))
232 | 
233 | 
234 | class FollowedBy(__Lookaround):
235 |     '''
236 |     Matches pattern ``match`` only if it is directly followed \
237 |     by all of the provided ``assertion`` patterns.
238 | 
239 |     :param Pregex | str match: A Pregex instance or string \
240 |         representing the `match` pattern.
241 |     :param Pregex | str \*assertions: One or more patterns, all of which must \
242 |         come right after pattern ``match`` in order for it to be considered a match.
243 | 
244 |     :raises NotEnoughArgumentsException: No assertion patterns were provided.
245 |     :raises InvalidArgumentTypeException: At least one of the provided arguments \
246 |         is neither a ``Pregex`` instance nor a string.
247 | 
248 |     :note: The resulting pattern cannot have a repeating quantifier applied to it.
249 |     '''
250 | 
251 |     def __init__(self, match: _Union[_pre.Pregex, str], *assertions: _Union[_pre.Pregex, str]):
252 |         '''
253 |         Matches pattern ``match`` only if it is directly followed \
254 |         by all of the provided ``assertion`` patterns.
255 | 
256 |         :param Pregex | str match: A Pregex instance or string \
257 |             representing the `match` pattern.
258 |         :param Pregex | str \*assertions: One or more patterns, all of which must \
259 |             come right after pattern ``match`` in order for it to be considered a match.
260 | 
261 |         :raises NotEnoughArgumentsException: No assertion patterns were provided.
262 |         :raises InvalidArgumentTypeException: At least one of the provided arguments \
263 |             is neither a ``Pregex`` instance nor a string.
264 | 
265 |         :note: The resulting pattern cannot have a repeating quantifier applied to it.
266 |         '''
267 |         super().__init__((match, *assertions), lambda pre1, pre2: pre1.followed_by(pre2))
268 | 
269 | 
270 | class PrecededBy(__Lookaround):
271 |     '''
272 |     Matches pattern ``match`` only if it is directly preceded \
273 |     by all of the provided ``assertion`` patterns.
274 | 
275 |     :param Pregex | str match: A Pregex instance or string \
276 |         representing the `match` pattern.
277 |     :param Pregex | str \*assertions: One or more patterns, all of which must \
278 |         come right before pattern ``match`` in order for it to be considered a match.
279 | 
280 |     :raises NotEnoughArgumentsException: No assertion patterns were provided.
281 |     :raises InvalidArgumentTypeException: At least one of the provided arguments \
282 |         is neither a ``Pregex`` instance nor a string.
283 |     :raises NonFixedWidthPatternException: Parameter ``assertion`` \
284 |         corresponds to a pattern that does not have a fixed width.
285 | 
286 |     :note: The resulting pattern cannot have a repeating quantifier applied to it.
287 |     '''
288 | 
289 |     def __init__(self, match: _Union[_pre.Pregex, str], *assertions: _Union[_pre.Pregex, str]):
290 |         '''
291 |         Matches pattern ``match`` only if it is directly preceded \
292 |         by all of the provided ``assertion`` patterns.
293 | 
294 |         :param Pregex | str match: A Pregex instance or string \
295 |             representing the `match` pattern.
296 |         :param Pregex | str \*assertions: One or more patterns, all of which must \
297 |             come right before pattern ``match`` in order for it to be considered a match.
298 | 
299 |         :raises NotEnoughArgumentsException: No assertion patterns were provided.
300 |         :raises InvalidArgumentTypeException: At least one of the provided arguments \
301 |             is neither a ``Pregex`` instance nor a string.
302 |         :raises NonFixedWidthPatternException: Parameter ``assertion`` \
303 |             corresponds to a pattern that does not have a fixed width.
304 | 
305 |         :note: The resulting pattern cannot have a repeating quantifier applied to it.
306 |         '''
307 |         super().__init__((match, *assertions), lambda pre1, pre2: pre1.preceded_by(pre2))
308 | 
309 | 
310 | class EnclosedBy(__Lookaround):
311 |     '''
312 |     Matches pattern ``match`` only if it is both directly preceded \
313 |     and followed by all of the provided ``assertion`` patterns.
314 | 
315 |     :param Pregex | str match: A Pregex instance or string \
316 |         representing the `match` pattern.
317 |     :param Pregex | str \*assertions: One or more patterns, all of which must \
318 |         come both right before and right after pattern ``match`` in order for \
319 |         it to be considered a match.
320 | 
321 |     :raises NotEnoughArgumentsException: No assertion patterns were provided.
322 |     :raises InvalidArgumentTypeException: At least one of the provided arguments \
323 |         is neither a ``Pregex`` instance nor a string.
324 |     :raises NonFixedWidthPatternException: Parameter ``assertion`` \
325 |         corresponds to a pattern that does not have a fixed width.
326 | 
327 |     :note: The resulting pattern cannot have a repeating quantifier applied to it.
328 |     '''
329 | 
330 |     def __init__(self, match: _Union[_pre.Pregex, str], *assertions: _Union[_pre.Pregex, str]):
331 |         '''
332 |         Matches pattern ``match`` only if it is both directly preceded \
333 |         and followed by all of the provided ``assertion`` patterns.
334 | 
335 |         :param Pregex | str match: A Pregex instance or string \
336 |             representing the `match` pattern.
337 |         :param Pregex | str \*assertions: One or more patterns, all of which must \
338 |             come both right before and right after pattern ``match`` in order for \
339 |             it to be considered a match.
340 | 
341 |         :raises NotEnoughArgumentsException: No assertion patterns were provided.
342 |         :raises InvalidArgumentTypeException: At least one of the provided arguments \
343 |             is neither a ``Pregex`` instance nor a string.
344 |         :raises NonFixedWidthPatternException: Parameter ``assertion`` \
345 |             corresponds to a pattern that does not have a fixed width.
346 | 
347 |         :note: The resulting pattern cannot have a repeating quantifier applied to it.
348 |         '''
349 |         super().__init__((match, *assertions), lambda pre1, pre2: pre1.enclosed_by(pre2))
350 | 
351 | 
352 | class NotFollowedBy(__Lookaround):
353 |     '''
354 |     Matches pattern ``match`` only if it is not directly followed by \
355 |     any one of the provided ``assertions`` patterns.
356 | 
357 |     :param Pregex | str match: The pattern that is to be matched.
358 |     :param Pregex | str \*assertions: One or more patterns, none of which must \
359 |         come right after pattern ``match`` in order for it to be considered a match.
360 | 
361 |     :raises NotEnoughArgumentsException: No assertion patterns were provided.
362 |     :raises InvalidArgumentTypeException: At least one of the provided arguments \
363 |         is neither a ``Pregex`` instance nor a string.
364 |     :raises EmptyNegativeAssertionException: At least one of the provided assertion \
365 |         patterns is the empty-string pattern.
366 |     '''
367 | 
368 |     def __init__(self, match: _Union[_pre.Pregex, str], *assertions: _Union[_pre.Pregex, str]):
369 |         '''
370 |         Matches pattern ``match`` only if it is not directly followed by \
371 |         any one of the provided ``assertions`` patterns.
372 | 
373 |         :param Pregex | str match: The pattern that is to be matched.
374 |         :param Pregex | str \*assertions: One or more patterns, none of which must \
375 |             come right after pattern ``match`` in order for it to be considered a match.
376 | 
377 |         :raises NotEnoughArgumentsException: No assertion patterns were provided.
378 |         :raises InvalidArgumentTypeException: At least one of the provided arguments \
379 |             is neither a ``Pregex`` instance nor a string.
380 |         :raises EmptyNegativeAssertionException: At least one of the provided assertion \
381 |             patterns is the empty-string pattern.
382 |         '''
383 |         super().__init__((match, *assertions),
384 |             lambda pre1, pre2: pre1.not_followed_by(pre2))
385 | 
386 | 
387 | class NotPrecededBy(__Lookaround):
388 |     '''
389 |     Matches pattern ``match`` only if it is not directly preceded by \
390 |     any one of the provided ``assertions`` patterns.
391 | 
392 |     :param Pregex | str match: The pattern that is to be matched.
393 |     :param Pregex | str \*assertions: One or more patterns, none of which must \
394 |         come right before pattern ``match`` in order for it to be considered a match.
395 | 
396 |     :raises NotEnoughArgumentsException: No assertion patterns were provided.
397 |     :raises InvalidArgumentTypeException: At least one of the provided arguments \
398 |         is neither a ``Pregex`` instance nor a string.
399 |     :raises EmptyNegativeAssertionException: At least one of the provided assertion \
400 |         patterns is the empty-string pattern.
401 |     :raises NonFixedWidthPatternException: At least one of the provided assertion \
402 |         patterns does not have a fixed width.
403 |     '''
404 | 
405 |     def __init__(self, match: _Union[_pre.Pregex, str], *assertions: _Union[_pre.Pregex, str]):
406 |         '''
407 |         Matches pattern ``match`` only if it is not directly preceded by \
408 |         any one of the provided ``assertions`` patterns.
409 | 
410 |         :param Pregex | str match: The pattern that is to be matched.
411 |         :param Pregex | str \*assertions: One or more patterns, none of which must \
412 |             come right before pattern ``match`` in order for it to be considered a match.
413 | 
414 |         :raises NotEnoughArgumentsException: No assertion patterns were provided.
415 |         :raises InvalidArgumentTypeException: At least one of the provided arguments \
416 |             is neither a ``Pregex`` instance nor a string.
417 |         :raises EmptyNegativeAssertionException: At least one of the provided assertion \
418 |             patterns is the empty-string pattern.
419 |         :raises NonFixedWidthPatternException: At least one of the provided assertion \
420 |             patterns does not have a fixed width.
421 |         '''
422 |         super().__init__((match, *assertions),
423 |             lambda pre1, pre2: pre1.not_preceded_by(pre2))
424 | 
425 | 
426 | class NotEnclosedBy(__Lookaround):
427 |     '''
428 |     Matches pattern ``match`` only if it is neither directly preceded \
429 |     nor followed by any one of the provided ``assertions`` patterns.
430 | 
431 |     :param Pregex | str match: The pattern that is to be matched.
432 |     :param Pregex | str \*assertions: One or more patterns, none of which must \
433 |         come either right before or right after pattern ``match`` in order for \
434 |         it to be considered a match.
435 | 
436 |     :raises NotEnoughArgumentsException: No assertion patterns were provided.
437 |     :raises InvalidArgumentTypeException: At least one of the provided arguments \
438 |         is neither a ``Pregex`` instance nor a string.
439 |     :raises EmptyNegativeAssertionException: At least one of the provided assertion \
440 |         patterns is the empty-string pattern.
441 |     :raises NonFixedWidthPatternException: At least one of the provided assertion \
442 |         patterns does not have a fixed width.
443 |     '''
444 | 
445 |     def __init__(self, match: _Union[_pre.Pregex, str], *assertions: _Union[_pre.Pregex, str]):
446 |         '''
447 |         Matches pattern ``match`` only if it is neither directly preceded \
448 |         nor followed by any one of the provided ``assertions`` patterns.
449 | 
450 |         :param Pregex | str match: The pattern that is to be matched.
451 |         :param Pregex | str \*assertions: One or more patterns, none of which must \
452 |             come either right before or right after pattern ``match`` in order for \
453 |             it to be considered a match.
454 | 
455 |         :raises NotEnoughArgumentsException: No assertion patterns were provided.
456 |         :raises InvalidArgumentTypeException: At least one of the provided arguments \
457 |             is neither a ``Pregex`` instance nor a string.
458 |         :raises EmptyNegativeAssertionException: At least one of the provided assertion \
459 |             patterns is the empty-string pattern.
460 |         :raises NonFixedWidthPatternException: At least one of the provided assertion \
461 |             patterns does not have a fixed width.
462 |         '''
463 |         super().__init__((match, *assertions),
464 |             lambda pre1, pre2: pre1.not_enclosed_by(pre2))


--------------------------------------------------------------------------------
/src/pregex/core/exceptions.py:
--------------------------------------------------------------------------------
  1 | class InvalidArgumentValueException(Exception):
  2 |     '''
  3 |     This exception is thrown whenever an argument of invalid value is provided.
  4 | 
  5 |     :param str message: The message that is to be displayed along with \
  6 |         the exception. It is passed as is to the ``Exception`` constructor.
  7 |     '''
  8 | 
  9 |     def __init__(self, message: str):
 10 |         '''
 11 |         This exception is thrown whenever an argument of invalid value is provided.
 12 | 
 13 |         :param str message: The message that is to be displayed along with \
 14 |             the exception. It is passed as is to the ``Exception`` constructor.
 15 |         '''
 16 |         super().__init__(message)
 17 | 
 18 | 
 19 | class InvalidArgumentTypeException(Exception):
 20 |     '''
 21 |     This exception is thrown whenever an argument of invalid type is provided.
 22 | 
 23 |     :param str message: The message that is to be displayed along with \
 24 |         the exception. It is passed as is to the ``Exception`` constructor.
 25 |     '''
 26 | 
 27 |     def __init__(self, message: str):
 28 |         '''
 29 |         This exception is thrown whenever an argument of invalid type is provided.
 30 | 
 31 |         :param str message: The message that is to be displayed along with \
 32 |             the exception. It is passed as is to the ``Exception`` constructor.
 33 |         '''
 34 |         super().__init__(message)
 35 | 
 36 | 
 37 | class NotEnoughArgumentsException(Exception):
 38 |     '''
 39 |     This exception is thrown whenever an insufficient number \
 40 |     of arguments is provided.
 41 | 
 42 |     :param str message: The message that is to be displayed along with \
 43 |         the exception. It is passed as is to the ``Exception`` constructor.
 44 |     '''
 45 | 
 46 |     def __init__(self, message: str):
 47 |         '''
 48 |         This exception is thrown whenever an insufficient number \
 49 |         of arguments is provided.
 50 | 
 51 |         :param str message: The message that is to be displayed along with \
 52 |             the exception. It is passed as is to the ``Exception`` constructor.
 53 |         '''
 54 |         super().__init__(message)
 55 | 
 56 | 
 57 | class InvalidCapturingGroupNameException(Exception):
 58 |     '''
 59 |     This exception is thrown whenever an invalid name \
 60 |     for a capturing group was provided.
 61 | 
 62 |     :param str name: The string type argument because of which this exception was thrown.
 63 |     '''
 64 | 
 65 |     def __init__(self, name: str):
 66 |         '''
 67 |         This exception is thrown whenever an invalid name \
 68 |         for a capturing group was provided.
 69 | 
 70 |         :param str name: The string type argument because of which this exception was thrown.
 71 |         '''
 72 |         super().__init__(f"Name \"{name}\" is not valid. A capturing group's " +
 73 |         "name must be an alphanumeric sequence that starts with a non-digit.")
 74 | 
 75 | 
 76 | class CannotBeNegatedException(Exception):
 77 |     '''
 78 |     This exception is thrown whenever one tries to negate class ``Any``.
 79 |     '''
 80 |     def __init__(self):
 81 |         '''
 82 |         This exception is thrown whenever one tries to negate class ``Any``.
 83 |         '''
 84 |         super().__init__(f"Class \"Any\" cannot be negated.")
 85 | 
 86 | 
 87 | class CannotBeUnionedException(Exception):
 88 |     '''
 89 |     This exception is thrown whenever one tries to union a class (or negated class) \
 90 |     either with a negated class (or regular class) or an object of different type.
 91 | 
 92 |     :param Pregex pre: The ``Pregex`` instance because of which this exception was thrown.
 93 |     :param bool are_both_classes: Indicates whether both ``Pregex`` instances are of \ 
 94 |         type ``__Class``.
 95 |     '''
 96 | 
 97 |     def __init__(self, pre, are_both_classes: bool):
 98 |         '''
 99 |         This exception is thrown whenever one tries to union a class (or negated class) \
100 |         either with a negated class (or regular class) or an object of different type.
101 | 
102 |         :param Pregex pre: The ``Pregex`` instance because of which this exception was thrown.
103 |         :param bool are_both_classes: Indicates whether both ``Pregex`` instances are of \ 
104 |             type ``__Class``.
105 |         '''
106 |         m = f"Classes and negated classes cannot be unioned together." if are_both_classes \
107 |             else f"Instance of type \"{type(pre).__name__}\" cannot be unioned with a class."
108 |         super().__init__(m)
109 | 
110 | 
111 | class CannotBeSubtractedException(Exception):
112 |     '''
113 |     This exception is thrown whenever one tries to subtract a class (or negated class) \
114 |     either from a negated class (or regular class) or an object of different type.
115 | 
116 |     :param Pregex pre: The ``Pregex`` instance because of which this exception was thrown.
117 |     :param bool are_both_classes: Indicates whether both ``Pregex`` instances are of type ``__Class``.
118 |     '''
119 | 
120 |     def __init__(self, pre, are_both_classes: bool):
121 |         '''
122 |         This exception is thrown whenever one tries to subtract a class (or negated class) \
123 |         either from a negated class (or regular class) or an object of different type.
124 | 
125 |         :param Pregex pre: The ``Pregex`` instance because of which this exception was thrown.
126 |         :param bool are_both_classes: Indicates whether both ``Pregex`` instances are of type ``__Class``.
127 |         '''
128 |         m = f"Classes and negated classes cannot be subtracted from one another." if are_both_classes \
129 |             else f"Instance of type \"{type(pre).__name__}\" cannot be subtracted from a class."
130 |         super().__init__(m)
131 | 
132 | 
133 | class GlobalWordCharSubtractionException(Exception):
134 |     '''
135 |     This exception is thrown whenever one tries to subtract from an instance of \
136 |     either one of ``AnyWordChar`` or ``AnyButWordChar`` classes, for which parameter \
137 |     "is_global" has been set to ``True``.
138 | 
139 |     :param AnyWordChar | AnyButWordChar pre: An instance of either one of the two classes.
140 |     '''
141 | 
142 |     def __init__(self, pre):
143 |         '''
144 |         This exception is thrown whenever one tries to subtract from an instance of \
145 |         either one of ``AnyWordChar`` or ``AnyButWordChar`` classes, for which parameter \
146 |         "is_global" has been set to ``True``.
147 | 
148 |         :param AnyWordChar | AnyButWordChar pre: An instance of either one of the two classes.
149 |         '''
150 |         m = f"Cannot subtract from an instance of class \"{type(pre).__name__}\"" + \
151 |              " for which parameter \"is_global\" has been set to \"True\"."
152 |         super().__init__(m)
153 | 
154 | 
155 | class EmptyClassException(Exception):
156 |     '''
157 |     This exception is thrown whenever one tries to subtract a class (or negated class) \
158 |     from a class (or negated class) which results in an empty class.
159 | 
160 |     :param Pregex pre1: The ``Pregex`` instance because of which this exception was thrown.
161 |     :param Pregex pre2: The ``Pregex`` instance because of which this exception was thrown.
162 |     '''
163 | 
164 |     def __init__(self, pre1, pre2):
165 |         '''
166 |         This exception is thrown whenever one tries to subtract a class (or negated class) \
167 |         from a class (or negated class) which results in an empty class.
168 | 
169 |         :param Pregex pre1: The ``Pregex`` instance because of which this exception was thrown.
170 |         :param Pregex pre2: The ``Pregex`` instance because of which this exception was thrown.
171 |         '''
172 |         m = f"Cannot subtract class \"{pre2}\" from class \"{pre1}\"" \
173 |             " as this results into an empty class."
174 |         super().__init__(m)
175 | 
176 | 
177 | class InvalidRangeException(Exception):
178 |     '''
179 |     This exception is thrown whenever there was provided a pair \
180 |     of values ``start`` and ``end``, where ``start`` comes after ``end``.
181 | 
182 |     :param int start: The integer because of which this exception was thrown.
183 |     :param int end: The integer because of which this exception was thrown.
184 |     '''
185 | 
186 |     def __init__(self, start: int, end: int):
187 |         '''
188 |         This exception is thrown whenever there was provided a pair \
189 |         of values ``start`` and ``end``, where ``start`` comes after ``end``.
190 | 
191 |         :param int start: The integer because of which this exception was thrown.
192 |         :param int end: The integer because of which this exception was thrown.
193 |         '''
194 |         super().__init__(f"\"[{start}-{end}]\" is not a valid range.")
195 | 
196 | 
197 | class CannotBeRepeatedException(Exception):
198 |     '''
199 |     This exception is thrown whenever an instance of a class \
200 |     that is part of the ``assertions`` module is being quantified.
201 | 
202 |     :param __Assertion pre: The ``__Assertion`` instance because of which this exception was thrown.
203 |     '''
204 | 
205 |     def __init__(self, pre):
206 |         '''
207 |         This exception is thrown whenever there is an attempt to \
208 |         repeat a non-repeatable pattern.
209 | 
210 |         :param __Assertion pre: The ``__Assertion`` instance because of which this exception was thrown.
211 |         '''
212 |         m = f"Pattern \"{pre.get_pattern()}\" is non-repeatable."
213 |         super().__init__(m)
214 | 
215 | 
216 | class NonFixedWidthPatternException(Exception):
217 |     '''
218 |     This exception is thrown whenever a non-fixed-width pattern is being
219 |     provided as lookbehind-pattern to either ``PrecededBy`` or ``NotPrecededBy``.
220 | 
221 |     :param __Lookaround lookbehind: The ``__Lookaround`` instance because of which this exception was thrown.
222 |     :param Pregex pre: The ``Pregex`` instance because of which this exception was thrown.
223 |     '''
224 | 
225 |     def __init__(self, lookbehind):
226 |         '''
227 |         This exception is thrown whenever a non-fixed-width pattern is being
228 |         provided as lookbehind-pattern to either ``PrecededBy`` or ``NotPrecededBy``.
229 | 
230 |         :param __Lookaround lookbehind: The ``__Lookaround`` instance because of which this exception was thrown.
231 |         '''
232 |         m = f"Pattern '{lookbehind.get_pattern()}' cannot be used as a lookbehind"
233 |         m += f" assertion pattern due to its variable length."
234 |         super().__init__(m)
235 | 
236 | 
237 | class EmptyNegativeAssertionException(Exception):
238 |     '''
239 |     This exception is thrown whenever the ``Empty`` pattern is provided
240 |     as a negative assertion.
241 |     '''
242 | 
243 |     def __init__(self):
244 |         '''
245 |         This exception is thrown whenever the ``Empty`` pattern is provided
246 |         as a negative assertion.
247 |         '''
248 |         message = "The empty string can't be provided as a negative lookaround assertion pattern."
249 |         super().__init__(message)


--------------------------------------------------------------------------------
/src/pregex/core/groups.py:
--------------------------------------------------------------------------------
  1 | __doc__ = """
  2 | This module contains all necessary classes that are used to construct both
  3 | capturing and non-capturing groups, as well as any other classes which relate
  4 | to concepts that are based on groups, such as backreferences and conditionals.
  5 | 
  6 | Pattern grouping
  7 | -------------------------------------------
  8 | In general, one should not have to concern themselves with pattern grouping,
  9 | as patterns are automatically wrapped within non-capturing groups whenever this is
 10 | deemed necessary. Consider for instance the following code snippet:
 11 | 
 12 | .. code-block:: python
 13 | 
 14 |    from pregex.core.quantifiers import Optional
 15 | 
 16 |    Optional('a').print_pattern() # This prints "a?"
 17 |    Optional('aa').print_pattern() # This prints "(?:aa)?"
 18 | 
 19 | In the first case, quantifier :class:`~pregex.core.quantifiers.Optional` is applied to
 20 | the pattern directly, whereas in the second case the pattern is placed into a non-capturing
 21 | group so that "aa" is quantified as a whole. Be that as it may, there exists a separate class,
 22 | namely :class:`Group`, through which one is able to explicitly wrap any pattern within
 23 | a non-capturing group if they wish to do so:
 24 | 
 25 | .. code-block:: python
 26 | 
 27 |    from pregex.core.groups import Group
 28 |    from pregex.core.quantifiers import Optional
 29 | 
 30 |    pre = Group(Optional('a'))
 31 | 
 32 |    pre.print_pattern() # This prints "(?:a?)"
 33 | 
 34 | This class can also be used so as to apply various RegEx flags, also known \
 35 | as *modifiers*, to a pattern. As of yet, the only flag that is supported is
 36 | the case-insensitive flag ``i``:
 37 | 
 38 | .. code-block:: python
 39 | 
 40 |    from pregex.core.groups import Group
 41 | 
 42 |    pre = Group('pregex', is_case_insensitive=True)
 43 | 
 44 |    # This statement is "True"
 45 |    pre.is_exact_match('PRegEx')
 46 | 
 47 | 
 48 | Capturing patterns
 49 | -------------------------------------------
 50 | 
 51 | You'll find however that :class:`Capture` is probably the most important class
 52 | of this module, as it is used to create a capturing group out of a pattern,
 53 | so that said pattern is captured separately whenever a match occurs.
 54 | 
 55 | .. code-block:: python
 56 | 
 57 |    from pregex.core.groups import Capture
 58 |    from pregex.core.classes import AnyLetter
 59 | 
 60 |    pre = AnyLetter() + Capture(2 * AnyLetter())
 61 | 
 62 |    text = "abc def"
 63 |    print(pre.get_matches(text)) # This prints "['abc', 'def']"
 64 |    print(pre.get_captures(text)) # This prints "[('bc'), ('ef')]"
 65 | 
 66 | As you can see, capturing is a very useful tool for whenever you are
 67 | interested in isolating some specific part of a pattern.
 68 | 
 69 | Classes & methods
 70 | -------------------------------------------
 71 | 
 72 | Below are listed all classes within :py:mod:`pregex.core.groups`
 73 | along with any possible methods they may possess.
 74 | """
 75 | 
 76 | 
 77 | import re as _re
 78 | import pregex.core.pre as _pre
 79 | import pregex.core.exceptions as _ex
 80 | from typing import Union as _Union
 81 | from typing import Optional as _Optional
 82 | 
 83 | 
 84 | class __Group(_pre.Pregex):
 85 |     '''
 86 |     Constitutes the base class for all classes that are part of this module.
 87 | 
 88 |     :param Pregex | str pre: A Pregex instance or string representing the pattern \
 89 |         that is to be groupped.
 90 |     :param (Pregex => str) transform: A `transform` function for the provided pattern.
 91 | 
 92 |     :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
 93 |         ``Pregex`` instance nor a string.
 94 |     '''
 95 |     def __init__(self, pre: _Union[_pre.Pregex, str], transform) -> _pre.Pregex:
 96 |         '''
 97 |         Constitutes the base class for all classes that are part of this module.
 98 | 
 99 |         :param Pregex | str pre: A Pregex instance or string representing the pattern \
100 |             that is to be groupped.
101 |         :param (Pregex => str) transform: A `transform` function for the provided pattern.
102 | 
103 |         :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
104 |             ``Pregex`` instance nor a string.
105 |         '''
106 |         pattern = transform(__class__._to_pregex(pre))
107 |         super().__init__(str(pattern), escape=False)
108 | 
109 | 
110 | class Capture(__Group):
111 |     '''
112 |     Creates a capturing group out of the provided pattern.
113 | 
114 |     :param Pregex | str pre: The pattern out of which the capturing group is created.
115 |     :param str name: The name that is assigned to the captured group \
116 |         for backreference purposes. A value of ``None`` indicates that no name \
117 |         is to be assigned to the group. Defaults to ``None``.
118 | 
119 |     :raises InvalidArgumentTypeException:
120 |         - Parameter ``pre`` is neither a ``Pregex`` instance nor a string.
121 |         - Parameter ``name`` is neither a string nor ``None``.
122 |     :raises InvalidCapturingGroupNameException: Parameter ``name`` is not a valid \
123 |         capturing group name. Such name must contain word characters only and start \
124 |         with a non-digit character.
125 | 
126 |     :note:
127 |         - Creating a capturing group out of a capturing group does nothing to it.
128 |         - Creating a capturing group out of a non-capturing group converts it \
129 |             into a capturing group, except if any flags have been applied to it, \
130 |             in which case, the non-capturing group is wrapped within a capturing \
131 |             group as a whole.
132 |         - Creating a named capturing group out of an unnamed capturing group, \
133 |           assigns a name to it.
134 |         - Creating a named capturing group out of a named capturing group, \
135 |           changes the group's name.
136 |     '''
137 | 
138 |     def __init__(self, pre: _Union[_pre.Pregex, str], name: _Optional[str] = None):
139 |         '''
140 |         Creates a capturing group out of the provided pattern.
141 | 
142 |         :param Pregex | str pre: The pattern that is to be wrapped \
143 |             within a capturing group.
144 |         :param str name: The name that is assigned to the captured group \
145 |             for backreference purposes. A value of ``None`` indicates that no name \
146 |             is to be assigned to the group. Defaults to ``None``.
147 | 
148 |         :raises InvalidArgumentTypeException:
149 |             - Parameter ``pre`` is neither a ``Pregex`` instance nor a string.
150 |             - Parameter ``name`` is neither a string nor ``None``.
151 |         :raises InvalidCapturingGroupNameException: Parameter ``name`` is not a valid \
152 |             capturing group name. Such name must contain word characters only and start \
153 |             with a non-digit character.
154 | 
155 |         :note:
156 |             - Creating a capturing group out of a capturing group does nothing to.
157 |             - Creating a capturing group out of a non-capturing group converts it \
158 |               into a capturing group, except if any flags have been applied to it, \
159 |               in which case, the non-capturing group is wrapped within a capturing \
160 |               group as a whole.
161 |             - Creating a named capturing group out of an unnamed capturing group, \
162 |               assigns a name to it.
163 |             - Creating a named capturing group out of a named capturing group, \
164 |               changes the group's name.
165 |         '''
166 |         super().__init__(pre, lambda pre: pre.capture(name))
167 | 
168 | 
169 | class Group(__Group):
170 |     '''
171 |     Creates a non-capturing group out of the provided pattern.
172 | 
173 |     :param Pregex | str pre: The pattern that is to be wrapped \
174 |         within a non-capturing group.
175 |     :param bool is_case_insensitive: If ``True``, then the "case insensitive" \
176 |         flag is applied to the group so that the pattern within it ignores case \
177 |         when it comes to matching. Defaults to ``False``.
178 | 
179 |     :raises InvalidArgumentTypeException: Parameter ``pre`` is neither \
180 |         a ``Pregex`` instance nor a string.
181 | 
182 |     :note:
183 |         - Creating a non-capturing group out of a non-capturing group does nothing, \
184 |           except for remove its flags if it has any.
185 |         - Creating a non-capturing group out of a capturing group converts it into \
186 |           a non-capturing group.
187 |     '''
188 | 
189 |     def __init__(self, pre: _Union[_pre.Pregex, str], is_case_insensitive: bool = False):
190 |         '''
191 |         Creates a non-capturing group out of the provided pattern.
192 | 
193 |         :param Pregex | str pre: The pattern that is to be wrapped \
194 |             within a non-capturing group.
195 |         :param bool is_case_insensitive: If ``True``, then the "case insensitive" \
196 |             flag is applied to the group so that the pattern within it ignores case \
197 |             when it comes to matching. Defaults to ``False``.
198 | 
199 |         :raises InvalidArgumentTypeException: Parameter ``pre`` is neither \
200 |             a ``Pregex`` instance nor a string.
201 | 
202 |         :note:
203 |             - Creating a non-capturing group out of a non-capturing group does nothing, \
204 |               except for remove its flags if it has any.
205 |             - Creating a non-capturing group out of a capturing group converts it into \
206 |             a non-capturing group.
207 |         '''
208 |         super().__init__(pre, lambda pre: pre.group(is_case_insensitive))
209 | 
210 | 
211 | class Backreference(__Group):
212 |     '''
213 |     Creates a backreference to some previously declared capturing group.
214 | 
215 |     :param int | str ref: A reference to some previously declared capturing group. \
216 |         This parameter can either be an integer, in which case the capturing group \
217 |         is referenced by order, or a string, in which case the capturing group is \
218 |         referenced by name.
219 | 
220 |     :raises InvalidArgumentTypeException: Parameter ``ref`` is neither an integer \
221 |         nor a string.
222 |     :raises InvalidArgumentValueException: Parameter ``ref`` is an integer but \
223 |         has a value of either less than ``1`` or greater than ``10``.
224 |     :raises InvalidCapturingGroupNameException: Parameter ``ref`` is a string but \
225 |         not a valid capturing group name. Such name must contain word characters \
226 |         only and start with a non-digit character.
227 |     '''
228 | 
229 |     def __init__(self, ref: _Union[int, str]):
230 |         '''
231 |         Creates a backreference to some previously declared capturing group.
232 | 
233 |         :param int | str ref: A reference to some previously declared capturing group. \
234 |             This parameter can either be an integer, in which case the capturing group \
235 |             is referenced by order, or a string, in which case the capturing group is \
236 |             referenced by name.
237 | 
238 |         :raises InvalidArgumentTypeException: Parameter ``ref`` is neither an integer \
239 |             nor a string.
240 |         :raises InvalidArgumentValueException: Parameter ``ref`` is an integer but \
241 |             has a value of either less than ``1`` or greater than ``10``.
242 |         :raises InvalidCapturingGroupNameException: Parameter ``ref`` is a string but \
243 |             not a valid capturing group name. Such name must contain word characters \
244 |             only and start with a non-digit character.
245 |         '''
246 |         if isinstance(ref, int):
247 |             if isinstance(ref, bool):
248 |                 message = "Parameter \"ref\" is neither an integer nor a string."
249 |                 raise _ex.InvalidArgumentTypeException(message)
250 |             if ref < 1 or ref > 99:
251 |                 message = "Parameter \"ref\" cannot be less than 1 or greater than 99."
252 |                 raise _ex.InvalidArgumentValueException(message)
253 |             transform = lambda s : f"\\{s}"
254 |         elif isinstance(ref, str):
255 |             if _re.fullmatch("[A-Za-z_][A-Za-z_0-9]*", ref) is None:
256 |                 raise _ex.InvalidCapturingGroupNameException(ref)
257 |             transform = lambda s : f"(?P={s})"
258 |         else:
259 |             message = "Parameter \"ref\" is neither an integer nor a string."
260 |             raise _ex.InvalidArgumentTypeException(message)
261 |         super().__init__(str(ref), transform)
262 | 
263 |     
264 | class Conditional(__Group):
265 |     '''
266 |     Given the name of a capturing group, matches ``pre1`` only if said capturing group has \
267 |     been previously matched. Furthermore, if a second pattern ``pre2`` is provided, then \
268 |     this pattern is matched in case the referenced capturing group was not, though one \
269 |     should be aware that for this to be possible, the referenced capturing group must \
270 |     be optional.
271 | 
272 |     :param str name: The name of the referenced capturing group.
273 |     :param Pregex | str pre1: The pattern that is to be matched in case condition is true.
274 |     :param Pregex | str pre2: The pattern that is to be matched in case condition \
275 |         is false. Defaults to ``None``.
276 | 
277 |     :raises InvalidArgumentTypeException:
278 |         - Parameter ``name`` is not a string.
279 |         - Parameter ``pre1`` is neither a ``Pregex`` instance nor a string.
280 |         - Parameter ``pre2`` is neither a ``Pregex`` instance nor a string nor ``None``.
281 |     :raises InvalidCapturingGroupNameException: Parameter ``name`` is not a valid \
282 |         capturing group name. Such name must contain word characters only and start \
283 |         with a non-digit character.
284 |     '''
285 | 
286 |     def __init__(self, name: str, pre1: _Union[_pre.Pregex, str], pre2: _Optional[_Union[_pre.Pregex, str]] = None):
287 |         '''
288 |         Given the name of a capturing group, matches ``pre1`` only if said capturing group has \
289 |         been previously matched. Furthermore, if a second pattern ``pre2`` is provided, then \
290 |         this pattern is matched in case the referenced capturing group was not, though one \
291 |         should be aware that for this to be possible, the referenced capturing group must \
292 |         be optional.
293 | 
294 |         :param str name: The name of the referenced capturing group.
295 |         :param Pregex | str pre1: The pattern that is to be matched in case condition is true.
296 |         :param Pregex | str pre2: The pattern that is to be matched in case condition \
297 |             is false. Defaults to ``None``.
298 | 
299 |         :raises InvalidArgumentTypeException:
300 |             - Parameter ``name`` is not a string.
301 |             - Parameter ``pre1`` is neither a ``Pregex`` instance nor a string.
302 |             - Parameter ``pre2`` is neither a ``Pregex`` instance nor a string nor ``None``.
303 |         :raises InvalidCapturingGroupNameException: Parameter ``name`` is not a valid \
304 |             capturing group name. Such name must contain word characters only and start \
305 |             with a non-digit character.
306 |         '''
307 |         if not isinstance(name, str):
308 |             message = "Provided argument \"name\" is not a string."
309 |             raise _ex.InvalidArgumentTypeException(message)
310 |         if _re.fullmatch("[A-Za-z_][\w]*", name) is None:
311 |             raise _ex.InvalidCapturingGroupNameException(name)
312 |         super().__init__(name, lambda s: f"(?({s}){pre1}{'|' + str(pre2) if pre2 != None else ''})")


--------------------------------------------------------------------------------
/src/pregex/core/operators.py:
--------------------------------------------------------------------------------
  1 | __doc__ = """
  2 | This module contains various classes representing operators \
  3 | that are typically applied between two or more patterns.
  4 | 
  5 | Classes & methods
  6 | -------------------------------------------
  7 | 
  8 | Below are listed all classes within :py:mod:`pregex.core.operators`
  9 | along with any possible methods they may possess.
 10 | """
 11 | 
 12 | 
 13 | import pregex.core.pre as _pre
 14 | import pregex.core.exceptions as _ex
 15 | from typing import Union as _Union
 16 | 
 17 | 
 18 | class __Operator(_pre.Pregex):
 19 |     '''
 20 |     Constitutes the base class for all classes that are part of this module.
 21 | 
 22 |     :param tuple[Pregex | str] pres: A tuple of strings or Pregex instances representing \
 23 |         the patterns to which the operator is to be applied.
 24 |     :param (tuple[Pregex | str] => str) transform: A `transform` function for the provided pattern.
 25 | 
 26 |     :raises InvalidArgumentTypeException: At least one of the provided arguments \
 27 |         through ``pres`` is neither a ``Pregex`` instance nor a string.
 28 | 
 29 |     :note: If no arguments are provided, then the resulting ``Pregex`` instance \
 30 |         corresponds to the "empty string" pattern, whereas if a single argument is \
 31 |         provided, it is simply returned wrapped within a ``Pregex`` instance.
 32 |     '''
 33 |     def __init__(self, pres: tuple[_Union[_pre.Pregex, str]], transform) -> _pre.Pregex:
 34 |         '''
 35 |         Constitutes the base class for all classes that are part of this module.
 36 | 
 37 |         :param tuple[Pregex | str] pres: A tuple of strings or Pregex instances representing \
 38 |             the patterns to which the operator is to be applied.
 39 |         :param (tuple[Pregex | str] => str) transform: A `transform` function for the provided pattern.
 40 | 
 41 |         :raises InvalidArgumentTypeException: At least one of the provided arguments \
 42 |             through ``pres`` is neither a ``Pregex`` instance nor a string.
 43 | 
 44 |         :note: If no arguments are provided, then the resulting ``Pregex`` instance \
 45 |             corresponds to the "empty string" pattern, whereas if a single argument is \
 46 |             provided, it is simply returned wrapped within a ``Pregex`` instance.
 47 |         '''
 48 |         if len(pres) == 0:
 49 |             result = ''
 50 |         else:
 51 |             result = __class__._to_pregex(pres[0])
 52 |             if len(pres) > 1:
 53 |                 for pre in pres[1:]:
 54 |                     result = transform(result, pre)
 55 |         super().__init__(str(result), escape=False)
 56 | 
 57 | 
 58 | class Concat(__Operator):
 59 |     '''
 60 |     Matches the concatenation of the provided patterns.
 61 | 
 62 |     :param Pregex | str \*pres: Two or more patterns that are to be concatenated.
 63 | 
 64 |     :raises NotEnoughArgumentsException: Less than two arguments are provided.
 65 |     :raises InvalidArgumentTypeException: At least one of the provided arguments \
 66 |         is neither a ``Pregex`` instance nor a string.
 67 | 
 68 |     :note: If no arguments are provided, then the resulting ``Pregex`` instance \
 69 |         corresponds to the "empty string" pattern, whereas if a single argument is \
 70 |         provided, it is simply returned wrapped within a ``Pregex`` instance.
 71 |     '''
 72 | 
 73 |     def __init__(self, *pres: _Union[_pre.Pregex, str]) -> _pre.Pregex:
 74 |         '''
 75 |         Matches the concatenation of the provided patterns.
 76 | 
 77 |         :param Pregex | str \*pres: Two or more patterns that are to be concatenated.
 78 | 
 79 |         :raises NotEnoughArgumentsException: Less than two arguments are provided.
 80 |         :raises InvalidArgumentTypeException: At least one of the provided arguments \
 81 |             is neither a ``Pregex`` instance nor a string.
 82 | 
 83 |         :note: If no arguments are provided, then the resulting ``Pregex`` instance \
 84 |             corresponds to the "empty string" pattern, whereas if a single argument is \
 85 |             provided, it is simply returned wrapped within a ``Pregex`` instance.
 86 |         '''
 87 |         super().__init__(pres, lambda pre1, pre2: pre1.concat(pre2))
 88 | 
 89 | 
 90 | class Either(__Operator):
 91 |     '''
 92 |     Matches either one of the provided patterns.
 93 | 
 94 |     :param Pregex | str \*pres: Two or more patterns that constitute the \
 95 |         operator's alternatives.
 96 | 
 97 |     :raises NotEnoughArgumentsException: Less than two arguments are provided.
 98 |     :raises InvalidArgumentTypeException: At least one of the provided arguments \
 99 |         is neither a ``Pregex`` instance nor a string.
100 | 
101 |     :note:
102 |         - If no arguments are provided, then the resulting ``Pregex`` instance \
103 |           corresponds to the "empty string" pattern, whereas if a single argument is \
104 |           provided, it is simply returned wrapped within a ``Pregex`` instance.
105 |         - One should be aware that ``Either`` is eager, meaning that the regex engine will \
106 |           stop the moment it matches either one of the alternatives, starting from \
107 |           the left-most pattern and continuing on to the right until a match occurs.
108 |     '''
109 |     
110 |     def __init__(self, *pres: _Union[_pre.Pregex, str]):
111 |         '''
112 |         Matches either one of the provided patterns.
113 | 
114 |         :param Pregex | str \*pres: Two or more patterns that constitute the \
115 |             operator's alternatives.
116 | 
117 |         :raises NotEnoughArgumentsException: Less than two arguments are provided.
118 |         :raises InvalidArgumentTypeException: At least one of the provided arguments \
119 |             is neither a ``Pregex`` instance nor a string.
120 | 
121 |         :note:
122 |           - If no arguments are provided, then the resulting ``Pregex`` instance \
123 |             corresponds to the "empty string" pattern, whereas if a single argument is \
124 |             provided, it is simply returned wrapped within a ``Pregex`` instance.
125 |           - One should be aware that ``Either`` is eager, meaning that the regex engine will \
126 |             stop the moment it matches either one of the alternatives, starting from \
127 |             the left-most pattern and continuing on to the right until a match occurs.
128 |         '''
129 |         super().__init__(pres, lambda pre1, pre2: pre1.either(pre2))
130 | 
131 | 
132 | class Enclose(__Operator):
133 |     '''
134 |     Matches the pattern that results from concatenating the ``enclosing`` \
135 |     pattern(s) to both sides of pattern ``pre``.
136 | 
137 |     :param Pregex | str pre: The pattern that is to be at the center \
138 |         of the concatenation.
139 |     :param Pregex | str enclosing: One or more patterns that are to *enclose* \
140 |         pattern ``pre`` one by one.
141 | 
142 |     :raises NotEnoughArgumentsException: Less than two arguments are provided.
143 |     :raises InvalidArgumentTypeException: Either ``pre`` or at least one of the \
144 |         ``enclosing`` patterns is neither a ``Pregex`` instance nor a string.
145 |     '''
146 | 
147 |     def __init__(self, pre: _Union[_pre.Pregex, str], *enclosing:_Union[_pre.Pregex, str]) -> _pre.Pregex:
148 |         '''
149 |         Matches the pattern that results from concatenating the ``enclosing`` \
150 |         pattern(s) to both sides of pattern ``pre``.
151 | 
152 |         :param Pregex | str pre: The pattern that is to be at the center \
153 |             of the concatenation.
154 |         :param Pregex | str enclosing: One or more patterns that are to *enclose* \
155 |             pattern ``pre`` one by one.
156 | 
157 |         :raises NotEnoughArgumentsException: Less than two arguments are provided.
158 |         :raises InvalidArgumentTypeException: Either ``pre`` or at least one of the \
159 |             ``enclosing`` patterns is neither a ``Pregex`` instance nor a string.
160 |         '''
161 |         super().__init__((pre, *enclosing), lambda pre1, pre2: pre1.enclose(pre2))


--------------------------------------------------------------------------------
/src/pregex/core/quantifiers.py:
--------------------------------------------------------------------------------
  1 | __doc__ = """
  2 | Every class within this module is used to declare that a pattern is to be
  3 | matched a number of times, with each class representing a slightly different
  4 | pattern-repetition rule.
  5 | 
  6 | Classes & methods
  7 | -------------------------------------------
  8 | 
  9 | Below are listed all classes within :py:mod:`pregex.core.quantifiers`
 10 | along with any possible methods they may possess.
 11 | """
 12 | 
 13 | 
 14 | import pregex.core.pre as _pre
 15 | from typing import Union as _Union
 16 | from typing import Optional as _Optional
 17 | 
 18 | 
 19 | class __Quantifier(_pre.Pregex):
 20 |     '''
 21 |     Constitutes the base class for all classes that are part of this module.
 22 | 
 23 |     :param Pregex | str pre: A Pregex instance or string representing the pattern \
 24 |         that is to be quantified.
 25 |     :param (Pregex => str) transform: A `transform` function for the provided pattern.
 26 | 
 27 |     :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
 28 |         ``Pregex`` instance nor a string.
 29 |     :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable \
 30 |         pattern. Whether this exception is thrown also depends on certain parameter values.
 31 | 
 32 |     '''
 33 |     def __init__(self, pre: _Union[_pre.Pregex, str], is_greedy: bool, transform) -> '__Quantifier':
 34 |         '''
 35 |         Constitutes the base class for all classes that are part of this module.
 36 | 
 37 |         :param Pregex | str pre: A Pregex instance or string representing the pattern \
 38 |             that is to be quantified.
 39 |         :param (Pregex => str) transform: A `transform` function for the provided pattern.
 40 | 
 41 |         :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
 42 |             ``Pregex`` instance nor a string.
 43 |         :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable \
 44 |             pattern. Whether this exception is thrown also depends on certain parameter values.
 45 |         '''
 46 |         pattern = transform(__class__._to_pregex(pre), is_greedy)
 47 |         super().__init__(str(pattern), escape=False)
 48 | 
 49 | 
 50 | class Optional(__Quantifier):
 51 |     '''
 52 |     Matches the provided pattern once or not at all.
 53 | 
 54 |     :param Pregex | str pre: The pattern that is to be quantified.
 55 |     :param bool is_greedy: Indicates whether to declare this quantifier as greedy. \
 56 |         When declared as such, the regex engine will try to match \
 57 |         the expression as many times as possible. Defaults to ``True``.
 58 | 
 59 |     :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
 60 |         ``Pregex`` instance nor a string.
 61 |     '''
 62 | 
 63 |     def __init__(self, pre: _Union[_pre.Pregex, str], is_greedy: bool = True) -> _pre.Pregex:
 64 |         '''
 65 |         Matches the provided pattern once or not at all.
 66 | 
 67 |         :param Pregex | str pre: The pattern that is to be quantified.
 68 |         :param bool is_greedy: Indicates whether to declare this quantifier as greedy. \
 69 |             When declared as such, the regex engine will try to match \
 70 |             the expression as many times as possible. Defaults to ``True``.
 71 | 
 72 |         :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
 73 |             ``Pregex`` instance nor a string.
 74 |         '''
 75 |         super().__init__(pre, is_greedy, lambda pre, is_greedy: pre.optional(is_greedy))
 76 | 
 77 | 
 78 | class Indefinite(__Quantifier):
 79 |     '''
 80 |     Matches the provided pattern zero or more times.
 81 | 
 82 |     :param Pregex | str pre: The pattern that is to be quantified.
 83 |     :param bool is_greedy: Indicates whether to declare this quantifier as greedy. \
 84 |         When declared as such, the regex engine will try to match \
 85 |         the expression as many times as possible. Defaults to ``True``.
 86 | 
 87 |     :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
 88 |         ``Pregex`` instance nor a string.
 89 |     :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable pattern.
 90 |     '''
 91 | 
 92 |     def __init__(self, pre: _Union[_pre.Pregex, str], is_greedy: bool = True) -> _pre.Pregex:
 93 |         '''
 94 |         Matches the provided pattern zero or more times.
 95 | 
 96 |         :param Pregex | str pre: The pattern that is to be quantified.
 97 |         :param bool is_greedy: Indicates whether to declare this quantifier as greedy. \
 98 |             When declared as such, the regex engine will try to match \
 99 |             the expression as many times as possible. Defaults to ``True``.
100 | 
101 |         :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable pattern.
102 |         '''
103 |         super().__init__(pre, is_greedy, lambda pre, is_greedy: pre.indefinite(is_greedy))
104 | 
105 | 
106 | class OneOrMore(__Quantifier):
107 |     '''
108 |     Matches the provided pattern one or more times.
109 | 
110 |     :param Pregex | str pre: The pattern that is to be quantified.
111 |     :param bool is_greedy: Indicates whether to declare this quantifier as greedy. \
112 |         When declared as such, the regex engine will try to match \
113 |         the expression as many times as possible. Defaults to ``True``.
114 | 
115 |     :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
116 |         ``Pregex`` instance nor a string.
117 |     :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable pattern.
118 |     '''
119 | 
120 |     def __init__(self, pre: _Union[_pre.Pregex, str], is_greedy: bool = True) -> _pre.Pregex:
121 |         '''
122 |         Matches the provided pattern one or more times.
123 | 
124 |         :param Pregex | str pre: The pattern that is to be quantified.
125 |         :param bool is_greedy: Indicates whether to declare this quantifier as greedy. \
126 |             When declared as such, the regex engine will try to match \
127 |             the expression as many times as possible. Defaults to ``True``.
128 | 
129 |         :raises InvalidArgumentTypeException: Parameter ``pre`` is neither a \
130 |             ``Pregex`` instance nor a string.
131 |         :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable pattern.
132 |         '''
133 |         super().__init__(pre, is_greedy, lambda pre, is_greedy: pre.one_or_more(is_greedy))
134 | 
135 | 
136 | class Exactly(__Quantifier):
137 |     '''
138 |     Matches the provided pattern an exact number of times.
139 | 
140 |     :param Pregex | str pre: The pattern that is to be quantified.
141 |     :param int n: The exact number of times that the provided pattern is to be matched.
142 | 
143 |     :raises InvalidArgumentTypeException: 
144 |         - Parameter ``pre`` is neither a ``Pregex`` instance nor a string.
145 |         - Parameter ``n`` is not an integer.
146 |     :raises InvalidArgumentValueException: Parameter ``n`` has a value of less than zero.
147 |     :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable \
148 |         pattern while parameter ``n`` has been set to a value of greater than ``1``.
149 |     '''
150 | 
151 |     def __init__(self, pre: _Union[_pre.Pregex, str], n: int) -> _pre.Pregex:
152 |         '''
153 |         Matches the provided pattern an exact number of times.
154 | 
155 |         :param Pregex | str pre: The pattern that is to be quantified.
156 |         :param int n: The exact number of times that the provided pattern is to be matched.
157 | 
158 |         :raises InvalidArgumentTypeException: 
159 |             - Parameter ``pre`` is neither a ``Pregex`` instance nor a string.
160 |             - Parameter ``n`` is not an integer.
161 |         :raises InvalidArgumentValueException: Parameter ``n`` has a value of less than zero.
162 |         :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable \
163 |             pattern while parameter ``n`` has been set to a value of greater than ``1``.
164 |         '''
165 |         super().__init__(pre, False, lambda pre, _: pre.exactly(n))
166 | 
167 | 
168 | class AtLeast(__Quantifier):
169 |     '''
170 |     Matches the provided pattern a minimum number of times.
171 | 
172 |     :param Pregex | str pre: The pattern that is to be quantified.
173 |     :param int n: The minimum number of times that the provided pattern is to be matched.
174 |     :param bool is_greedy: Determines whether to declare this quantifier as greedy. \
175 |         When declared as such, the regex engine will try to match \
176 |         the expression as many times as possible. Defaults to ``True``.
177 | 
178 |     :raises InvalidArgumentTypeException: 
179 |         - Parameter ``pre`` is neither a ``Pregex`` instance nor a string.
180 |         - Parameter ``n`` is not an integer.
181 |     :raises InvalidArgumentValueException: Parameter ``n`` has a value of less than zero.
182 |     :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable pattern.
183 |     '''
184 | 
185 |     def __init__(self, pre: _Union[_pre.Pregex, str], n: int, is_greedy: bool = True) -> _pre.Pregex:
186 |         '''
187 |         Matches the provided pattern a minimum number of times.
188 | 
189 |         :param Pregex | str pre: The pattern that is to be quantified.
190 |         :param int n: The minimum number of times that the provided pattern is to be matched.
191 |         :param bool is_greedy: Determines whether to declare this quantifier as greedy. \
192 |             When declared as such, the regex engine will try to match \
193 |             the expression as many times as possible. Defaults to ``True``.
194 | 
195 |         :raises InvalidArgumentTypeException: 
196 |             - Parameter ``pre`` is neither a ``Pregex`` instance nor a string.
197 |             - Parameter ``n`` is not an integer.
198 |         :raises InvalidArgumentValueException: Parameter ``n`` has a value of less than zero.
199 |         :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable pattern.
200 |         '''
201 |         super().__init__(pre, is_greedy, lambda pre, is_greedy: pre.at_least(n, is_greedy))
202 | 
203 | 
204 | class AtMost(__Quantifier):
205 |     '''
206 |     Matches the provided pattern up to a maximum number of times.
207 | 
208 |     :param Pregex | str pre: The pattern that is to be quantified.
209 |     :param int n: The maximum number of times that the provided pattern is to be matched.
210 |     :param bool is_greedy: Indicates whether to declare this quantifier as greedy. \
211 |         When declared as such, the regex engine will try to match \
212 |         the expression as many times as possible. Defaults to ``True``.
213 | 
214 |     :raises InvalidArgumentTypeException: 
215 |         - Parameter ``pre`` is neither a ``Pregex`` instance nor a string.
216 |         - Parameter ``n`` is neither an integer nor ``None``.
217 |     :raises InvalidArgumentValueException: Parameter ``n`` has a value of less than zero.
218 |     :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable \
219 |         pattern while parameter ``n`` has been set to a value of greater than ``1``.
220 | 
221 |     :note: Setting ``n`` equal to ``None`` indicates that there is no upper limit to the number of \
222 |         times the pattern is to be repeated.
223 |     '''
224 | 
225 |     def __init__(self, pre: _Union[_pre.Pregex, str], n: _Optional[int], is_greedy: bool = True) -> _pre.Pregex:
226 |         '''
227 |         Matches the provided pattern up to a maximum number of times.
228 | 
229 |         :param Pregex | str pre: The pattern that is to be quantified.
230 |         :param int n: The maximum number of times that the provided pattern is to be matched.
231 |         :param bool is_greedy: Indicates whether to declare this quantifier as greedy. \
232 |             When declared as such, the regex engine will try to match \
233 |             the expression as many times as possible. Defaults to ``True``.
234 | 
235 |         :raises InvalidArgumentTypeException: 
236 |             - Parameter ``pre`` is neither a ``Pregex`` instance nor a string.
237 |             - Parameter ``n`` is neither an integer nor ``None``.
238 |         :raises InvalidArgumentValueException: Parameter ``n`` has a value of less than zero.
239 |         :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable \
240 |         pattern while parameter ``n`` has been set to a value of greater than ``1``.
241 | 
242 |         :note: Setting ``n`` equal to ``None`` indicates that there is no upper limit to the number of \
243 |             times the pattern is to be repeated.
244 |         '''
245 |         super().__init__(pre, is_greedy, lambda pre, is_greedy: pre.at_most(n, is_greedy))
246 | 
247 | 
248 | class AtLeastAtMost(__Quantifier):
249 |     '''
250 |     Matches the provided expression between a minimum and a maximum number of times.
251 | 
252 |     :param Pregex | str pre: The pattern that is to be quantified.
253 |     :param int n: The minimum number of times that the provided pattern is to be matched.
254 |     :param int | None m: The maximum number of times that the provided pattern is to be matched.
255 |     :param bool is_greedy: Indicates whether to declare this quantifier as greedy. \
256 |         When declared as such, the regex engine will try to match \
257 |         the expression as many times as possible. Defaults to ``True``.
258 | 
259 |     :raises InvalidArgumentTypeException:
260 |         - Parameter ``pre`` is neither a ``Pregex`` instance nor a string.
261 |         - Parameter ``n`` is not an integer.
262 |         - Parameter ``m`` is neither an integer nor ``None``.
263 |     :raises InvalidArgumentValueException:
264 |         - Either parameter ``n`` or ``m`` has a value of less than zero.
265 |         - Parameter ``n`` has a greater value than that of parameter ``m``.
266 |     :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable \
267 |         pattern while parameter ``m`` has been set to a value greater than ``1``.
268 | 
269 |     :note:
270 |         - Parameter ``is_greedy`` has no effect in the case that ``n`` equals ``m``.
271 |         - Setting ``m`` equal to ``None`` indicates that there is no upper limit to the \
272 |             number of times the pattern is to be repeated.
273 |     '''
274 | 
275 |     def __init__(self, pre: _Union[_pre.Pregex, str], n: int, m: _Optional[int], is_greedy: bool = True) -> _pre.Pregex:
276 |         '''
277 |         Matches the provided expression between a minimum and a maximum number of times.
278 | 
279 |         :param Pregex | str pre: The pattern that is to be quantified.
280 |         :param int n: The minimum number of times that the provided pattern is to be matched.
281 |         :param int | None m: The maximum number of times that the provided pattern is to be matched.
282 |         :param bool is_greedy: Indicates whether to declare this quantifier as greedy. \
283 |             When declared as such, the regex engine will try to match \
284 |             the expression as many times as possible. Defaults to ``True``.
285 | 
286 |         :raises InvalidArgumentTypeException:
287 |             - Parameter ``pre`` is neither a ``Pregex`` instance nor a string.
288 |             - Parameter ``n`` is not an integer.
289 |             - Parameter ``m`` is neither an integer nor ``None``.
290 |         :raises InvalidArgumentValueException:
291 |             - Either parameter ``n`` or ``m`` has a value of less than zero.
292 |             - Parameter ``n`` has a greater value than that of parameter ``m``.
293 |         :raises CannotBeRepeatedException: Parameter ``pre`` represents a non-repeatable \
294 |             pattern while parameter ``m`` has been set to a value greater than ``1``.
295 | 
296 |         :note:
297 |             - Parameter ``is_greedy`` has no effect in the case that ``n`` equals ``m``.
298 |             - Setting ``m`` equal to ``None`` indicates that there is no upper limit to the \
299 |                 number of times the pattern is to be repeated.
300 |         '''
301 |         super().__init__(pre, is_greedy, lambda pre, is_greedy: pre.at_least_at_most(n, m, is_greedy))


--------------------------------------------------------------------------------
/src/pregex/core/tokens.py:
--------------------------------------------------------------------------------
  1 | __doc__ = """
  2 | This module contains a number of classes that represent special characters.
  3 | Each token represents one and only one character. It is recommended that you
  4 | make use of these classes instead of providing their corresponding characters
  5 | as strings on your own in order to prevent any errors that relate to character
  6 | escaping from happening.
  7 | 
  8 | Classes & methods
  9 | -------------------------------------------
 10 | 
 11 | Below are listed all classes within :py:mod:`pregex.core.tokens`
 12 | along with any possible methods they may possess.
 13 | """
 14 | 
 15 | 
 16 | import pregex.core.pre as _pre
 17 | 
 18 | 
 19 | class __Token(_pre.Pregex):
 20 |     '''
 21 |     Constitutes the base class for all classes that are part of this module.
 22 | 
 23 |     :param str pattern: The pattern representing the token.
 24 |     '''
 25 | 
 26 |     def __init__(self, pattern: str) -> '__Token':
 27 |         '''
 28 |         Constitutes the base class for all classes that are part of this module.
 29 | 
 30 |         :param str pattern: The pattern representing the token, forwarded with ``escape=False``.
 31 |         '''
 32 |         super().__init__(pattern, escape=False)
 33 | 
 34 | 
 35 | class Backslash(__Token):
 36 |     '''
 37 |     Matches a single backslash character.
 38 |     '''
 39 | 
 40 |     def __init__(self) -> 'Backslash':
 41 |         '''
 42 |         Matches a single backslash character.
 43 |         '''
 44 |         super().__init__(r"\\")  # Raw string: two backslashes, i.e. a regex-escaped backslash.
 45 | 
 46 | 
 47 | class Bullet(__Token):
 48 |     '''
 49 |     Matches the bullet symbol "•".
 50 |     '''
 51 | 
 52 |     def __init__(self) -> 'Bullet':
 53 |         '''
 54 |         Matches the bullet symbol "•".
 55 |         '''
 56 |         super().__init__("\u2022")  # U+2022 BULLET
 57 | 
 58 | 
 59 | class CarriageReturn(__Token):
 60 |     '''
 61 |     Matches a single carriage return character.
 62 |     '''
 63 | 
 64 |     def __init__(self) -> 'CarriageReturn':
 65 |         '''
 66 |         Matches a single carriage return character.
 67 |         '''
 68 |         super().__init__("\r")  # U+000D CARRIAGE RETURN
 69 | 
 70 | 
 71 | class Copyright(__Token):
 72 |     '''
 73 |     Matches the copyright symbol "©".
 74 |     '''
 75 | 
 76 |     def __init__(self) -> 'Copyright':
 77 |         '''
 78 |         Matches the copyright symbol "©".
 79 |         '''
 80 |         super().__init__("\u00A9")  # U+00A9 COPYRIGHT SIGN
 81 | 
 82 | 
 83 | class Division(__Token):
 84 |     '''
 85 |     Matches the division sign "÷".
 86 |     '''
 87 | 
 88 |     def __init__(self) -> 'Division':
 89 |         '''
 90 |         Matches the division sign "÷".
 91 |         '''
 92 |         super().__init__("\u00f7")  # U+00F7 DIVISION SIGN
 93 | 
 94 | 
 95 | class Dollar(__Token):
 96 |     '''
 97 |     Matches the dollar sign "$".
 98 |     '''
 99 | 
100 |     def __init__(self) -> 'Dollar':
101 |         '''
102 |         Matches the dollar sign "$".
103 |         '''
104 |         super().__init__("\\\u0024")  # Backslash + U+0024 DOLLAR SIGN: "$" is a regex metacharacter.
105 | 
106 | 
107 | class Euro(__Token):
108 |     '''
109 |     Matches the euro sign "€".
110 |     '''
111 | 
112 |     def __init__(self) -> 'Euro':
113 |         '''
114 |         Matches the euro sign "€".
115 |         '''
116 |         super().__init__("\u20ac")  # U+20AC EURO SIGN
117 | 
118 | 
119 | class FormFeed(__Token):
120 |     '''
121 |     Matches a single form feed character.
122 |     '''
123 | 
124 |     def __init__(self) -> 'FormFeed':
125 |         '''
126 |         Matches a single form feed character.
127 |         '''
128 |         super().__init__("\f")  # U+000C FORM FEED
129 | 
130 | 
131 | class Infinity(__Token):
132 |     '''
133 |     Matches the infinity symbol "∞".
134 |     '''
135 | 
136 |     def __init__(self) -> 'Infinity':
137 |         '''
138 |         Matches the infinity symbol "∞".
139 |         '''
140 |         super().__init__("\u221e")  # U+221E INFINITY
141 | 
142 | 
143 | class Multiplication(__Token):
144 |     '''
145 |     Matches the multiplication sign "×".
146 |     '''
147 | 
148 |     def __init__(self) -> 'Multiplication':
149 |         '''
150 |         Matches the multiplication sign "×".
151 |         '''
152 |         super().__init__("\u00d7")  # U+00D7 MULTIPLICATION SIGN
153 | 
154 | 
155 | class Newline(__Token):
156 |     '''
157 |     Matches a single newline character.
158 |     '''
159 | 
160 |     def __init__(self) -> 'Newline':
161 |         '''
162 |         Matches a single newline character.
163 |         '''
164 |         super().__init__("\n")  # U+000A LINE FEED
165 | 
166 | 
167 | class Pound(__Token):
168 |     '''
169 |     Matches the English pound sign "£".
170 |     '''
171 | 
172 |     def __init__(self) -> 'Pound':
173 |         '''
174 |         Matches the English pound sign "£".
175 |         '''
176 |         super().__init__("\u00a3")  # U+00A3 POUND SIGN
177 | 
178 | 
179 | class Registered(__Token):
180 |     '''
181 |     Matches the registered trademark symbol "®".
182 |     '''
183 | 
184 |     def __init__(self) -> 'Registered':
185 |         '''
186 |         Matches the registered trademark symbol "®".
187 |         '''
188 |         super().__init__("\u00ae")  # U+00AE REGISTERED SIGN
189 | 
190 | 
191 | class Rupee(__Token):
192 |     '''
193 |     Matches the Indian rupee sign "₹".
194 |     '''
195 | 
196 |     def __init__(self) -> 'Yen':
197 |         '''
198 |          Matches the Indian rupee sign "₹".
199 |         '''
200 |         super().__init__("\u20b9") 
201 | 
202 | 
203 | class Space(__Token):
204 |     '''
205 |     Matches a single space character.
206 |     '''
207 | 
208 |     def __init__(self) -> 'Space':
209 |         '''
210 |         Matches a single space character.
211 |         '''
212 |         super().__init__(" ")  # U+0020 SPACE
213 | 
214 | 
215 | class Tab(__Token):
216 |     '''
217 |     Matches a single tab character.
218 |     '''
219 | 
220 |     def __init__(self) -> 'Tab':
221 |         '''
222 |         Matches a single tab character.
223 |         '''
224 |         super().__init__("\t")  # U+0009 CHARACTER TABULATION
225 | 
226 | 
227 | class Trademark(__Token):
228 |     '''
229 |     Matches the unregistered trademark symbol "™".
230 |     '''
231 | 
232 |     def __init__(self) -> 'Trademark':
233 |         '''
234 |         Matches the unregistered trademark symbol "™".
235 |         '''
236 |         super().__init__("\u2122")  # U+2122 TRADE MARK SIGN
237 | 
238 | 
239 | class VerticalTab(__Token):
240 |     '''
241 |     Matches a single vertical tab character.
242 |     '''
243 | 
244 |     def __init__(self) -> 'VerticalTab':
245 |         '''
246 |         Matches a single vertical tab character.
247 |         '''
248 |         super().__init__("\v")  # U+000B LINE TABULATION
249 | 
250 | 
251 | class WhiteBullet(__Token):
252 |     '''
253 |     Matches the white bullet symbol "◦".
254 |     '''
255 | 
256 |     def __init__(self) -> 'WhiteBullet':
257 |         '''
258 |         Matches the white bullet symbol "◦".
259 |         '''
260 |         super().__init__("\u25e6")  # U+25E6 WHITE BULLET
261 | 
262 | 
263 | class Yen(__Token):
264 |     '''
265 |     Matches the Japanese yen sign "¥".
266 |     '''
267 | 
268 |     def __init__(self) -> 'Yen':
269 |         '''
270 |         Matches the Japanese yen sign "¥".
271 |         '''
272 |         super().__init__("\u00a5")  # U+00A5 YEN SIGN


--------------------------------------------------------------------------------
/src/pregex/meta/__init__.py:
--------------------------------------------------------------------------------
1 | from pregex.meta.essentials import *


--------------------------------------------------------------------------------
/tests/test_core_assertions.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | 
  3 | from pregex.core.assertions import *
  4 | from pregex.core.pre import Pregex, _Type
  5 | from pregex.core.quantifiers import Exactly, Optional
  6 | from pregex.core.exceptions import NonFixedWidthPatternException, \
  7 |     NotEnoughArgumentsException, EmptyNegativeAssertionException
  8 | 
  9 | 
 10 | TEST_STR = "test"
 11 | pre1 = Pregex("PRE1")
 12 | pre2 = Pregex("PRE2")
 13 | pre3 = Pregex("PRE3")
 14 | 
 15 | 
 16 | class TestMatchAtStart(unittest.TestCase):
 17 |     
 18 |     def test_match_at_start(self):
 19 |         self.assertEqual(str(MatchAtStart(TEST_STR)), f"\A{TEST_STR}")
 20 | 
 21 |     def test_match_at_start_on_type(self):
 22 |         self.assertEqual(MatchAtStart("a")._get_type(), _Type.Assertion)
 23 |         self.assertEqual(MatchAtStart("abc")._get_type(), _Type.Assertion)
 24 | 
 25 |     def test_match_at_start_on_quantifiability(self):
 26 |         self.assertEqual(MatchAtStart("a")._is_repeatable(), False)
 27 | 
 28 | 
 29 | class TestMatchAtEnd(unittest.TestCase):
 30 |     
 31 |     def test_match_at_end(self):
 32 |         self.assertEqual(str(MatchAtEnd(TEST_STR)), f"{TEST_STR}\Z")
 33 | 
 34 |     def test_match_at_end_on_type(self):
 35 |         self.assertEqual(MatchAtEnd("a")._get_type(), _Type.Assertion)
 36 | 
 37 |     def test_match_at_end_on_quantifiability(self):
 38 |         self.assertEqual(MatchAtEnd("a")._is_repeatable(), False)
 39 | 
 40 | 
 41 | class TestMatchAtLineStart(unittest.TestCase):
 42 |     # Checks the rendered pattern ("^" prefix), the reported type and the repeatability flag.
 43 |     def test_match_at_line_start(self):
 44 |         self.assertEqual(str(MatchAtLineStart(TEST_STR)), f"^{TEST_STR}")
 45 | 
 46 |     def test_match_at_line_start_on_type(self):
 47 |         self.assertEqual(MatchAtLineStart("a")._get_type(), _Type.Assertion)
 48 | 
 49 |     def test_match_at_line_start_on_quantifiability(self):
 50 |         self.assertEqual(MatchAtLineStart("a")._is_repeatable(), False)
 51 | 
 52 | 
 53 | class TestMatchAtLineEnd(unittest.TestCase):
 54 |     # Checks the rendered pattern ("$" suffix), the reported type and the repeatability flag.
 55 |     def test_match_at_line_end(self):
 56 |         self.assertEqual(str(MatchAtLineEnd(TEST_STR)), f"{TEST_STR}$")
 57 | 
 58 |     def test_match_at_line_end_on_type(self):
 59 |         self.assertEqual(MatchAtLineEnd("a")._get_type(), _Type.Assertion)
 60 | 
 61 | 
 62 |     def test_match_at_line_end_on_quantifiability(self):
 63 |         self.assertEqual(MatchAtLineEnd("a")._is_repeatable(), False)
 64 | 
 65 | 
 66 | class TestWordBoundary(unittest.TestCase):
 67 |     # Fixtures: a word boundary concatenated to the left, right and both sides of TEST_STR.
 68 |     left_word_boundary = WordBoundary() + TEST_STR
 69 |     right_word_boundary = TEST_STR + WordBoundary()
 70 |     left_and_right_word_boundary = WordBoundary() + TEST_STR + WordBoundary()
 71 | 
 72 |     def test_word_boundary_on_pattern(self):
 73 |         self.assertEqual(str(WordBoundary()), "\\b")
 74 | 
 75 |     def test_word_boundary_on_matches(self):
 76 |         self.assertEqual((WordBoundary() + "a").get_matches("a ba -a"), ["a", "a"])  # The "a" in "ba" is skipped.
 77 | 
 78 |     def test_word_boundary_on_type(self):
 79 |         self.assertEqual(WordBoundary()._get_type(), _Type.Assertion)
 80 | 
 81 |     def test_word_boundary_on_quantifiability(self):
 82 |         self.assertEqual(WordBoundary()._is_repeatable(), True)
 83 |     
 84 |     def test_left_word_boundary(self):
 85 |         self.assertEqual(str(self.left_word_boundary), f"\\b{TEST_STR}")
 86 | 
 87 |     def test_left_word_boundary_on_type(self):
 88 |         self.assertEqual(self.left_word_boundary._get_type(), _Type.Assertion)
 89 | 
 90 |     def test_left_word_boundary_on_quantifiability(self):
 91 |         self.assertEqual(self.left_word_boundary._is_repeatable(), True)
 92 | 
 93 |     def test_right_word_boundary(self):
 94 |         self.assertEqual(str(self.right_word_boundary), f"{TEST_STR}\\b")
 95 | 
 96 |     def test_right_word_boundary_on_type(self):
 97 |         self.assertEqual(self.right_word_boundary._get_type(), _Type.Assertion)
 98 | 
 99 |     def test_right_word_boundary_on_quantifiability(self):
100 |         self.assertEqual(self.right_word_boundary._is_repeatable(), True)
101 | 
102 |     def test_left_and_right_word_boundary(self):
103 |         self.assertEqual(str(self.left_and_right_word_boundary), f"\\b{TEST_STR}\\b")
104 | 
105 |     def test_left_and_right_word_boundary_on_type(self):
106 |         self.assertEqual(self.left_and_right_word_boundary._get_type(), _Type.Assertion)
107 | 
108 |     def test_left_and_right_word_boundary_on_quantifiability(self):
109 |         self.assertEqual(self.left_and_right_word_boundary._is_repeatable(), True)
110 | 
111 | 
112 | class TestNonWordBoundary(unittest.TestCase):
113 |     # Fixtures: a non-word boundary concatenated to the left, right and both sides of TEST_STR.
114 |     left_non_word_boundary = NonWordBoundary() + TEST_STR
115 |     right_non_word_boundary = TEST_STR + NonWordBoundary()
116 |     left_and_right_non_word_boundary = NonWordBoundary() + TEST_STR + NonWordBoundary()
117 | 
118 |     def test_non_word_boundary_on_pattern(self):
119 |         self.assertEqual(str(NonWordBoundary()), "\\B")
120 | 
121 |     def test_non_word_boundary_on_matches(self):
122 |         self.assertEqual((NonWordBoundary() + "a").get_matches("a ba a"), ["a"])  # Only the "a" inside "ba".
123 | 
124 |     def test_non_word_boundary_on_type(self):
125 |         self.assertEqual(NonWordBoundary()._get_type(), _Type.Assertion)
126 | 
127 |     def test_non_word_boundary_on_quantifiability(self):
128 |         self.assertEqual(NonWordBoundary()._is_repeatable(), True)
129 |     
130 |     def test_left_non_word_boundary(self):
131 |         self.assertEqual(str(self.left_non_word_boundary), f"\\B{TEST_STR}")
132 | 
133 |     def test_left_non_word_boundary_on_type(self):
134 |         self.assertEqual(self.left_non_word_boundary._get_type(), _Type.Assertion)
135 | 
136 |     def test_left_non_word_boundary_on_quantifiability(self):
137 |         self.assertEqual(self.left_non_word_boundary._is_repeatable(), True)
138 | 
139 |     def test_right_non_word_boundary(self):
140 |         self.assertEqual(str(self.right_non_word_boundary), f"{TEST_STR}\\B")
141 | 
142 |     def test_right_non_word_boundary_on_type(self):
143 |         self.assertEqual(self.right_non_word_boundary._get_type(), _Type.Assertion)
144 | 
145 |     def test_right_non_word_boundary_on_quantifiability(self):
146 |         self.assertEqual(self.right_non_word_boundary._is_repeatable(), True)
147 | 
148 |     def test_left_and_right_non_word_boundary(self):
149 |         self.assertEqual(str(self.left_and_right_non_word_boundary), f"\\B{TEST_STR}\\B")
150 | 
151 |     def test_left_and_right_non_word_boundary_on_type(self):
152 |         self.assertEqual(self.left_and_right_non_word_boundary._get_type(), _Type.Assertion)
153 | 
154 |     def test_left_and_right_non_word_boundary_on_quantifiability(self):
155 |         self.assertEqual(self.left_and_right_non_word_boundary._is_repeatable(), True)
156 | 
157 | 
158 | class TestFollowedBy(unittest.TestCase):
159 |     # First argument is the main pattern; each further argument is expected as a "(?=...)" lookahead.
160 |     def test_followed_by(self):
161 |         self.assertEqual(str(FollowedBy(pre1, pre2)), f"{pre1}(?={pre2})")
162 | 
163 |     def test_followed_by_on_multiple_patterns(self):
164 |         self.assertEqual(str(FollowedBy(pre1, pre2, pre3)), f"{pre1}(?={pre2})(?={pre3})")
165 | 
166 |     def test_followed_by_on_type(self):
167 |         self.assertEqual(FollowedBy("a", "b")._get_type(), _Type.Assertion)
168 | 
169 |     def test_followed_by_on_quantifiability(self):
170 |         self.assertEqual(FollowedBy("a", "b")._is_repeatable(), False)
171 | 
172 |     def test_followed_by_on_empty_string_as_assertion_pattern(self):
173 |         self.assertEqual(str(FollowedBy(pre1, Pregex())), f"{pre1}")  # Empty assertion adds no lookahead.
174 | 
175 |     def test_followed_by_on_not_enough_arguments_exception(self):
176 |         self.assertRaises(NotEnoughArgumentsException, FollowedBy, pre1)
177 | 
178 | 
179 | class TestNotFollowedBy(unittest.TestCase):
180 |     # First argument is the main pattern; each further argument is expected as a "(?!...)" lookahead.
181 |     def test_not_followed_by(self):
182 |         self.assertEqual(str(NotFollowedBy(pre1, pre2)), f"{pre1}(?!{pre2})")
183 | 
184 |     def test_not_followed_by_on_multiple_patterns(self):
185 |         self.assertEqual(str(NotFollowedBy(pre1, pre2, pre3)), f"{pre1}(?!{pre2})(?!{pre3})")
186 | 
187 |     def test_not_followed_by_on_type(self):
188 |         self.assertEqual(NotFollowedBy("a", "b")._get_type(), _Type.Assertion)
189 | 
190 |     def test_not_followed_by_on_quantifiability(self):
191 |         self.assertEqual(NotFollowedBy("a", "b")._is_repeatable(), True)
192 | 
193 |     def test_not_followed_by_on_not_enough_arguments_exception(self):
194 |         self.assertRaises(NotEnoughArgumentsException, NotFollowedBy, pre1)
195 | 
196 |     def test_not_followed_by_on_empty_string_negative_assertion_exception(self):
197 |         self.assertRaises(EmptyNegativeAssertionException, NotFollowedBy, pre1, Pregex())
198 | 
199 |     def test_not_followed_by_on_multiple_patterns_empty_string_negative_assertion_exception(self):
200 |         self.assertRaises(EmptyNegativeAssertionException, NotFollowedBy, pre1, pre2, Pregex())
201 | 
202 | 
203 | class TestPrecededBy(unittest.TestCase):
204 |     # First argument is the main pattern; each further argument is expected as a "(?<=...)" lookbehind.
205 |     def test_preceded_by(self):
206 |         self.assertEqual(str(PrecededBy(pre1, pre2)), f"(?<={pre2}){pre1}")
207 | 
208 |     def test_preceded_by_on_multiple_patterns(self):
209 |         self.assertEqual(str(PrecededBy(pre1, pre2, pre3)), f"(?<={pre3})(?<={pre2}){pre1}")  # Reverse argument order.
210 | 
211 |     def test_preceded_by_on_type(self):
212 |         self.assertEqual(PrecededBy("a", "b")._get_type(), _Type.Assertion)
213 | 
214 |     def test_preceded_by_on_quantifiability(self):
215 |         self.assertEqual(PrecededBy("a", "b")._is_repeatable(), False)
216 | 
217 |     def test_preceded_by_on_quantifier(self):
218 |         exactly = Exactly(pre2, 3)  # Fixed-width repetition is accepted inside a lookbehind...
219 |         self.assertEqual(str(PrecededBy(pre1, exactly)), f"(?<={exactly}){pre1}")
220 |         self.assertRaises(NonFixedWidthPatternException, PrecededBy, pre1, Optional(pre2))  # ...variable-width is not.
221 | 
222 |     def test_preceded_by_on_empty_string_as_assertion_pattern(self):
223 |         self.assertEqual(str(PrecededBy(pre1, Pregex())), f"{pre1}")  # Empty assertion adds no lookbehind.
224 | 
225 |     def test_preceded_by_on_not_enough_arguments_exception(self):
226 |         self.assertRaises(NotEnoughArgumentsException, PrecededBy, pre1)
227 | 
228 | 
229 | class TestNotPrecededBy(unittest.TestCase):
230 |     # First argument is the main pattern; each further argument is expected as a "(?<!...)" lookbehind.
231 |     def test_not_preceded_by(self):
232 |         self.assertEqual(str(NotPrecededBy(pre1, pre2)), f"(?<!{pre2}){pre1}")
233 | 
234 |     def test_not_preceded_by_on_multiple_patterns(self):
235 |         self.assertEqual(str(NotPrecededBy(pre1, pre2, pre3)), f"(?<!{pre3})(?<!{pre2}){pre1}")  # Reverse argument order.
236 | 
237 |     def test_not_preceded_by_on_type(self):
238 |         self.assertEqual(NotPrecededBy("a", "b")._get_type(), _Type.Assertion)
239 | 
240 |     def test_not_preceded_by_on_quantifiability(self):
241 |         self.assertEqual(NotPrecededBy("a", "b")._is_repeatable(), True)
242 | 
243 |     def test_not_preceded_by_on_exactly_quantifier(self):
244 |         exactly = Exactly(pre2, 3)
245 |         self.assertEqual(str(NotPrecededBy(pre1, exactly)), f"(?<!{exactly}){pre1}")
246 | 
247 |     def test_not_preceded_by_on_not_enough_arguments_exception(self):
248 |         self.assertRaises(NotEnoughArgumentsException, NotPrecededBy, pre1)
249 | 
250 |     def test_not_preceded_by_on_empty_string_negative_assertion_exception(self):
251 |         self.assertRaises(EmptyNegativeAssertionException, NotPrecededBy, pre1, Pregex())
252 | 
253 |     def test_not_preceded_by_on_multiple_patterns_empty_string_negative_assertion_exception(self):
254 |         self.assertRaises(EmptyNegativeAssertionException, NotPrecededBy, pre1, pre2, Pregex())
255 | 
256 |     def test_not_preceded_by_on_non_fixed_width_pattern_exception(self):
257 |         self.assertRaises(NonFixedWidthPatternException, NotPrecededBy, pre1, Optional(pre2))
258 | 
259 |     def test_not_preceded_by_on_multiple_patterns_non_fixed_width_pattern_exception(self):
260 |         self.assertRaises(NonFixedWidthPatternException, NotPrecededBy, pre1, pre2, Optional(pre3))
261 | 
262 | 
263 | class TestEnclosedBy(unittest.TestCase):
264 |     # Each assertion pattern is expected both as a "(?<=...)" lookbehind and as a "(?=...)" lookahead.
265 |     def test_enclosed_by(self):
266 |         self.assertEqual(str(EnclosedBy(pre1, pre2)), f"(?<={pre2}){pre1}(?={pre2})")
267 | 
268 |     def test_enclosed_by_on_multiple_patterns(self):
269 |         self.assertEqual(str(EnclosedBy(pre1, pre2, pre3)),
270 |             f"(?<={pre3})(?<={pre2}){pre1}(?={pre2})(?={pre3})")  # Lookbehinds reversed, lookaheads in order.
271 | 
272 |     def test_enclosed_by_on_type(self):
273 |         self.assertEqual(EnclosedBy("a", "b")._get_type(), _Type.Assertion)
274 | 
275 |     def test_enclosed_by_on_quantifiability(self):
276 |         self.assertEqual(EnclosedBy("a", "b")._is_repeatable(), False)
277 | 
278 |     def test_enclosed_by_on_quantifier(self):
279 |         exactly = Exactly(pre2, 3)  # Fixed-width repetition is accepted...
280 |         self.assertEqual(str(EnclosedBy(pre1, exactly)), f"(?<={exactly}){pre1}(?={exactly})")
281 |         self.assertRaises(NonFixedWidthPatternException, EnclosedBy, pre1, Optional(pre2))  # ...variable-width is not.
282 | 
283 |     def test_enclosed_by_on_empty_string_as_assertion_pattern(self):
284 |         self.assertEqual(str(EnclosedBy(pre1, Pregex())), f"{pre1}")  # Empty assertion adds nothing.
285 | 
286 |     def test_enclosed_by_on_not_enough_arguments_exception(self):
287 |         self.assertRaises(NotEnoughArgumentsException, EnclosedBy, pre1)
288 | 
289 | 
290 | class TestNotEnclosedBy(unittest.TestCase):
291 |     # Each assertion pattern is expected both as a "(?<!...)" lookbehind and as a "(?!...)" lookahead.
292 |     def test_not_enclosed_by(self):
293 |         self.assertEqual(str(NotEnclosedBy(pre1, pre2)), f"(?<!{pre2}){pre1}(?!{pre2})")
294 | 
295 |     def test_not_enclosed_by_on_multiple_patterns(self):
296 |         self.assertEqual(str(NotEnclosedBy(pre1, pre2, pre3)),
297 |             f"(?<!{pre3})(?<!{pre2}){pre1}(?!{pre2})(?!{pre3})")  # Lookbehinds reversed, lookaheads in order.
298 | 
299 |     def test_not_enclosed_by_on_type(self):
300 |         self.assertEqual(NotEnclosedBy("a", "b")._get_type(), _Type.Assertion)
301 | 
302 |     def test_not_enclosed_by_on_quantifiability(self):
303 |         self.assertEqual(NotEnclosedBy("a", "b")._is_repeatable(), True)
304 | 
305 |     def test_not_enclosed_by_on_exactly_quantifier(self):
306 |         exactly = Exactly(pre2, 3)
307 |         self.assertEqual(str(NotEnclosedBy(pre1, exactly)), f"(?<!{exactly}){pre1}(?!{exactly})")
308 | 
309 |     def test_not_enclosed_by_on_not_enough_arguments_exception(self):
310 |         self.assertRaises(NotEnoughArgumentsException, NotEnclosedBy, pre1)
311 | 
312 |     def test_not_enclosed_by_on_empty_string_negative_assertion_exception(self):
313 |         self.assertRaises(EmptyNegativeAssertionException, NotEnclosedBy, pre1, Pregex())
314 | 
315 |     def test_not_enclosed_by_on_multiple_patterns_empty_string_negative_assertion_exception(self):
316 |         self.assertRaises(EmptyNegativeAssertionException, NotEnclosedBy, pre1, pre2, Pregex())
317 | 
318 |     def test_not_enclosed_by_on_non_fixed_width_pattern_exception(self):
319 |         self.assertRaises(NonFixedWidthPatternException, NotEnclosedBy, pre1, Optional(pre2))
320 | 
321 |     def test_not_enclosed_by_on_multiple_patterns_non_fixed_width_pattern_exception(self):
322 |         self.assertRaises(NonFixedWidthPatternException, NotEnclosedBy, pre1, pre2, Optional(pre3))
323 | 
324 | 
325 | if __name__=="__main__":
326 |     unittest.main()


--------------------------------------------------------------------------------
/tests/test_core_groups.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from pregex.core.groups import *
  3 | from pregex.core.tokens import Backslash
  4 | from pregex.core.pre import Pregex, _Type
  5 | from pregex.core.exceptions import InvalidArgumentTypeException, InvalidArgumentValueException, \
  6 |     InvalidCapturingGroupNameException
  7 | 
  8 | 
  9 | TEST_STR = "test"
 10 | 
 11 | 
 12 | class TestCapture(unittest.TestCase):
 13 |     # Covers unnamed and named capturing groups, including re-wrapping of existing groups.
 14 |     name = "NAME"
 15 | 
 16 |     def test_capture_on_str(self):
 17 |         self.assertEqual(str(Capture(TEST_STR)), f"({TEST_STR})")
 18 | 
 19 |     def test_capture_on_type(self):
 20 |         self.assertEqual(Capture("a")._get_type(), _Type.Group)
 21 | 
 22 |     def test_capture_on_literal(self):
 23 |         literal = Pregex(TEST_STR)
 24 |         self.assertEqual(str(Capture(literal)), f"({literal})")
 25 | 
 26 |     def test_capture_on_capturing_group(self):
 27 |         ''' Grouping a capturing group does nothing. '''
 28 |         group = Capture(TEST_STR)
 29 |         self.assertEqual(str(Capture(group)), f"{group}")
 30 | 
 31 |     def test_capture_on_case_insensitive_group(self):
 32 |         case_insensitive_group = Group(TEST_STR, is_case_insensitive=True)
 33 |         self.assertEqual(str(Capture(case_insensitive_group)), f"((?i:{TEST_STR}))")  # Flag group is kept and wrapped.
 34 | 
 35 |     def test_capture_on_concat_of_capturing_groups(self):
 36 |         pre = Capture("a") + "b" + Capture("c")
 37 |         self.assertEqual(str(Capture(pre)), f"({pre})")
 38 | 
 39 |     def test_capture_on_backslash_group(self):
 40 |         pre = Capture(Backslash())
 41 |         self.assertEqual(str(Capture(pre)), f"{pre}")
 42 | 
 43 |     def test_capture_on_concat_of_capturing_groups_starting_with_backslash_group(self):
 44 |         pre = Capture(Backslash()) + "b" + Capture("c")
 45 |         self.assertEqual(str(Capture(pre)), f"({pre})")
 46 | 
 47 |     def test_capture_on_concat_of_capturing_groups_ending_with_backslash_group(self):
 48 |         pre = Capture("a") + "b" + Capture(Backslash())
 49 |         self.assertEqual(str(Capture(pre)), f"({pre})")
 50 | 
 51 |     def test_capture_on_capturing_group_of_concat_of_capturing_groups(self):
 52 |         group = Capture(Capture("a") + "b" + Capture("c"))
 53 |         self.assertEqual(str(Capture(group)), f"{group}")
 54 | 
 55 |     def test_capture_on_non_capturing_group(self):
 56 |         ''' Grouping a non-capturing group converts it to a capturing group. '''
 57 |         group = Group(TEST_STR)
 58 |         self.assertEqual(str(Capture(group)), f"{str(group).replace('?:', '')}")
 59 | 
 60 |     def test_capture_on_concat_of_non_capturing_groups(self):
 61 |         pre = Group("a") + "b" + Group("c")
 62 |         self.assertEqual(str(Capture(pre)), f"({pre})")
 63 | 
 64 |     def test_capture_on_capturing_group_of_concat_of_non_capturing_groups(self):
 65 |         group = Capture(Group("a") + "b" + Group("c"))
 66 |         self.assertEqual(str(Capture(group)), f"{group}")
 67 | 
 68 |     def test_named_capturing_group_on_str(self):
 69 |         self.assertEqual(str(Capture(TEST_STR, self.name)), f"(?P<{self.name}>{TEST_STR})")
 70 | 
 71 |     def test_named_capturing_group_on_literal(self):
 72 |         literal = Pregex(TEST_STR)
 73 |         self.assertEqual(str(Capture(literal, self.name)), f"(?P<{self.name}>{literal})")
 74 | 
 75 |     def test_named_capturing_group_on_capturing_group(self):
 76 |         ''' Name-grouping a capturing group without a name, names the group. '''
 77 |         group = Capture(TEST_STR)
 78 |         self.assertEqual(str(Capture(group, self.name)), f"(?P<{self.name}>{str(group)[1:-1]})")  # [1:-1] strips "(" and ")".
 79 | 
 80 |     def test_named_capturing_group_on_named_capturing_group(self):
 81 |         ''' Name-grouping a capturing group with name, changes the group's name. '''
 82 |         group = Capture(TEST_STR, self.name)
 83 |         new_name = "NEW_NAME"
 84 |         self.assertEqual(str(Capture(group, new_name)), str(group).replace(self.name, new_name))
 85 | 
 86 |     def test_named_capturing_group_on_non_capturing_group(self):
 87 |         ''' Name-Grouping a non-capturing group converts it to a named capturing group. '''
 88 |         group = Group(TEST_STR)
 89 |         self.assertEqual(str(Capture(group, self.name)), f"(?P<{self.name}>{str(group)[:-1].replace('(?:', '', 1)})")
 90 | 
 91 |     def test_named_capturing_group_on_invalid_argument_type_exception(self):
 92 |         invalid_type_names = [1, 1.5, True, Pregex("z")]  # Every non-str name is expected to be rejected.
 93 |         for name in invalid_type_names:
 94 |             self.assertRaises(InvalidArgumentTypeException, Capture, "test", name)
 95 | 
 96 |     def test_named_capturing_group_on_invalid_name_exception(self):
 97 |         invalid_names = ["11zzz", "ald!!", "@%^Fl", "!flflf123", "dld-"]  # Leading digit or non-word characters.
 98 |         for name in invalid_names:
 99 |             self.assertRaises(InvalidCapturingGroupNameException, Capture, "test", name)
100 | 
101 | 
102 | class TestGroup(unittest.TestCase):
103 | 
104 |     def test_group_on_str(self):
105 |         self.assertEqual(str(Group(TEST_STR)), f"(?:{TEST_STR})")
106 | 
107 |     def test_group_on_type(self):
108 |         self.assertEqual(Group("a")._get_type(), _Type.Group)
109 |         self.assertNotEqual((Group("a") + Group("b"))._get_type(), _Type.Group)
110 | 
111 |     def test_group_on_pregex(self):
112 |         pregex = Pregex(TEST_STR)
113 |         self.assertEqual(str(Group(pregex)), f"(?:{pregex})")
114 | 
115 |     def test_group_on_is_case_insensitive(self):
116 |         self.assertEqual(str(Group(TEST_STR, is_case_insensitive=True)), f"(?i:{TEST_STR})")
117 | 
118 |     def test_group_on_capturing_group(self):
119 |         group = Capture(TEST_STR)
120 |         self.assertEqual(str(Group(group)), f"(?:{TEST_STR})")
121 | 
122 |     def test_group_on_flag_reset(self):
123 |         flag_group = Group(TEST_STR, is_case_insensitive=True)
124 |         self.assertEqual(str(Group(flag_group)), f"(?:{TEST_STR})")
125 | 
126 |     def test_group_on_concat_of_capturing_groups(self):
127 |         pre = Capture('a') + 'b' + Capture('c')
128 |         self.assertEqual(str(Group(pre)), f"(?:{pre})")
129 | 
130 |     def test_group_on_backslash_group(self):
131 |         group = Capture(Backslash())
132 |         self.assertEqual(str(Group(group)),f"{str(group).replace('(', '(?:')}")
133 | 
134 |     def test_group_on_concat_of_capturing_groups_starting_with_backslash_group(self):
135 |         pre = Capture(Backslash()) + "b" + Capture("c")
136 |         self.assertEqual(str(Group(pre)), f"(?:{pre})")
137 | 
138 |     def test_group_on_concat_of_capturing_groups_ending_with_backslash_group(self):
139 |         pre = Capture("a") + "b" + Capture(Backslash())
140 |         self.assertEqual(str(Group(pre)), f"(?:{pre})")
141 | 
142 |     def test_group_on_capturing_group_of_concat_of_capturing_groups(self):
143 |         group = Capture(Capture("a") + "b" + Capture("c"))
144 |         self.assertEqual(str(Group(group)), f"{str(group).replace('(', '(?:', 1)}")
145 | 
146 |     def test_group_on_non_capturing_group(self):
147 |         ''' Applying 'Group' on a non-capturing group does nothing. '''
148 |         group = Group(TEST_STR)
149 |         self.assertEqual(str(Group(group)), f"{group}")
150 | 
151 |     def test_group_on_concat_of_non_capturing_groups(self):
152 |         pre = Group("a") + "b" + Group("c")
153 |         self.assertEqual(str(Group(pre)), f"(?:{pre})")
154 | 
155 |     def test_group_on_non_capturing_group_of_concat_of_non_capturing_groups(self):
156 |         group = Group(Group("a") + "b" + Group("c"))
157 |         self.assertEqual(str(Group(group)), f"{group}")
158 | 
159 |     def test_group_on_named_capturing_group(self):
160 |         ''' Applying 'Group' on a non-capturing group converts it into a non-capturing group. '''
161 |         name = "NAME"
162 |         group = Capture(TEST_STR, name)
163 |         self.assertEqual(str(Group(group)), f"(?:{TEST_STR})")
164 | 
165 | 
166 | class TestBackreference(unittest.TestCase):
167 | 
168 |     def test_backreference_int(self):
169 |         ref = 1
170 |         self.assertEqual(str(Backreference(ref)), f"\\{ref}")
171 | 
172 |     def test_backreference_str(self):
173 |         ref = "name"
174 |         self.assertEqual(str(Backreference(ref)), f"(?P={ref})")
175 | 
176 |     def test_backreference_on_type(self):
177 |         self.assertEqual(Backreference("a")._get_type(), _Type.Group)
178 | 
179 |     def test_backreference_on_invalid_argument_type_exception(self):
180 |         invalid_type_names = [1.5, True, Pregex("z")]
181 |         for name in invalid_type_names:
182 |             self.assertRaises(InvalidArgumentTypeException, Backreference, name)
183 | 
184 |     def test_backreference_on_invalid_argument_value_exception(self):
185 |         ref1, ref2 = 0, 100
186 |         self.assertRaises(InvalidArgumentValueException, Backreference, ref1)
187 |         self.assertRaises(InvalidArgumentValueException, Backreference, ref2)
188 | 
189 |     def test_backreference_on_invalid_name_exception(self):
190 |         invalid_names = ["11zzz", "ald!!", "@%^Fl", "!flflf123", "dld-"]
191 |         for name in invalid_names:
192 |             with self.assertRaises(InvalidCapturingGroupNameException):
193 |                 _ = Backreference(name)
194 | 
195 |     def test_backreference_pattern(self):
196 |         name = "name"
197 |         pre: Pregex = Pregex(f"(?P<{name}>a|b)", escape=False) + Backreference(name)
198 |         self.assertTrue(pre.is_exact_match("aa"))
199 |         self.assertTrue(pre.is_exact_match("bb"))
200 |         self.assertFalse(pre.is_exact_match("ab"))
201 | 
202 | 
203 | class TestConditional(unittest.TestCase):
204 | 
205 |     name = "name"
206 |     then_pre = Pregex("then")
207 |     else_pre = Pregex("else")
208 | 
209 |     def test_conditional(self):
210 |         self.assertEqual(str(Conditional(self.name, self.then_pre)), f"(?({self.name}){self.then_pre})")
211 | 
212 |     def test_conditional_on_type(self):
213 |         self.assertEqual(Conditional("a", "b")._get_type(), _Type.Group)
214 | 
215 |     def test_conditional_with_else_pre(self):
216 |         self.assertEqual(str(Conditional(self.name, self.then_pre, self.else_pre)),
217 |         f"(?({self.name}){self.then_pre}|{self.else_pre})")
218 | 
219 |     def test_conditional_on_invalid_argument_type_exception(self):
220 |         invalid_type_names = [1, 1.5, True, Pregex("z")]
221 |         for name in invalid_type_names:
222 |             self.assertRaises(InvalidArgumentTypeException, Conditional, name, self.then_pre)
223 | 
224 |     def test_conditional_on_invalid_name_exception(self):
225 |         invalid_names = ["11zzz", "ald!!", "@%^Fl", "!flflf123", "dld-"]
226 |         for name in invalid_names:
227 |             with self.assertRaises(InvalidCapturingGroupNameException):
228 |                 _ = Conditional(name, self.then_pre)
229 | 
230 |     def test_conditional_pattern(self):
231 |         pre: Pregex = Pregex(f"(?P<{self.name}>A)", escape=False) + Conditional(self.name, "B")
232 |         self.assertTrue(pre.is_exact_match("AB"))
233 | 
234 |     def test_conditional_pattern_with_else(self):
235 |         pre: Pregex = Pregex(f"(?P<{self.name}>A)?", escape=False) + Conditional(self.name, "B", "C")
236 |         self.assertTrue(pre.is_exact_match("AB"))
237 |         self.assertTrue(pre.is_exact_match("C"))
238 | 
239 | 
240 | if __name__=="__main__":
241 |     unittest.main()


--------------------------------------------------------------------------------
/tests/test_core_operators.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from pregex.core.operators import *
  3 | from pregex.core.quantifiers import Exactly
  4 | from pregex.core.pre import Pregex, _Type
  5 | from pregex.core.classes import AnyLowercaseLetter
  6 | from pregex.core.assertions import FollowedBy, MatchAtStart
  7 | from pregex.core.exceptions import NotEnoughArgumentsException
  8 | 
  9 | 
 10 | TEST_STR_1 = "test1"  # first literal fixture used by the tests in this module
 11 | TEST_STR_2 = "test2"  # second literal fixture
 12 | TEST_STR_3 = "test3"  # third literal fixture
 13 | 
 14 | 
 15 | class TestConcat(unittest.TestCase):
 16 | 
 17 |     def test_concat_class_type(self):
 18 |         self.assertEqual(Concat("a", "b")._get_type(), _Type.Other)
 19 |     
 20 |     def test_concat_on_pattern(self):
 21 |         self.assertEqual(str(Concat(TEST_STR_1, TEST_STR_2)), f"{TEST_STR_1}{TEST_STR_2}")
 22 |         self.assertEqual(str(Concat(Pregex(TEST_STR_1), Pregex(TEST_STR_2))), f"{TEST_STR_1}{TEST_STR_2}")
 23 | 
 24 |     def test_concat_on_multiple_pattern(self):
 25 |         self.assertEqual(str(Concat(TEST_STR_1, TEST_STR_2, TEST_STR_3)),
 26 |             f"{TEST_STR_1}{TEST_STR_2}{TEST_STR_3}")
 27 | 
 28 |     def test_concat_on_quantifier(self):
 29 |         quantifier = Exactly(TEST_STR_1, 2)
 30 |         self.assertEqual(str(Concat(quantifier, TEST_STR_2)), f"{quantifier}{TEST_STR_2}")
 31 | 
 32 |     def test_concat_on_concat(self):
 33 |         concat = Concat(TEST_STR_1, TEST_STR_2)
 34 |         self.assertEqual(str(Concat(concat, TEST_STR_3)), f"{concat}{TEST_STR_3}")
 35 | 
 36 |     def test_concat_on_either(self):
 37 |         either = Either(TEST_STR_1, TEST_STR_2)
 38 |         self.assertEqual(str(Concat(either, TEST_STR_3)), f"(?:{either}){TEST_STR_3}")
 39 | 
 40 |     def test_concat_on_class(self):
 41 |         any_ll = AnyLowercaseLetter()
 42 |         self.assertEqual(str(Concat(any_ll, TEST_STR_3)), f"{any_ll}{TEST_STR_3}")
 43 | 
 44 |     def test_concat_on_anchor_assertion(self):
 45 |         mat = MatchAtStart("a")
 46 |         self.assertEqual(str(Concat(mat, TEST_STR_1)), f"{mat}{TEST_STR_1}")
 47 | 
 48 |     def test_concat_on_lookaround_assertion(self):
 49 |         followed_by = FollowedBy("a", "b")
 50 |         self.assertEqual(str(Concat(followed_by, TEST_STR_1)), f"{followed_by}{TEST_STR_1}")
 51 | 
 52 |     def test_concat_on_a_single_pattern(self):
 53 |         self.assertEqual(str(Concat(TEST_STR_1)), f"{TEST_STR_1}")
 54 | 
 55 |     def test_concat_on_no_patterns(self):
 56 |         self.assertEqual(str(Concat()), '')
 57 | 
 58 |     def test_concat_on_empty_string(self):
 59 |         self.assertEqual(str(Concat(TEST_STR_1, Pregex())), TEST_STR_1)
 60 | 
 61 | 
 62 | class TestEither(unittest.TestCase):
 63 | 
 64 |     def test_either_class_type(self):
 65 |         self.assertEqual(Either("a", "b")._get_type(), _Type.Alternation)
 66 |         self.assertEqual(Either("a", "|", "b")._get_type(), _Type.Alternation)
 67 |         self.assertNotEqual(("a" + Either("a", "b"))._get_type(), _Type.Alternation)
 68 |         self.assertNotEqual(("a|" + Either("a", "b"))._get_type(), _Type.Alternation)
 69 |         self.assertNotEqual((Either("a", "b") + "b")._get_type(), _Type.Alternation)
 70 |         self.assertNotEqual((Either("a", "b") + "|b")._get_type(), _Type.Alternation)
 71 |         self.assertNotEqual(("a" + Either("a", "b") + "b")._get_type(), _Type.Alternation)
 72 |         self.assertNotEqual(("a|" + Either("a", "b") + "|b")._get_type(), _Type.Alternation)
 73 | 
 74 |     def test_either_on_pattern(self):
 75 |         self.assertEqual(str(Either(TEST_STR_1, TEST_STR_2)), f"{TEST_STR_1}|{TEST_STR_2}")
 76 |         self.assertEqual(str(Either(Pregex(TEST_STR_1), Pregex(TEST_STR_2))), f"{TEST_STR_1}|{TEST_STR_2}")
 77 | 
 78 |     def test_either_on_multiple_pattern(self):
 79 |         self.assertEqual(str(Either(TEST_STR_1, TEST_STR_2, TEST_STR_3)),
 80 |             f"{TEST_STR_1}|{TEST_STR_2}|{TEST_STR_3}")
 81 | 
 82 |     def test_either_on_quantifier(self):
 83 |         quantifier = Exactly(TEST_STR_1, 2)
 84 |         self.assertEqual(str(Either(quantifier, TEST_STR_2)), f"{quantifier}|{TEST_STR_2}")
 85 | 
 86 |     def test_either_for_concat(self):
 87 |         concat = Concat(TEST_STR_1, TEST_STR_2)
 88 |         self.assertEqual(str(Either(concat, TEST_STR_3)), f"{concat}|{TEST_STR_3}")
 89 | 
 90 |     def test_either_on_either(self):
 91 |         either = Either(TEST_STR_1, TEST_STR_2)
 92 |         self.assertEqual(str(Either(either, TEST_STR_3)), f"{either}|{TEST_STR_3}")
 93 | 
 94 |     def test_either_on_class(self):
 95 |         any_ll = AnyLowercaseLetter()
 96 |         self.assertEqual(str(Either(any_ll, TEST_STR_3)), f"{any_ll}|{TEST_STR_3}")
 97 | 
 98 |     def test_either_on_a_single_pattern(self):
 99 |         self.assertEqual(str(Either(TEST_STR_1)), f"{TEST_STR_1}")
100 | 
101 |     def test_either_on_no_patterns(self):
102 |         self.assertEqual(str(Either()), '')
103 | 
104 |     def test_either_on_empty_string(self):
105 |         self.assertEqual(str(Either(TEST_STR_1, Pregex(), TEST_STR_2)), f"{TEST_STR_1}|{TEST_STR_2}")
106 | 
107 | 
108 | class TestEnclose(unittest.TestCase):
109 | 
110 |     def test_enclose_class_type(self):
111 |         self.assertEqual(Enclose("a", "b")._get_type(), _Type.Other)
112 |     
113 |     def test_enclose_on_pattern(self):
114 |         self.assertEqual(str(Enclose(TEST_STR_1, TEST_STR_2)), f"{TEST_STR_2}{TEST_STR_1}{TEST_STR_2}")
115 |         self.assertEqual(str(Enclose(Pregex(TEST_STR_1), Pregex(TEST_STR_2))), f"{TEST_STR_2}{TEST_STR_1}{TEST_STR_2}")
116 | 
117 |     def test_enclose_on_multiple_patterns(self):
118 |         self.assertEqual(str(Enclose(TEST_STR_1, TEST_STR_2, TEST_STR_3)),
119 |             f"{TEST_STR_3}{TEST_STR_2}{TEST_STR_1}{TEST_STR_2}{TEST_STR_3}")
120 | 
121 |     def test_enclose_on_no_enclosing_patterns(self):
122 |         self.assertEqual(str(Enclose(TEST_STR_1)), f"{TEST_STR_1}")
123 | 
124 |     def test_enclose_on_empty_string(self):
125 |         self.assertEqual(str(Enclose(TEST_STR_1, Pregex())), f"{TEST_STR_1}")
126 | 
127 | 
128 | if __name__=="__main__":
129 |     unittest.main()


--------------------------------------------------------------------------------
/tests/test_core_quantifiers.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from pregex.core.quantifiers import *
  3 | from pregex.core.pre import Pregex, _Type
  4 | from pregex.core.assertions import MatchAtStart
  5 | from pregex.core.operators import Concat, Either
  6 | from pregex.core.classes import AnyLowercaseLetter
  7 | from pregex.core.exceptions import InvalidArgumentTypeException, \
  8 |     InvalidArgumentValueException, CannotBeRepeatedException
  9 | 
 10 | 
 11 | TEST_STR_LEN_1 = "t"  # single-character string fixture
 12 | TEST_STR_LEN_N = "test"  # multi-character string fixture
 13 | TEST_LITERAL_LEN_1 = Pregex(TEST_STR_LEN_1)  # the single-character fixture wrapped in a Pregex
 14 | TEST_LITERAL_LEN_N = Pregex(TEST_STR_LEN_N)  # the multi-character fixture wrapped in a Pregex
 15 | 
 16 | 
 17 | class Test__Quantifier(unittest.TestCase):
 18 | 
 19 |     def test_quantifier_on_str(self):
 20 |         self.assertEqual(str(Optional(TEST_STR_LEN_N)), f"(?:{TEST_STR_LEN_N})?")
 21 | 
 22 |     def test_quantifier_on_literal(self):
 23 |         self.assertEqual(str(Optional(TEST_LITERAL_LEN_N)), f"(?:{TEST_LITERAL_LEN_N})?")
 24 | 
 25 |     def test_quantifier_on_concat(self):
 26 |         concat = Concat(TEST_STR_LEN_1, TEST_STR_LEN_N)
 27 |         self.assertEqual(str(Optional(concat)), f"(?:{concat})?")
 28 | 
 29 |     def test_quantifier_on_either(self):
 30 |         either = Either(TEST_STR_LEN_1, TEST_STR_LEN_N)
 31 |         self.assertEqual(str(Optional(either)), f"(?:{either})?")
 32 | 
 33 |     def test_quantifier_on_class(self):
 34 |         any_ll = AnyLowercaseLetter()
 35 |         self.assertEqual(str(Optional(any_ll)), f"{any_ll}?")
 36 | 
 37 |     def test_quantifier_on_quantifier(self):
 38 |         optional = Optional(TEST_STR_LEN_N)
 39 |         self.assertEqual(str(Optional(optional)), f"(?:{optional})?")
 40 | 
 41 | 
 42 | class TestOptional(unittest.TestCase):
 43 |     
 44 |     def test_optional_on_len_1_str(self):
 45 |         self.assertEqual(str(Optional(TEST_STR_LEN_1)), f"{TEST_STR_LEN_1}?")
 46 | 
 47 |     def test_optional_on_len_n_str(self):
 48 |         self.assertEqual(str(Optional(TEST_STR_LEN_N)), f"(?:{TEST_STR_LEN_N})?")
 49 | 
 50 |     def test_optional_on_len_1_literal(self):
 51 |         self.assertEqual(str(Optional(TEST_LITERAL_LEN_1)), f"{TEST_STR_LEN_1}?")
 52 | 
 53 |     def test_optional_on_len_n_literal(self):
 54 |         self.assertEqual(str(Optional(TEST_LITERAL_LEN_N)), f"(?:{TEST_STR_LEN_N})?")
 55 | 
 56 |     def test_optional_on_laziness(self):
 57 |         self.assertEqual(str(Optional(TEST_LITERAL_LEN_N, is_greedy=False)), f"(?:{TEST_STR_LEN_N})??")
 58 | 
 59 |     def test_optional_on_type(self):
 60 |         self.assertEqual(Optional("a")._get_type(), _Type.Quantifier)
 61 |         self.assertEqual(Optional("abc")._get_type(), _Type.Quantifier)
 62 |         self.assertNotEqual(Pregex("abc?", escape=False)._get_type(), _Type.Quantifier)
 63 | 
 64 |     def test_optional_on_match(self):
 65 |         self.assertTrue(("a" + Optional("a") + "a").get_matches("aaa") == ["aaa"])
 66 |         self.assertTrue(("a" + Optional("a") + "a").get_matches("aa") == ["aa"])
 67 | 
 68 |     def test_optional_on_lazy_match(self):
 69 |         self.assertTrue(("a" + Optional("a", is_greedy=False) + "a").get_matches("aaa") == ["aa"])
 70 | 
 71 |     def test_optional_on_non_repeatable_pattern(self):
 72 |         self.assertEqual(str(Optional(MatchAtStart("a"))), "(?:\\Aa)?")
 73 | 
 74 | 
 75 | class TestIndefinite(unittest.TestCase):
 76 |     
 77 |     def test_indefinite_on_len_1_str(self):
 78 |         self.assertEqual(str(Indefinite(TEST_STR_LEN_1)), f"{TEST_STR_LEN_1}*")
 79 | 
 80 |     def test_indefinite_on_len_n_str(self):
 81 |         self.assertEqual(str(Indefinite(TEST_STR_LEN_N)), f"(?:{TEST_STR_LEN_N})*")
 82 | 
 83 |     def test_indefinite_on_len_1_literal(self):
 84 |         self.assertEqual(str(Indefinite(TEST_LITERAL_LEN_1)), f"{TEST_STR_LEN_1}*")
 85 | 
 86 |     def test_indefinite_on_len_n_literal(self):
 87 |         self.assertEqual(str(Indefinite(TEST_LITERAL_LEN_N)), f"(?:{TEST_STR_LEN_N})*")
 88 | 
 89 |     def test_indefinite_on_laziness(self):
 90 |         self.assertEqual(str(Indefinite(TEST_LITERAL_LEN_N, is_greedy=False)), f"(?:{TEST_STR_LEN_N})*?")
 91 | 
 92 |     def test_indefinite_on_type(self):
 93 |         self.assertEqual(Indefinite("a")._get_type(), _Type.Quantifier)
 94 |         self.assertEqual(Indefinite("abc")._get_type(), _Type.Quantifier)
 95 |         self.assertNotEqual(Pregex("abc*", escape=False)._get_type(), _Type.Quantifier)
 96 | 
 97 |     def test_indefinite_on_non_repeatable_pattern(self):
 98 |         mat = MatchAtStart("a")
 99 |         self.assertRaises(CannotBeRepeatedException, Indefinite, mat)
100 | 
101 | 
102 | class TestOneOrMore(unittest.TestCase):
103 |     
104 |     def test_one_or_more_on_len_1_str(self):
105 |         self.assertEqual(str(OneOrMore(TEST_STR_LEN_1)), f"{TEST_STR_LEN_1}+")
106 | 
107 |     def test_one_or_more_on_len_n_str(self):
108 |         self.assertEqual(str(OneOrMore(TEST_STR_LEN_N)), f"(?:{TEST_STR_LEN_N})+")
109 | 
110 |     def test_one_or_more_on_len_1_literal(self):
111 |         self.assertEqual(str(OneOrMore(TEST_LITERAL_LEN_1)), f"{TEST_STR_LEN_1}+")
112 | 
113 |     def test_one_or_more_on_len_n_literal(self):
114 |         self.assertEqual(str(OneOrMore(TEST_LITERAL_LEN_N)), f"(?:{TEST_STR_LEN_N})+")
115 | 
116 |     def test_one_or_more_on_laziness(self):
117 |         self.assertEqual(str(OneOrMore(TEST_LITERAL_LEN_N, is_greedy=False)), f"(?:{TEST_STR_LEN_N})+?")
118 | 
119 |     def test_one_or_more_on_type(self):
120 |         self.assertEqual(OneOrMore("a")._get_type(), _Type.Quantifier)
121 |         self.assertEqual(OneOrMore("abc")._get_type(), _Type.Quantifier)
122 |         self.assertNotEqual(Pregex("abc+", escape=False)._get_type(), _Type.Quantifier)
123 | 
124 |     def test_one_or_more_on_non_repeatable_pattern(self):
125 |         mat = MatchAtStart("a")
126 |         self.assertRaises(CannotBeRepeatedException, OneOrMore, mat)
127 |         
128 | 
129 | class TestExactly(unittest.TestCase):
130 | 
131 |     VALID_VALUES = [2, 10]
132 |     
133 |     def test_exactly_on_len_1_str(self):
134 |         for val in self.VALID_VALUES:
135 |             self.assertEqual(str(Exactly(TEST_STR_LEN_1, val)), f"{TEST_STR_LEN_1}{{{val}}}")
136 | 
137 |     def test_exactly_on_len_n_str(self):
138 |         for val in self.VALID_VALUES:
139 |             self.assertEqual(str(Exactly(TEST_STR_LEN_N, val)), f"(?:{TEST_STR_LEN_N}){{{val}}}")
140 | 
141 |     def test_exactly_on_len_1_literal(self):
142 |         for val in self.VALID_VALUES:
143 |             self.assertEqual(str(Exactly(TEST_LITERAL_LEN_1, val)), f"{TEST_LITERAL_LEN_1}{{{val}}}")
144 | 
145 |     def test_exactly_on_len_n_literal(self):
146 |         for val in self.VALID_VALUES:
147 |             self.assertEqual(str(Exactly(TEST_LITERAL_LEN_N, val)), f"(?:{TEST_LITERAL_LEN_N}){{{val}}}")
148 | 
149 |     def test_exactly_on_value_1(self):
150 |         self.assertEqual(str(Exactly(TEST_LITERAL_LEN_N, 1)), f"{TEST_LITERAL_LEN_N}")
151 | 
152 |     def test_exactly_on_value_0(self):
153 |         self.assertEqual(str(Exactly(TEST_LITERAL_LEN_N, 0)), "")
154 | 
155 |     def test_exactly_on_type(self):
156 |         self.assertEqual(Exactly("a", n=2)._get_type(), _Type.Quantifier)
157 |         self.assertEqual(Exactly("abc", n=2)._get_type(), _Type.Quantifier)
158 |         self.assertNotEqual(Pregex("abc{2}", escape=False)._get_type(), _Type.Quantifier)
159 | 
160 |     def test_exactly_on_invalid_argument_type_exception(self):
161 |         for val in ["s", 1.1, True]:
162 |             self.assertRaises(InvalidArgumentTypeException, Exactly, TEST_STR_LEN_1, val)
163 | 
164 |     def test_exactly_on_invalid_argument_value_exception(self):
165 |         for val in [-10, -1]:
166 |             self.assertRaises(InvalidArgumentValueException, Exactly, TEST_STR_LEN_1, val)
167 | 
168 |     def test_exactly_on_non_repeatable_pattern(self):
169 |         mat = MatchAtStart("a")
170 |         self.assertRaises(CannotBeRepeatedException, Exactly, mat, n=2)
171 |         self.assertEqual(str(Exactly(mat, 1)), str(mat))
172 | 
173 | 
174 | class TestAtLeast(unittest.TestCase):
175 | 
176 |     VALID_VALUES = [2, 10]
177 |     
178 |     def test_at_least_on_len_1_str(self):
179 |         for val in self.VALID_VALUES:
180 |             self.assertEqual(str(AtLeast(TEST_STR_LEN_1, val)), f"{TEST_STR_LEN_1}{{{val},}}")
181 | 
182 |     def test_at_least_on_len_n_str(self):
183 |         for val in self.VALID_VALUES:
184 |             self.assertEqual(str(AtLeast(TEST_STR_LEN_N, val)), f"(?:{TEST_STR_LEN_N}){{{val},}}")
185 | 
186 |     def test_at_least_on_len_1_literal(self):
187 |         for val in self.VALID_VALUES:
188 |             self.assertEqual(str(AtLeast(TEST_LITERAL_LEN_1, val)), f"{TEST_LITERAL_LEN_1}{{{val},}}")
189 | 
190 |     def test_at_least_on_len_n_literal(self):
191 |         for val in self.VALID_VALUES:
192 |             self.assertEqual(str(AtLeast(TEST_LITERAL_LEN_N, val)), f"(?:{TEST_LITERAL_LEN_N}){{{val},}}") 
193 | 
194 |     def test_at_least_on_value_0(self):
195 |         val = 0
196 |         self.assertEqual(str(AtLeast(TEST_LITERAL_LEN_N, val)), f"(?:{TEST_LITERAL_LEN_N})*")  
197 | 
198 |     def test_at_least_on_value_1(self):
199 |         val = 1
200 |         self.assertEqual(str(AtLeast(TEST_LITERAL_LEN_N, val)), f"(?:{TEST_LITERAL_LEN_N})+")
201 | 
202 |     def test_at_least_on_laziness(self):
203 |         val = 3
204 |         self.assertEqual(str(AtLeast(TEST_LITERAL_LEN_N, val, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N}){{{val},}}?")
205 | 
206 |     def test_at_least_on_lazy_value_0(self):
207 |         val = 0
208 |         self.assertEqual(str(AtLeast(TEST_LITERAL_LEN_N, val, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N})*?")  
209 | 
210 |     def test_at_least_on_lazy_value_1(self):
211 |         val = 1
212 |         self.assertEqual(str(AtLeast(TEST_LITERAL_LEN_N, val, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N})+?")
213 | 
214 |     def test_at_least_on_type(self):
215 |         self.assertEqual(AtLeast("a", n=2)._get_type(), _Type.Quantifier)
216 |         self.assertEqual(AtLeast("abc", n=2)._get_type(), _Type.Quantifier)
217 |         self.assertNotEqual(Pregex("abc{2,}", escape=False)._get_type(), _Type.Quantifier)
218 | 
219 |     def test_at_least_on_invalid_argument_type_exception(self):
220 |         for val in ["s", 1.1, True]:
221 |             self.assertRaises(InvalidArgumentTypeException, AtLeast, TEST_STR_LEN_1, val)
222 | 
223 |     def test_at_least_on_invalid_argument_value_exception(self):
224 |         for val in [-10, -1]:
225 |             self.assertRaises(InvalidArgumentValueException, AtLeast, TEST_STR_LEN_1, val)
226 | 
227 |     def test_at_least_at_on_non_repeatable_pattern(self):
228 |         mat = MatchAtStart("a")
229 |         self.assertRaises(CannotBeRepeatedException, AtLeast, mat, n=5)
230 | 
231 | 
232 | class TestAtMost(unittest.TestCase):
233 | 
234 |     VALID_VALUES = [2, 10]
235 |     
236 |     def test_at_most_on_len_1_str(self):
237 |         for val in self.VALID_VALUES:
238 |             self.assertEqual(str(AtMost(TEST_STR_LEN_1, val)), f"{TEST_STR_LEN_1}{{,{val}}}")
239 | 
240 |     def test_at_most_on_len_n_str(self):
241 |         for val in self.VALID_VALUES:
242 |             self.assertEqual(str(AtMost(TEST_STR_LEN_N, val)), f"(?:{TEST_STR_LEN_N}){{,{val}}}")
243 | 
244 |     def test_at_most_on_len_1_literal(self):
245 |         for val in self.VALID_VALUES:
246 |             self.assertEqual(str(AtMost(TEST_LITERAL_LEN_1, val)), f"{TEST_LITERAL_LEN_1}{{,{val}}}")
247 | 
248 |     def test_at_most_on_len_n_literal(self):
249 |         for val in self.VALID_VALUES:
250 |             self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val)), f"(?:{TEST_LITERAL_LEN_N}){{,{val}}}")
251 | 
252 |     def test_at_most_on_value_0(self):
253 |         val = 0
254 |         self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val)), "")
255 | 
256 |     def test_at_most_on_value_1(self):
257 |         val = 1
258 |         self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val)), f"(?:{TEST_LITERAL_LEN_N})?")
259 | 
260 |     def test_at_most_on_value_None(self):
261 |         val = None
262 |         self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val)), f"(?:{TEST_LITERAL_LEN_N})*")
263 | 
264 |     def test_at_most_on_laziness(self):
265 |         val = 3
266 |         self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N}){{,{val}}}?")
267 | 
268 |     def test_at_most_on_lazy_value_1(self):
269 |         val = 1
270 |         self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N})??")
271 | 
272 |     def test_at_most_on_lazy_value_None(self):
273 |         val = None
274 |         self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N})*?")
275 | 
276 |     def test_at_most_on_type(self):
277 |         self.assertEqual(AtMost("a", n=2)._get_type(), _Type.Quantifier)
278 |         self.assertEqual(AtMost("abc", n=2)._get_type(), _Type.Quantifier)
279 |         self.assertNotEqual(Pregex("abc{,2}", escape=False)._get_type(), _Type.Quantifier)
280 | 
281 |     def test_at_most_on_invalid_argument_type_exception(self):
282 |         for val in ["s", 1.1, True]:
283 |             self.assertRaises(InvalidArgumentTypeException, AtMost, TEST_STR_LEN_1, val)
284 | 
285 |     def test_at_most_on_invalid_argument_value_exception(self):
286 |         for val in [-10, -1]:
287 |             self.assertRaises(InvalidArgumentValueException, AtMost, TEST_STR_LEN_1, val)
288 | 
289 |     def test_at_most_on_non_repeatable_pattern(self):
290 |         mat = MatchAtStart("a")
291 |         self.assertRaises(CannotBeRepeatedException, AtMost, mat, n=2)
292 |         self.assertEqual(str(AtMost(mat, 1)), f"(?:{mat})?")
293 | 
294 | 
295 | class TestAtLeastAtMost(unittest.TestCase):
296 | 
297 |     VALID_VALUES = [(2, 3), (10, 20)]
298 |     
299 |     def test_at_least_at_most_on_len_1_str(self):
300 |         for min, max in self.VALID_VALUES:
301 |             self.assertEqual(str(AtLeastAtMost(TEST_STR_LEN_1, min, max)), f"{TEST_STR_LEN_1}{{{min},{max}}}")
302 | 
303 |     def test_at_least_at_most_on_len_n_str(self):
304 |         for min, max in self.VALID_VALUES:
305 |             self.assertEqual(str(AtLeastAtMost(TEST_STR_LEN_N, min, max)), f"(?:{TEST_STR_LEN_N}){{{min},{max}}}")
306 | 
307 |     def test_at_least_at_most_on_len_1_literal(self):
308 |         for min, max in self.VALID_VALUES:
309 |             self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_1, min, max)), f"{TEST_LITERAL_LEN_1}{{{min},{max}}}")
310 | 
311 |     def test_at_least_at_most_on_len_n_literal(self):
312 |         for min, max in self.VALID_VALUES:
313 |             self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N}){{{min},{max}}}")
314 | 
315 |     def test_at_least_at_most_on_min_equal_to_max_equal_to_zero(self):
316 |         min, max = 0, 0
317 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), "")
318 | 
319 |     def test_at_least_at_most_on_min_equal_to_zero_max_equal_to_1(self):
320 |         min, max = 0, 1
321 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N})?")
322 | 
323 |     def test_at_least_at_most_on_min_equal_to_zero_max_greater_than_1(self):
324 |         min, max = 0, 2
325 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N}){{,{max}}}")
326 | 
327 |     def test_at_least_at_most_on_min_equal_to_zero_max_equal_to_None(self):
328 |         min, max = 0, None
329 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N})*")
330 | 
331 |     def test_at_least_at_most_on_min_equal_to_max_equal_to_one(self):
332 |         min, max = 1, 1
333 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), str(TEST_LITERAL_LEN_N))
334 | 
335 |     def test_at_least_at_most_on_min_equal_to_one_max_equal_to_None(self):
336 |         min, max = 1, None
337 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N})+")
338 | 
339 |     def test_at_least_at_most_on_min_equal_to_max(self):
340 |         min, max = 2, 2
341 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N}){{{min}}}")
342 | 
343 |     def test_at_least_at_most_on_min_equal_to_two_max_equal_to_None(self):
344 |         min, max = 2, None
345 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N}){{{min},}}")
346 | 
347 |     def test_at_least_at_most_on_laziness(self):
348 |         min, max = 3, 5
349 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)),
350 |             f"(?:{TEST_LITERAL_LEN_N}){{{min},{max}}}?")
351 | 
352 |     def test_at_least_at_most_on_lazy_min_equal_to_zero_max_equal_to_1(self):
353 |         min, max = 0, 1
354 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N})??")
355 | 
356 |     def test_at_least_at_most_on_lazy_min_equal_to_zero_max_greater_than_1(self):
357 |         min, max = 0, 2
358 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N}){{,{max}}}?")
359 | 
360 |     def test_at_least_at_most_on_lazy_min_equal_to_zero_max_equal_to_None(self):
361 |         min, max = 0, None
362 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)),
363 |             f"(?:{TEST_LITERAL_LEN_N})*?")
364 | 
365 |     def test_at_least_at_most_on_lazy_min_equal_to_one_max_equal_to_None(self):
366 |         min, max = 1, None
367 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)),
368 |         f"(?:{TEST_LITERAL_LEN_N})+?")
369 | 
370 |     def test_at_least_at_most_on_lazy_min_equal_to_two_max_equal_to_None(self):
371 |         min, max = 2, None
372 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)),
373 |             f"(?:{TEST_LITERAL_LEN_N}){{{min},}}?")
374 | 
375 |     def test_at_least_at_most_on_lazy_min_equal_to_max(self):
376 |         min, max = 2, 2
377 |         self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N}){{{min}}}")
378 | 
379 |     def test_at_least_at_most_on_type(self):
380 |         self.assertEqual(AtLeastAtMost("a", n=1, m=2)._get_type(), _Type.Quantifier)
381 |         self.assertEqual(AtLeastAtMost("abc", n=1, m=2)._get_type(), _Type.Quantifier)
382 |         self.assertNotEqual(Pregex("abc{1,2}", escape=False)._get_type(), _Type.Quantifier)
383 | 
384 |     def test_at_least_at_most_on_invalid_argument_type_exception(self):
385 |         for val in ["s", 1.1, True]:
386 |             self.assertRaises(InvalidArgumentTypeException, AtLeastAtMost, TEST_STR_LEN_1, n=val, m=10)
387 |             self.assertRaises(InvalidArgumentTypeException, AtLeastAtMost, TEST_STR_LEN_1, n=2, m=val)
388 | 
389 |     def test_at_least_at_most_on_invalid_argument_value_exception(self):
390 |         self.assertRaises(InvalidArgumentValueException, AtLeastAtMost, TEST_STR_LEN_1, n=-1, m=1)
391 |         self.assertRaises(InvalidArgumentValueException, AtLeastAtMost, TEST_STR_LEN_1, n=1, m=-1)
392 |         self.assertRaises(InvalidArgumentValueException, AtLeastAtMost, TEST_STR_LEN_1, n=5, m=3)
393 | 
394 |     def test_at_least_at_most_on_non_repeatable_pattern(self):
395 |         mat = MatchAtStart("a")
396 |         self.assertRaises(CannotBeRepeatedException, AtLeastAtMost, mat, n=2, m=3)
397 |         self.assertEqual(str(AtLeastAtMost(mat, n=0, m=1)), f"(?:{mat})?")
398 |         self.assertEqual(str(AtLeastAtMost(mat, n=1, m=1)), str(mat))
399 | 
400 | 
401 | if __name__=="__main__":
402 |     unittest.main()


--------------------------------------------------------------------------------
/tests/test_core_tokens.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from pregex.core.tokens import *
  3 | from pregex.core.pre import _Type
  4 | 
  5 | 
  6 | class TestBackslash(unittest.TestCase):
  7 | 
  8 |     def test_backslash(self):
  9 |         self.assertEqual(str(Backslash()), r"\\")
 10 | 
 11 |     def test_backslash_on_type(self):
 12 |         self.assertEqual(Backslash()._get_type(), _Type.Token)
 13 | 
 14 |     def test_backslash_on_match(self):
 15 |         self.assertTrue(Backslash().get_matches(r"text\ttext") == ["\\"])
 16 | 
 17 | 
 18 | class TestBullet(unittest.TestCase):
 19 | 
 20 |     def test_bullet(self):
 21 |         self.assertEqual(str(Bullet()), "\u2022")
 22 | 
 23 |     def test_backslash_on_type(self):
 24 |         self.assertEqual(Bullet()._get_type(), _Type.Token)
 25 | 
 26 |     def test_bullet_on_match(self):
 27 |         self.assertTrue(Bullet().get_matches("text•text") == ["•"])
 28 | 
 29 | 
 30 | class TestCarriageReturn(unittest.TestCase):
 31 | 
 32 |     def test_carriage_return(self):
 33 |         self.assertEqual(str(CarriageReturn()), "\r")
 34 | 
 35 |     def test_carriage_return_on_type(self):
 36 |         self.assertEqual(CarriageReturn()._get_type(), _Type.Token)
 37 | 
 38 |     def test_carriage_return_on_match(self):
 39 |         self.assertTrue(CarriageReturn().get_matches("text\rtext") == ["\r"])        
 40 | 
 41 | 
 42 | class TestCopyright(unittest.TestCase):
 43 | 
 44 |     def test_copyright(self):
 45 |         self.assertEqual(str(Copyright()), "\u00A9")
 46 | 
 47 |     def test_copyright_on_type(self):
 48 |         self.assertEqual(Copyright()._get_type(), _Type.Token)
 49 | 
 50 |     def test_copyright_on_match(self):
 51 |         self.assertTrue(Copyright().get_matches("text©text") == ["©"])
 52 | 
 53 | 
 54 | class TestDivision(unittest.TestCase):
 55 | 
 56 |     def test_division(self):
 57 |         self.assertEqual(str(Division()), "\u00f7")
 58 | 
 59 |     def test_division_on_type(self):
 60 |         self.assertEqual(Division()._get_type(), _Type.Token)
 61 | 
 62 |     def test_division_on_match(self):
 63 |         self.assertTrue(Division().get_matches("text÷text") == ["÷"])         
 64 | 
 65 | 
 66 | class TestDollar(unittest.TestCase):
 67 | 
 68 |     def test_dollar(self):
 69 |         self.assertEqual(str(Dollar()), "\\\u0024")
 70 | 
 71 |     def test_dollar_on_type(self):
 72 |         self.assertEqual(Dollar()._get_type(), _Type.Token)
 73 | 
 74 |     def test_dollar_on_match(self):
 75 |         self.assertTrue(Dollar().get_matches("text$text") == ["$"])
 76 | 
 77 | 
 78 | class TestEuro(unittest.TestCase):
 79 | 
 80 |     def test_euro(self):
 81 |         self.assertEqual(str(Euro()), "\u20ac")
 82 | 
 83 |     def test_euro_on_type(self):
 84 |         self.assertEqual(Euro()._get_type(), _Type.Token)
 85 | 
 86 |     def test_euro_on_match(self):
 87 |         self.assertTrue(Euro().get_matches("text€text") == ["€"])   
 88 | 
 89 | 
 90 | class TestFormFeed(unittest.TestCase):
 91 | 
 92 |     def test_form_feed(self):
 93 |         self.assertEqual(str(FormFeed()), "\f")
 94 | 
 95 |     def test_form_feed_on_type(self):
 96 |         self.assertEqual(FormFeed()._get_type(), _Type.Token)
 97 | 
 98 |     def test_form_feed_on_match(self):
 99 |         self.assertTrue(FormFeed().get_matches("text\ftext") == ["\f"])
100 | 
101 | 
102 | class TestInfinity(unittest.TestCase):
103 | 
104 |     def test_infinity(self):
105 |         self.assertEqual(str(Infinity()), "\u221e")
106 | 
107 |     def test_infinity_on_type(self):
108 |         self.assertEqual(Infinity()._get_type(), _Type.Token)
109 | 
110 |     def test_infinity_on_match(self):
111 |         self.assertTrue(Infinity().get_matches("text∞text") == ["∞"])
112 | 
113 | 
114 | class TestMultiplication(unittest.TestCase):
115 | 
116 |     def test_multiplication(self):
117 |         self.assertEqual(str(Multiplication()), "\u00d7")
118 | 
119 |     def test_multiplication_on_type(self):
120 |         self.assertEqual(Multiplication()._get_type(), _Type.Token)
121 | 
122 |     def test_multiplication_on_match(self):
123 |         self.assertTrue(Multiplication().get_matches("text×text") == ["×"])        
124 | 
125 | 
126 | class TestNewline(unittest.TestCase):
127 | 
128 |     def test_newline(self):
129 |         self.assertEqual(str(Newline()), "\n")
130 | 
131 |     def test_newline_on_type(self):
132 |         self.assertEqual(Newline()._get_type(), _Type.Token)
133 | 
134 |     def test_newline_on_match(self):
135 |         self.assertTrue(Newline().get_matches("text\ntext") == ["\n"])
136 | 
137 | 
138 | class TestPound(unittest.TestCase):
139 | 
140 |     def test_pound(self):
141 |         self.assertEqual(str(Pound()), "\u00a3")
142 | 
143 |     def test_pound_on_type(self):
144 |         self.assertEqual(Pound()._get_type(), _Type.Token)
145 | 
146 |     def test_pound_on_match(self):
147 |         self.assertTrue(Pound().get_matches("text£text") == ["£"]) 
148 | 
149 | 
150 | class TestRegistered(unittest.TestCase):
151 | 
152 |     def test_registered(self):
153 |         self.assertEqual(str(Registered()), "\u00ae")
154 | 
155 |     def test_registered_on_type(self):
156 |         self.assertEqual(Registered()._get_type(), _Type.Token)
157 | 
158 |     def test_registered_on_match(self):
159 |         self.assertTrue(Registered().get_matches("text®text") == ["®"])
160 | 
161 | 
162 | class TestRupee(unittest.TestCase):
163 | 
164 |     def test_rupee(self):
165 |         self.assertEqual(str(Rupee()), "\u20b9")
166 | 
167 |     def test_rupee_on_type(self):
168 |         self.assertEqual(Rupee()._get_type(), _Type.Token)
169 | 
170 |     def test_rupee_on_match(self):
171 |         self.assertTrue(Rupee().get_matches("text₹text") == ["₹"])         
172 | 
173 | 
174 | class TestSpace(unittest.TestCase):
175 | 
176 |     def test_space(self):
177 |         self.assertEqual(str(Space()), r" ")
178 | 
179 |     def test_space_on_type(self):
180 |         self.assertEqual(Space()._get_type(), _Type.Token)
181 | 
182 |     def test_space_on_match(self):
183 |         self.assertTrue(Space().get_matches(r"text ext") == [" "])
184 | 
185 | 
186 | class TestTab(unittest.TestCase):
187 | 
188 |     def test_tab(self):
189 |         self.assertEqual(str(Tab()), "\t")
190 | 
191 |     def test_tab_on_type(self):
192 |         self.assertEqual(Tab()._get_type(), _Type.Token)
193 | 
194 |     def test_tab_on_match(self):
195 |         self.assertTrue(Tab().get_matches("text\ttext") == ["\t"])
196 | 
197 | 
198 | class TestTrademark(unittest.TestCase):
199 | 
200 |     def test_trademark(self):
201 |         self.assertEqual(str(Trademark()), "\u2122")
202 | 
203 |     def test_trademark_on_type(self):
204 |         self.assertEqual(Trademark()._get_type(), _Type.Token)
205 | 
206 |     def test_trademark_on_match(self):
207 |         self.assertTrue(Trademark().get_matches("text™text") == ["™"])
208 | 
209 | 
210 | class TestVerticalTab(unittest.TestCase):
211 | 
212 |     def test_vertical_tab(self):
213 |         self.assertEqual(str(VerticalTab()), "\v")
214 | 
215 |     def test_vertical_tab_on_type(self):
216 |         self.assertEqual(VerticalTab()._get_type(), _Type.Token)
217 | 
218 |     def test_vertical_tab_on_match(self):
219 |         self.assertTrue(VerticalTab().get_matches("text\vtext") == ["\v"])
220 | 
221 | 
222 | class TestWhiteBullet(unittest.TestCase):
223 | 
224 |     def test_white_bullet(self):
225 |         self.assertEqual(str(WhiteBullet()), "\u25e6")
226 | 
227 |     def test_white_bullet_on_type(self):
228 |         self.assertEqual(WhiteBullet()._get_type(), _Type.Token)
229 | 
230 |     def test_white_bullet_on_match(self):
231 |         self.assertTrue(WhiteBullet().get_matches("text◦text") == ["◦"])        
232 | 
233 | 
234 | class TestYen(unittest.TestCase):
235 | 
236 |     def test_yen(self):
237 |         self.assertEqual(str(Yen()), "\u00a5")
238 | 
239 |     def test_yen_on_type(self):
240 |         self.assertEqual(Yen()._get_type(), _Type.Token)
241 | 
242 |     def test_yen_on_match(self):
243 |         self.assertTrue(Yen().get_matches("text¥text") == ["¥"]) 
244 | 
245 | 
246 | if __name__=="__main__":
247 |     unittest.main()


--------------------------------------------------------------------------------