├── .gitignore ├── .travis.yml ├── AUTHORS ├── LICENSE ├── MANIFEST.ini ├── README.md ├── hre.py ├── setup.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | MANIFEST 60 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | - "3.7" 5 | script: python test.py -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Marcelo Fonseca Tambalo 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Marcelo Fonseca Tambalo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a 
copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.ini: -------------------------------------------------------------------------------- 1 | include hre.py 2 | include setup.py 3 | include README.md 4 | include LICENSE 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | HumanRegex 3 | ========== 4 | 5 | 6 | [![Build Status](https://travis-ci.org/zokis/HumanRegex.svg?branch=master)](https://travis-ci.org/zokis/HumanRegex) [![PyPI version](https://badge.fury.io/py/hre.svg)](http://badge.fury.io/py/hre) 7 | 8 | ## Install 9 | 10 | ```sh 11 | pip install hre 12 | ``` 13 | 14 | API to make it easier to use regex 15 | 16 | ``RE('your regex here')`` 17 | 18 | Returns a Callable object that facilitates verification of matches 19 | 20 | It allows the combination of regex to form new regex using the operators & and | 21 | 22 | Flags Support 23 | 24 | It has a verbose api to create regex and with shortcuts, both supported the combinations 25 | 26 | 27 | ## Examples 28 | 29 | ### Testing if the string contains digits 30 | ```python 31 | from hre import RE 32 | 33 | my_re = RE('[0-9]+') 34 | if bool(my_re('number: 25')): 35 | print('Regex Ok') 36 | # >> Regex Ok 37 | my_match = my_re('number: 25') 38 | if my_match[0] == '25': 39 | print('25') 40 | # >> 25 41 | if RE('(?P[0-9]+)')('number: 25')['number'] == '25': 42 | print('number 25') 43 | # >> number 25 44 | ``` 45 | 46 | ### Tests if the email is valid and captures the account and the provider 47 | ```python 48 | my_re = RE(r'(?P[A-Za-z0-9+_.]+)\@(?P[A-Za-z0-9]+)\..+') 49 | my_match = my_re('my_email.1+github@Provider.com') 50 | print my_match['account'] 51 | # >> my_email.1+github 52 | print my_match['provider'] 53 | # >> Provider 54 | ``` 55 | 56 | ### Replacing strings 57 | ```python 58 | from hre import RE 59 | 60 | print 
RE('(?:red)').replace("violets are red", 'blue') 61 | # >> violets are blue 62 | ``` 63 | 64 | ### Using verbal expressions for the same examples 65 | ```python 66 | from hre import HumanRegex as HR 67 | 68 | my_re = HR().digits() 69 | if bool(my_re('number: 25')): 70 | print('Regex Ok') 71 | # >> Regex Ok 72 | my_match = my_re('number: 25') 73 | if my_match[0] == '25': 74 | print('25') 75 | # >> 25 76 | if HR().digits(name='number')('number: 25')['number'] == '25': 77 | print('number 25') 78 | # >> number 25 79 | 80 | ``` 81 | 82 | ```python 83 | from hre import HumanRegex as HR 84 | 85 | az = ['a', 'z'] 86 | AZ = ['A', 'Z'] 87 | _09 = ['0', '9'] 88 | special = '_.+' 89 | 90 | my_re = HR().ranges( 91 | AZ, az, 92 | _09, special, 93 | name='account' 94 | ).then('@').ranges( 95 | AZ, az, _09, 96 | name='provider' 97 | ).then('.').anything() 98 | my_match = my_re('my_email.1+github@Provider.com') 99 | print my_re 100 | # >> (?P([A-Za-z0-9\_\.\+]+))(?:\@)(?P([A-Za-z0-9]+))(?:\.)(?:.*) 101 | print my_match['account'] 102 | # >> my_email.1+github 103 | print my_match['provider'] 104 | # >> Provider 105 | ``` 106 | 107 | ```python 108 | from hre import HR 109 | 110 | print HR().find('red').replace("violets are red", 'blue') 111 | # >> violets are blue 112 | ``` 113 | 114 | ### Combinations 115 | 116 | ```python 117 | from hre import HR 118 | 119 | 120 | valids = ['abacate', '42', 'tomate', '25'] 121 | 122 | valid_comb = HR().then(valids[0]) 123 | # same as valid_comb = T(valids[0]) 124 | for valid in valids[1:]: 125 | valid_comb |= HR().then(valid) 126 | # same as valid_comb |= T(valid) 127 | 128 | valids = ADD(valid_comb, name='valid') 129 | my_comb = HR().then('{').start_of_line() & valids & HR().then('}').end_of_line() 130 | # same as my_comb = SOL() & T('{') & valids & T('}') & EOL() 131 | print my_comb 132 | # >> ^(?:\{)(?P(?:abacate)|(?:42)|(?:tomate)|(?:25))(?:\})$ 133 | print my_comb('{42}')['valid'] 134 | # >> 42 135 | print my_comb('{abacate}')['valid'] 136 | # 
>> abacate 137 | print my_comb('{invalid}')['valid'] 138 | # >> None 139 | 140 | x = HR().then('@').word(name='p') 141 | y = HR().char(name='c').then('.') 142 | my_combination = y & x 143 | my_match = my_combination('x.@zokis') 144 | print "regex: ", my_combination 145 | # >> regex: (?P<c>\w)(?:\.)(?:\@)(?P<p>\w+) 146 | print "c: ", my_match['c'] 147 | # >> c: x 148 | print "p: ", my_match['p'] 149 | # >> p: zokis 150 | ``` 151 | 152 | 153 | ### Examples using shortcuts and combinations 154 | 155 | ```python 156 | from hre import DS 157 | 158 | my_re = DS() 159 | print my_re 160 | # >> \d+ 161 | 162 | if bool(my_re('number: 25')): 163 | print('Regex Ok') 164 | # >> Regex Ok 165 | my_match = my_re('number: 25') 166 | if my_match[0] == '25': 167 | print('25') 168 | # >> 25 169 | 170 | my_named_regex = DS(name='number') 171 | print my_named_regex 172 | # >> (?P<number>\d+) 173 | if my_named_regex('number: 25')['number'] == '25': 174 | print('number 25') 175 | # >> number 25 176 | ``` 177 | 178 | ```python 179 | from hre import RS, T, AT 180 | 181 | az = ['a', 'z'] 182 | AZ = ['A', 'Z'] 183 | _09 = ['0', '9'] 184 | special = '_.+' 185 | 186 | my_re = RS( 187 | AZ, az, _09, special, 188 | name='account' 189 | ) & T('@') & RS( 190 | AZ, az, _09, 191 | name='provider' 192 | ) & AT() 193 | my_match = my_re('my_email.1+github@Provider.com') 194 | print my_re 195 | # >> (?P<account>([A-Za-z0-9\_\.\+]+))(?:\@)(?P<provider>([A-Za-z0-9]+))(?:.*) 196 | print my_match['account'] 197 | # >> my_email.1+github 198 | print my_match['provider'] 199 | # >> Provider 200 | ``` 201 | 202 | ```python 203 | from hre import F 204 | 205 | print F('red').replace("violets are red", 'blue') 206 | # >> violets are blue 207 | ``` 208 | 209 | ```python 210 | d3 = D(quantifier=3) 211 | d2 = D() * 2 212 | 213 | t = T('-') 214 | p = T('.') 215 | cpf_re = d3 & p & d3 & p & d3 & t & d2 216 | print cpf_re 217 | # >> \d{3}(?:\.)\d{3}(?:\.)\d{3}(?:\-)\d\d 218 | print bool(cpf_re('412.459.786-08')) 219 | # >> True 220 | 221 | cnpj_re = d2 & p & d3 & p & d3 & T('/') & D(quantifier=4) & t & d2 222 | print cnpj_re 223 | # >> \d\d(?:\.)\d{3}(?:\.)\d{3}(?:\/)\d{4}(?:\-)\d\d 224 | print bool(cnpj_re('76.612.217/0001-14')) 225 | # >> True 226 | 227 | cpf_cnpj_re = G(cpf_re) | G(cnpj_re) 228 | print cpf_cnpj_re 229 | # >> 
(\d{3}(?:\.)\d{3}(?:\.)\d{3}(?:\-)\d\d)|(\d\d(?:\.)\d{3}(?:\.)\d{3}(?:\/)\d{4}(?:\-)\d\d) 230 | print bool(cpf_cnpj_re('856.324.440-07')) 231 | # >> True 232 | print bool(cpf_cnpj_re('49.347.475/0001-48')) 233 | # >> True 234 | ``` 235 | 236 | 237 | ### FLAGs 238 | 239 | ```python 240 | my_re = HR().find('cat') 241 | my_match = my_re('CAT or dog') 242 | print bool(my_match) 243 | # >> False 244 | my_re = my_re.ignorecase() 245 | my_match = my_re('CAT or dog') 246 | print bool(my_match) 247 | # >> True 248 | 249 | my_re = SOL() & F('DOG') 250 | my_match = my_re('CAT or \ndog') 251 | print bool(my_match) 252 | # >> False 253 | my_re = my_re & FI() | FM() 254 | my_match = my_re('CAT or \ndog') 255 | print bool(my_match) 256 | # >> True 257 | ``` 258 | 259 | 260 | ### Full API 261 | 262 | - column Shortcut: Shortcut Function or Flag Class 263 | - column Verbose: Verbose method => HR().x 264 | - column Example Shortcut: Example using the shortcut functions 265 | - column Resulting: resulting regex 266 | - column V: receives "value" as a parameter 267 | - column N: receives "name" as named parameter => Named groups 268 | - column Q: receives "quantifier" => {x};{x,};{x,y} 269 | 270 | | Shortcut | Verbose | Example Shortcut | Resulting | V | N | Q | 271 | |----------|-----------------|-----------------------|----------------------------|---|---|---| 272 | | ADD | .add | ADD('[0-9]+') | ``[0-9]+ `` | ✓ | ✓ | ✓ | 273 | | RE | .add | RE('[0-9]+') | ``[0-9]+ `` | ✓ | ✓ | ✓ | 274 | | T | .then | T('@') | ``(?:\@) `` | ✓ | ✓ | ✓ | 275 | | F | .find | F('blue') | ``(?:blue) `` | ✓ | ✓ | ✓ | 276 | | G | .group | G(T('A')|T('B')) | ``((?:A)``|``(?:B))`` | ✓ | ✓ | ✗ | 277 | | A | .any | A('0258qaz') | ``[0258qaz] `` | ✓ | ✓ | ✓ | 278 | | AT | .anything | AT() | ``(?:.*) `` | ✗ | ✓ | ✗ | 279 | | ATB | .anything_but | ATB('0258zaq') | ``(?:[^0258zaq]*) `` | ✓ | ✓ | ✗ | 280 | | EOL | .end_of_line | EOL() | ``$ `` | ✗ | ✗ | ✗ | 281 | | MB | .maybe | MB('s') | ``(?:s)? 
`` | ✓ | ✓ | ✗ | 282 | | MTP | .multiple | MTP() | ``+ `` | ✗ | ✗ | ✗ | 283 | | R | .range | R(['a', 'z']) | ``[a-z] `` | ✓ | ✓ | ✓ | 284 | | RS | .ranges | RS(['a', 'z']) | ``[a-z]+ `` | ✓ | ✓ | ✗ | 285 | | ST | .something | ST() | ``(?:.+) `` | ✗ | ✓ | ✗ | 286 | | STB | .something_but | STB('0258qaz') | ``(?:[^0258qaz]+) `` | ✓ | ✓ | ✗ | 287 | | SOL | .start_of_line | SOL() | ``^ `` | ✗ | ✗ | ✗ | 288 | | BR | .br | BR() | ``(?:\n``|``\r\n)`` | ✗ | ✗ | ✗ | 289 | | D | .digit | D() | ``\d `` | ✗ | ✓ | ✓ | 290 | | DS | .digits | DS() | ``\d+ `` | ✗ | ✓ | ✗ | 291 | | ID | .int_or_decimal | ID() | ``(?:\d*\.)?\d+ `` | ✗ | ✓ | ✗ | 292 | | ND | .non_digit | ND() | ``\D `` | ✗ | ✓ | ✓ | 293 | | NDS | .non_digits | NDS() | ``\D+ `` | ✗ | ✓ | ✗ | 294 | | TAB | .tab | TAB() | ``\t `` | ✗ | ✗ | ✓ | 295 | | WS | .whitespace | WS() | ``\s `` | ✗ | ✗ | ✓ | 296 | | NWS | .non_whitespace | NWS() | ``\S `` | ✗ | ✗ | ✓ | 297 | | W | .word | W() | ``\w+ `` | ✗ | ✓ | ✗ | 298 | | NW | .non_word | NW() | ``\W+ `` | ✗ | ✓ | ✗ | 299 | | C | .char | C() | ``\w `` | ✗ | ✓ | ✓ | 300 | | NC | .non_char | NC() | ``\W `` | ✗ | ✓ | ✓ | 301 | | FS | .dotall/.S | FS() | Flag dotall enabled | ✗ | ✗ | ✗ | 302 | | FI | .ignorecase/.I | FI() | Flag ignorecase enabled | ✗ | ✗ | ✗ | 303 | | FL | .locale/.L | FL() | Flag locale enabled | ✗ | ✗ | ✗ | 304 | | FM | .multiline/.M | FM() | Flag multiline enabled | ✗ | ✗ | ✗ | 305 | | FU | .unicode/.U | FU() | Flag unicode enabled | ✗ | ✗ | ✗ | 306 | | FX | .verbose/.X | FX() | Flag verbose enabled | ✗ | ✗ | ✗ | 307 | 308 | Other methods of regex object 309 | 310 | | Method | Description 311 | |------------|--------------- 312 | | .get_flags | returns an integer with the value of the flags 313 | | .compile | same as re.compile 314 | | .findall | same as re.findall 315 | | .groups | same as re.groups 316 | | .groupdict | return a match object => a defaultdict(None) Like that contains all the results of a match 317 | | .match | same as re.match 318 | | .replace 
"""HumanRegex: a small fluent API for building and using regular expressions.

The module exposes :class:`HumanRegex` (alias ``HR``), a chainable regex
builder, a set of short factory functions (``T``, ``F``, ``D``, ...) that each
return a fresh builder, and ``Flag`` classes (``FI``, ``FM``, ...) that can be
combined with a builder through ``&`` / ``|`` to enable ``re`` flags.
"""
import re

from functools import reduce
from operator import or_


__title__ = 'HumanRegex'
__version__ = '1.0.0'
__author__ = 'Marcelo Fonseca Tambalo'


class HumanMatch(dict):
    """Dictionary of match results that yields ``None`` for missing keys."""

    def __getitem__(self, item):
        """Return the stored value, or ``None`` when *item* is absent."""
        try:
            return super().__getitem__(item)
        except KeyError:
            return None

    def get(self, key, default=None):
        """Return the value for *key*; fall back to *default* when the key is
        missing or its stored value is ``None``."""
        value = super().get(key, default)
        return default if value is None else value


class HumanRegex(str):
    """Chainable regular-expression builder.

    Every builder method appends a fragment to ``source`` and returns ``self``
    so calls can be chained.  ``pattern`` is always
    ``prefixes + source + suffixes`` and is what ``str(obj)`` returns.
    Calling the object with a string runs a search and returns a
    :class:`HumanMatch`.
    """

    _AND = 'AND'
    _OR = 'OR'

    # (instance attribute, ``re`` flag) pairs used by get_flags/_combine.
    _FLAG_ATTRS = (
        ('_dotall', re.S),
        ('_ignorecase', re.I),
        ('_locale', re.L),
        ('_multiline', re.M),
        ('_unicode', re.U),
        ('_verbose', re.X),
    )

    def __init__(self):
        self.pattern = ''
        self.prefixes = ''
        self.source = ''
        self.suffixes = ''

        self._dotall = False
        self._ignorecase = False
        self._locale = False
        self._multiline = False
        self._unicode = False
        self._verbose = False

    def escape(self, value):
        """Return *value* with regex metacharacters escaped (``re.escape``)."""
        return re.escape(value)

    def add(self, value=None, name=None, quantifier=None):
        """Append a raw regex fragment and refresh ``pattern``.

        :param value: regex text (or another ``HumanRegex``); ``None`` appends
            nothing and only rebuilds ``pattern``.
        :param name: when given, wrap the fragment in ``(?P<name>...)``.
        :param quantifier: an int ``n`` gives ``{n}``; a sequence of one item
            gives ``{n,}``; a sequence of two gives ``{n,m}``.
        :returns: ``self``.
        """
        if value is not None:
            if isinstance(value, HumanRegex):
                value = str(value)
            if quantifier is not None:
                if isinstance(quantifier, int):
                    value = "%s{%d}" % (value, quantifier)
                elif len(quantifier) == 1:
                    value = "%s{%d,}" % (value, quantifier[0])
                else:
                    value = "%s{%d,%d}" % (value, quantifier[0], quantifier[1])
            if name is None:
                self.source += value
            else:
                self.source += '(?P<{name}>{value})'.format(name=name, value=value)
        self.pattern = self.prefixes + self.source + self.suffixes
        return self

    def _char_class(self, args):
        """Build the inside of a ``[...]`` class shared by range()/ranges().

        String arguments are escaped and used literally; any other iterable
        (e.g. ``['a', 'z']``) is joined with ``-`` to form a range.
        """
        parts = []
        for arg in args:
            if isinstance(arg, str):
                parts.append(self.escape(arg))
            else:
                parts.append("-".join(arg))
        return ''.join(parts)

    def any(self, value, name=None, quantifier=None):
        """Match any single character contained in *value*."""
        return self.add("[" + self.escape(value) + "]", name=name, quantifier=quantifier)

    def anything(self, name=None):
        """Match zero or more of any character: ``(?:.*)``."""
        return self.add(r"(?:.*)", name=name)

    def anything_but(self, value, name=None):
        """Match zero or more characters that are not in *value*."""
        return self.add("(?:[^" + self.escape(value) + "]*)", name=name)

    def br(self):
        """Match a line break (``\\n`` or ``\\r\\n``)."""
        return self.add(r"(?:\n|\r\n)")

    def int_or_decimal(self, name=None):
        """Match an integer or a decimal number such as ``3``, ``.5``, ``1.5``."""
        return self.add(r"(?:\d*\.)?\d+", name=name)

    def digit(self, name=None, quantifier=None):
        """Match a single digit (``\\d``)."""
        return self.add(r"\d", name=name, quantifier=quantifier)

    def digits(self, name=None):
        """Match one or more digits (``\\d+``)."""
        return self.add(r"\d+", name=name)

    def group(self, value, name=None):
        """Wrap *value* in a capturing group.

        A ``HumanRegex`` value is inserted as-is; plain text is escaped first.
        """
        if isinstance(value, HumanRegex):
            value = str(value)
        else:
            value = self.escape(value)
        return self.add("(" + value + ")", name=name)

    def non_digit(self, name=None, quantifier=None):
        """Match a single non-digit (``\\D``)."""
        return self.add(r"\D", name=name, quantifier=quantifier)

    def non_digits(self, name=None):
        """Match one or more non-digits (``\\D+``)."""
        return self.add(r"\D+", name=name)

    def end_of_line(self, enable=True):
        """Anchor the pattern at end of line (``$``), or remove the anchor."""
        self.suffixes = "$" if enable else ""
        return self.add()

    def maybe(self, value, name=None):
        """Optionally match the literal *value*: ``(?:value)?``."""
        return self.add("(?:" + self.escape(value) + ")?", name=name)

    def multiple(self):
        """Append a ``+`` quantifier to whatever precedes it."""
        return self.add(r"+")
    s = multiple

    def OR(self, value=None):
        """Append ``|``; when *value* is truthy also append it as a literal."""
        self.add(r"|")
        return self.then(value) if value else self

    def range(self, *args, **kwargs):
        """Match one character from a class built from *args*.

        Accepts ``name`` and ``quantifier`` as keyword arguments.
        """
        name = kwargs.pop('name', None)
        quantifier = kwargs.pop('quantifier', None)
        return self.add(
            r"[%s]" % self._char_class(args), name=name, quantifier=quantifier
        )

    def ranges(self, *args, **kwargs):
        """Match one or more characters from a class built from *args*.

        Accepts ``name`` as a keyword argument.
        """
        name = kwargs.pop('name', None)
        return self.add(r"[%s]+" % self._char_class(args), name=name)

    def something(self, name=None, quantifier=None):
        """Match one or more of any character: ``(?:.+)``."""
        return self.add(r"(?:.+)", name=name, quantifier=quantifier)

    def something_but(self, value, name=None, quantifier=None):
        """Match one or more characters that are not in *value*."""
        return self.add(
            "(?:[^" + self.escape(value) + "]+)", name=name, quantifier=quantifier
        )

    def start_of_line(self, enable=True):
        """Anchor the pattern at start of line (``^``), or remove the anchor."""
        self.prefixes = "^" if enable else ""
        return self.add()

    def tab(self, quantifier=None):
        """Match a tab character."""
        return self.add(r"\t", quantifier=quantifier)

    def whitespace(self, quantifier=None):
        """Match a whitespace character (``\\s``)."""
        return self.add(r"\s", quantifier=quantifier)

    def non_whitespace(self, quantifier=None):
        """Match a non-whitespace character (``\\S``)."""
        return self.add(r"\S", quantifier=quantifier)

    def then(self, value, name=None, quantifier=None):
        """Match the literal *value* inside a non-capturing group."""
        return self.add(
            "(?:" + self.escape(value) + ")", name=name, quantifier=quantifier
        )
    find = then

    def word(self, name=None):
        """Match one or more word characters (``\\w+``)."""
        return self.add(r"\w+", name=name)

    def non_word(self, name=None):
        """Match one or more non-word characters (``\\W+``)."""
        # Forward ``name`` so the fragment can be a named group like its
        # siblings (it was silently dropped before).
        return self.add(r"\W+", name=name)

    def char(self, name=None, quantifier=None):
        """Match a single word character (``\\w``)."""
        return self.add(r"\w", name=name, quantifier=quantifier)

    def non_char(self, name=None, quantifier=None):
        """Match a single non-word character (``\\W``)."""
        return self.add(r"\W", name=name, quantifier=quantifier)

    def dotall(self, enable=True):
        """Enable/disable ``re.S``."""
        self._dotall = enable
        return self
    S = dotall

    def ignorecase(self, enable=True):
        """Enable/disable ``re.I``."""
        self._ignorecase = enable
        return self
    I = ignorecase  # noqa

    def locale(self, enable=True):
        """Enable/disable ``re.L``."""
        self._locale = enable
        return self
    L = locale

    def multiline(self, enable=True):
        """Enable/disable ``re.M``."""
        self._multiline = enable
        return self
    M = multiline

    def unicode(self, enable=True):
        """Enable/disable ``re.U``."""
        self._unicode = enable
        return self

    def U(self, enable=True):
        """Shortcut for :meth:`unicode`."""
        return self.unicode(enable)

    def verbose(self, enable=True):
        """Enable/disable ``re.X``."""
        self._verbose = enable
        return self
    X = verbose

    def get_flags(self):
        """Return the bitwise OR of every enabled ``re`` flag (0 if none)."""
        flags = 0
        for attr, re_flag in self._FLAG_ATTRS:
            if getattr(self, attr):
                flags |= re_flag
        return flags

    def compile(self):
        """Compile the current pattern with the current flags."""
        return re.compile(str(self), self.get_flags())

    def findall(self, string):
        """Same as ``re.findall`` for the current pattern."""
        return self.compile().findall(string)

    def groups(self, string):
        """Return the groups of the first match in *string*.

        Returns an empty tuple when the pattern does not match, instead of
        failing on the ``None`` returned by ``search``.
        """
        match = self.search(string)
        return match.groups() if match else ()

    def groupdict(self, string):
        """Search *string* and return a :class:`HumanMatch`.

        On a match the result holds the whole match under ``0``, positional
        groups under ``1..n`` and named groups under their names; otherwise
        it is empty (and therefore falsy).
        """
        result = HumanMatch()
        match = self.search(string)
        if match:
            result[0] = match.group()
            result.update(enumerate(match.groups(), start=1))
            result.update(match.groupdict())
        return result

    def match(self, string):
        """Same as ``re.match`` for the current pattern."""
        return self.compile().match(string)

    def replace(self, string, repl):
        """Return *string* with every match replaced by *repl*."""
        return self.compile().sub(repl, string)
    sub = replace

    def search(self, string):
        """Same as ``re.search`` for the current pattern."""
        return self.compile().search(string)

    def split(self, string):
        """Same as ``re.split`` for the current pattern."""
        return re.split(str(self), string, flags=self.get_flags())

    def test(self, string):
        """Return ``True`` when the pattern matches anywhere in *string*."""
        # ``findall`` is non-empty exactly when ``search`` finds a match;
        # search avoids building the full result list.
        return self.search(string) is not None

    def __str__(self):
        return r"%s" % self.pattern

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Must be falsy: a truthy return value tells Python to swallow any
        # exception raised inside the ``with`` block.
        return False

    def __repr__(self):
        return repr(str(self))

    def __call__(self, string):
        return self.groupdict(string)

    def __mul__(self, other):
        """Return a new regex whose pattern is this one repeated *other* times."""
        if isinstance(other, int):
            return HR().add(str(self) * other)
        raise TypeError(
            "unsupported operand type(s) for *: '%s' and '%s'" % (
                type(self).__name__,
                type(other).__name__
            )
        )

    def _combine(self, other, op=_AND):
        """Implement ``&`` and ``|``.

        * ``Flag`` / ``Flags`` operands are applied to ``self`` in place and
          ``self`` is returned.
        * A ``HumanRegex`` operand yields a new regex with both sources
          concatenated (joined by ``|`` for OR) and the flags merged.
        * Anything else returns ``NotImplemented`` so Python raises the usual
          ``TypeError``.
        """
        if isinstance(other, Flag):
            other.set(self)
            return self
        if isinstance(other, Flags):
            for flag in other:
                flag.set(self)
            return self
        if not isinstance(other, HumanRegex):
            return NotImplemented

        combined = HumanRegex()
        for attr, _ in self._FLAG_ATTRS:
            setattr(combined, attr, getattr(self, attr) | getattr(other, attr))

        # Left operand wins for the prefix, right operand for the suffix.
        combined.prefixes = self.prefixes if self.prefixes else other.prefixes
        combined.suffixes = other.suffixes if other.suffixes else self.suffixes
        combined.add(self.source)
        if op == self._OR:
            combined.OR()
        return combined.add(other.source)

    def __or__(self, other):
        return self._combine(other, self._OR)

    def __and__(self, other):
        return self._combine(other, self._AND)


HR = HumanRegex


class Flags(set):
    """A set of :class:`Flag` objects that can be applied to a regex at once."""

    def __or__(self, other):
        # Delegate to the Flag/HumanRegex operand, which knows how to merge.
        if isinstance(other, (Flag, HR)):
            return other | self
        return super().__or__(other)

    def __and__(self, other):
        if isinstance(other, HR):
            return other & self
        return super().__and__(other)

    def __int__(self):
        """Return the bitwise OR of all contained flags (0 when empty)."""
        return reduce(or_, map(int, self or [0]))


class Flag(object):
    """Base class for flag markers.

    Subclasses define ``v`` (the ``re`` flag value) and ``f_name`` (the name
    of the ``HumanRegex`` method that toggles the flag).
    """

    f_name = 'nill'

    def __init__(self, enable=True):
        self.enable = enable

    def f(self, hr):
        """Call the ``HumanRegex`` method named ``f_name`` on *hr*."""
        return getattr(HR, self.f_name)(hr, self.enable)

    def set(self, hr):
        """Apply this flag to *hr* in place."""
        self.f(hr)

    def __or__(self, other):
        if isinstance(other, Flag):
            return Flags([self, other])
        elif isinstance(other, Flags):
            flgs = Flags(other)
            flgs.add(self)
            return flgs
        else:
            return other | self

    def __and__(self, other):
        if isinstance(other, HR):
            return other & self
        raise TypeError(
            "unsupported operand type(s) for &: '%s' and '%s'" % (
                type(self).__name__,
                type(other).__name__
            )
        )

    def __repr__(self):
        return self.f_name

    def __int__(self):
        return int(self.v)


class FS(Flag):
    v = re.S
    f_name = 'dotall'


class FI(Flag):
    v = re.I
    f_name = 'ignorecase'


class FL(Flag):
    v = re.L
    f_name = 'locale'


class FM(Flag):
    v = re.M
    f_name = 'multiline'


class FU(Flag):
    v = re.U
    f_name = 'unicode'


class FX(Flag):
    v = re.X
    f_name = 'verbose'


# Shortcut factories: each returns a fresh HumanRegex with one fragment added.

def ADD(value=None, name=None, quantifier=None):
    """Shortcut for ``HR().add(...)``: wrap a raw regex."""
    return HR().add(value, name=name, quantifier=quantifier)


RE = ADD


def T(value, name=None, quantifier=None):
    """Shortcut for ``HR().then(...)``."""
    return HR().then(value, name=name, quantifier=quantifier)


def F(value, name=None, quantifier=None):
    """Shortcut for ``HR().find(...)``."""
    return HR().find(value, name=name, quantifier=quantifier)


def G(value, name=None):
    """Shortcut for ``HR().group(...)``."""
    return HR().group(value, name=name)


def A(value, name=None, quantifier=None):
    """Shortcut for ``HR().any(...)``."""
    return HR().any(value, name=name, quantifier=quantifier)


def AT(name=None):
    """Shortcut for ``HR().anything(...)``."""
    return HR().anything(name=name)


def ATB(value, name=None):
    """Shortcut for ``HR().anything_but(...)``."""
    return HR().anything_but(value, name=name)


def EOL(enable=True):
    """Shortcut for ``HR().end_of_line(...)``."""
    return HR().end_of_line(enable)


def MB(value, name=None):
    """Shortcut for ``HR().maybe(...)``."""
    # ``name`` is forwarded; it was accepted but ignored before.
    return HR().maybe(value, name=name)


def MTP():
    """Shortcut for ``HR().multiple()``."""
    return HR().multiple()


def R(*args, **kwargs):
    """Shortcut for ``HR().range(...)``."""
    return HR().range(*args, **kwargs)


def RS(*args, **kwargs):
    """Shortcut for ``HR().ranges(...)``."""
    return HR().ranges(*args, **kwargs)


def ST(name=None):
    """Shortcut for ``HR().something(...)``."""
    return HR().something(name=name)


def STB(value, name=None):
    """Shortcut for ``HR().something_but(...)``."""
    return HR().something_but(value, name=name)


def SOL(enable=True):
    """Shortcut for ``HR().start_of_line(...)``."""
    return HR().start_of_line(enable)


def BR():
    """Shortcut for ``HR().br()``."""
    return HR().br()


def D(name=None, quantifier=None):
    """Shortcut for ``HR().digit(...)``."""
    return HR().digit(name=name, quantifier=quantifier)


def DS(name=None):
    """Shortcut for ``HR().digits(...)``."""
    return HR().digits(name=name)


def ID(name=None):
    """Shortcut for ``HR().int_or_decimal(...)``."""
    return HR().int_or_decimal(name=name)


def ND(name=None, quantifier=None):
    """Shortcut for ``HR().non_digit(...)``."""
    return HR().non_digit(name=name, quantifier=quantifier)


def NDS(name=None):
    """Shortcut for ``HR().non_digits(...)``."""
    return HR().non_digits(name=name)


def TAB(quantifier=None):
    """Shortcut for ``HR().tab(...)``."""
    return HR().tab(quantifier=quantifier)


def WS(quantifier=None):
    """Shortcut for ``HR().whitespace(...)``."""
    return HR().whitespace(quantifier=quantifier)


def NWS(quantifier=None):
    """Shortcut for ``HR().non_whitespace(...)``."""
    return HR().non_whitespace(quantifier=quantifier)


def W(name=None):
    """Shortcut for ``HR().word(...)``."""
    return HR().word(name=name)


def NW(name=None):
    """Shortcut for ``HR().non_word(...)``."""
    return HR().non_word(name=name)


def C(name=None, quantifier=None):
    """Shortcut for ``HR().char(...)``."""
    return HR().char(name=name, quantifier=quantifier)


def NC(name=None, quantifier=None):
    """Shortcut for ``HR().non_char(...)``."""
    return HR().non_char(name=name, quantifier=quantifier)
11 | DOCS: https://github.com/zokis/HumanRegex/blob/master/README.md 12 | DOCS RAW: https://raw.githubusercontent.com/zokis/HumanRegex/master/README.md''', 13 | url='https://github.com/zokis/HumanRegex/', 14 | author='Marcelo Fonseca Tambalo', 15 | author_email='marcelo.zokis@gmail.com', 16 | license='MIT', 17 | py_modules=['hre'], 18 | scripts=['hre.py'], 19 | platforms='any', 20 | classifiers=[ 21 | 'Development Status :: 5 - Production/Stable', 22 | 'Intended Audience :: Developers', 23 | 'Operating System :: OS Independent', 24 | 'Programming Language :: Python', 25 | 'Programming Language :: Python :: 3', 26 | 'Programming Language :: Python :: 3.7', 27 | 'Topic :: Software Development :: Libraries :: Python Modules', 28 | ], 29 | ) 30 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import unittest 3 | 4 | from itertools import combinations 5 | 6 | from hre import * 7 | 8 | 9 | class TestHRE(unittest.TestCase): 10 | 11 | def test_human_match(self): 12 | hm = HumanMatch(a=1, b=2, c=3) 13 | self.assertEqual(hm['a'], 1) 14 | self.assertEqual(hm['b'], 2) 15 | self.assertEqual(hm['c'], 3) 16 | self.assertIsNone(hm['d']) 17 | self.assertEqual(hm.get('a'), 1) 18 | self.assertEqual(hm.get('b'), 2) 19 | self.assertEqual(hm.get('c'), 3) 20 | self.assertIsNone(hm.get('d')) 21 | 22 | self.assertEqual(hm.get('a', 1), 1) 23 | self.assertEqual(hm.get('b', 2), 2) 24 | self.assertEqual(hm.get('c', 3), 3) 25 | self.assertIsNone(hm.get('d', None)) 26 | self.assertIsNotNone(hm.get('d', 6)) 27 | 28 | def test_simple_api(self): 29 | my_re = RE('[0-9]+') 30 | 31 | self.assertTrue(bool(my_re('number: 25'))) 32 | self.assertFalse(bool(my_re('zZz'))) 33 | self.assertEqual(my_re('number: 25')[0], '25') 34 | self.assertIsNone(my_re('zZz')[0]) 35 | 36 | my_re = RE('(?P[0-9]+)') 37 | self.assertEqual(my_re('number: 25')['number'], '25') 38 
| self.assertIsNone(my_re('zZz')['number']) 39 | 40 | def test_flags(self): 41 | my_re = HR().find('cat') 42 | self.assertFalse(bool(my_re('CAT or dog'))) 43 | 44 | my_re = my_re.ignorecase() 45 | self.assertTrue(bool(my_re('CAT or dog'))) 46 | 47 | my_re = SOL() & F('DOG') 48 | self.assertFalse(bool(my_re('CAT or \ndog'))) 49 | 50 | my_re = my_re & FI() | FM() 51 | self.assertTrue(bool(my_re('CAT or \ndog'))) 52 | 53 | my_re = my_re & (FI() | FM()) 54 | self.assertTrue(bool(my_re('CAT or \ndog'))) 55 | 56 | with self.assertRaises(TypeError): 57 | ((FI() | FM()) | 1) 58 | with self.assertRaises(TypeError): 59 | ((FI() | FM()) & 1) 60 | with self.assertRaises(TypeError): 61 | (FI() & 1) 62 | 63 | self.assertEqual(((FI() | FM()) & RE('x')).get_flags(), 10) 64 | 65 | self.assertEqual((FI() & RE('z')).get_flags(), 2) 66 | self.assertEqual((FI() | RE('z')).get_flags(), 2) 67 | 68 | self.assertEqual(repr(FI()), 'ignorecase') 69 | self.assertEqual(repr(FL()), 'locale') 70 | self.assertEqual(repr(FM()), 'multiline') 71 | self.assertEqual(repr(FS()), 'dotall') 72 | self.assertEqual(repr(FU()), 'unicode') 73 | self.assertEqual(repr(FX()), 'verbose') 74 | 75 | ops = ['', 'I', 'L', 'M', 'S', 'U', 'X'] 76 | ops_s = ['', FI, FL, FM, FS, FU, FX] 77 | for i in range(1, 8): 78 | for cop in combinations(ops, i): 79 | my_re = RE('x') 80 | flags = 0 81 | co_flags = Flags() 82 | for op in cop: 83 | if op: 84 | j = ops.index(op) 85 | co_flags = co_flags | ops_s[j]() 86 | getattr(my_re, op)() 87 | flags = flags | (2 ** j) 88 | self.assertEqual(my_re.get_flags(), flags) 89 | self.assertEqual(int(co_flags), flags) 90 | 91 | def test_methods(self): 92 | text = "violets are red" 93 | my_re = RE('(?:red)') 94 | 95 | self.assertEqual(my_re.match('red').group(), 'red') 96 | 97 | self.assertEqual(my_re.replace(text, 'blue'), "violets are blue") 98 | 99 | self.assertEqual(my_re.get_flags(), 0) 100 | 101 | text = """Ross McFluff: 834.345.1254 155 Elm Street 102 | Ronald Heathmore: 892.345.3428 436 
Finley Avenue 103 | Frank Burger: 925.541.7625 662 South Dogwood Way 104 | Heather Albrecht: 548.326.4584 919 Park Place""" 105 | expected = [ 106 | 'Ross McFluff: 834.345.1254 155 Elm Street', 107 | ' Ronald Heathmore: 892.345.3428 436 Finley Avenue', 108 | ' Frank Burger: 925.541.7625 662 South Dogwood Way', 109 | ' Heather Albrecht: 548.326.4584 919 Park Place' 110 | ] 111 | self.assertEqual(RE(r'\n+').split(text), expected) 112 | 113 | text = "He was carefully disguised but captured quickly by police." 114 | self.assertEqual(RE(r"\w+ly").test(text), True) 115 | self.assertEqual(RE(r"\w+zz").test(text), False) 116 | 117 | self.assertEqual(RE(r"\w+ly").findall(text), ['carefully', 'quickly']) 118 | self.assertEqual(RE(r"\w+zz").findall(text), []) 119 | 120 | my_re = RE(r"(?P\w+) (?P\w+)") 121 | self.assertEqual(my_re.groups('Malcolm Reynolds'), ('Malcolm', 'Reynolds')) 122 | self.assertEqual( 123 | my_re.groupdict('Malcolm Reynolds'), 124 | { 125 | 0: 'Malcolm Reynolds', 126 | 1: 'Malcolm', 127 | 2: 'Reynolds', 128 | 'last_name': 'Reynolds', 129 | 'first_name': 'Malcolm' 130 | } 131 | ) 132 | 133 | def test_then(self): 134 | then_re = HR().then('@') 135 | then_match = then_re('a@b') 136 | self.assertTrue(bool(then_match)) 137 | self.assertEqual(then_match[0], '@') 138 | then_re = HR().then('@', name='x') 139 | then_match = then_re('a@b') 140 | self.assertTrue(bool(then_match)) 141 | self.assertEqual(then_match[0], '@') 142 | self.assertEqual(then_match['x'], '@') 143 | then_re = HR().then('@', quantifier=2) 144 | then_match = then_re('a@b') 145 | self.assertFalse(bool(then_match)) 146 | self.assertIsNone(then_match[0]) 147 | self.assertIsNone(then_match['x']) 148 | then_match = then_re('a@@b') 149 | self.assertTrue(bool(then_match)) 150 | self.assertEqual(then_match[0], '@@') 151 | then_re = HR().then('@', name='x', quantifier=2) 152 | then_match = then_re('a@@b') 153 | self.assertEqual(then_match['x'], '@@') 154 | 155 | then_re = HR() & T('@') 156 | then_match = 
then_re('a@b') 157 | self.assertTrue(bool(then_match)) 158 | self.assertEqual(then_match[0], '@') 159 | then_re = HR() & T('@', name='x') 160 | then_match = then_re('a@b') 161 | self.assertTrue(bool(then_match)) 162 | self.assertEqual(then_match[0], '@') 163 | self.assertEqual(then_match['x'], '@') 164 | then_re = HR() & T('@', quantifier=2) 165 | then_match = then_re('a@b') 166 | self.assertFalse(bool(then_match)) 167 | self.assertIsNone(then_match[0]) 168 | self.assertIsNone(then_match['x']) 169 | then_match = then_re('a@@b') 170 | self.assertTrue(bool(then_match)) 171 | self.assertEqual(then_match[0], '@@') 172 | then_re = HR() & T('@', name='x', quantifier=2) 173 | then_match = then_re('a@@b') 174 | self.assertEqual(then_match['x'], '@@') 175 | 176 | def test_with(self): 177 | with RE(r'\w+') as r: 178 | self.assertTrue(bool(r('testes'))) 179 | 180 | def test_repr(self): 181 | self.assertEqual(repr(RE(r'x')), "'x'") 182 | 183 | def test_mul(self): 184 | r = RE('a|b') * 2 185 | self.assertTrue(bool(r('a'))) 186 | self.assertTrue(bool(r('ab'))) 187 | self.assertTrue(bool(r('b'))) 188 | self.assertFalse(bool(r('x'))) 189 | with self.assertRaises(TypeError): 190 | RE('a|b') * 'a' 191 | 192 | def test_find(self): 193 | find_re = HR().find('@') 194 | find_match = find_re('a@b') 195 | self.assertTrue(bool(find_match)) 196 | self.assertEqual(find_match[0], '@') 197 | find_re = HR().find('@', name='x') 198 | find_match = find_re('a@b') 199 | self.assertTrue(bool(find_match)) 200 | self.assertEqual(find_match[0], '@') 201 | self.assertEqual(find_match['x'], '@') 202 | find_re = HR().find('@', quantifier=2) 203 | find_match = find_re('a@b') 204 | self.assertFalse(bool(find_match)) 205 | self.assertIsNone(find_match[0]) 206 | self.assertIsNone(find_match['x']) 207 | find_match = find_re('a@@b') 208 | self.assertTrue(bool(find_match)) 209 | self.assertEqual(find_match[0], '@@') 210 | find_re = HR().find('@', name='x', quantifier=2) 211 | find_match = find_re('a@@b') 212 | 
self.assertEqual(find_match['x'], '@@') 213 | 214 | find_re = HR() & F('@') 215 | find_match = find_re('a@b') 216 | self.assertTrue(bool(find_match)) 217 | self.assertEqual(find_match[0], '@') 218 | find_re = HR() & F('@', name='x') 219 | find_match = find_re('a@b') 220 | self.assertTrue(bool(find_match)) 221 | self.assertEqual(find_match[0], '@') 222 | self.assertEqual(find_match['x'], '@') 223 | find_re = HR() & F('@', quantifier=2) 224 | find_match = find_re('a@b') 225 | self.assertFalse(bool(find_match)) 226 | self.assertIsNone(find_match[0]) 227 | self.assertIsNone(find_match['x']) 228 | find_match = find_re('a@@b') 229 | self.assertTrue(bool(find_match)) 230 | self.assertEqual(find_match[0], '@@') 231 | find_re = HR() & F('@', name='x', quantifier=2) 232 | find_match = find_re('a@@b') 233 | self.assertEqual(find_match['x'], '@@') 234 | 235 | def test_any(self): 236 | any_re = HR().any('xyz') 237 | any_match = any_re('abacate') 238 | self.assertFalse(bool(any_match)) 239 | any_match = any_re('abacaxi') 240 | self.assertTrue(bool(any_match)) 241 | self.assertEqual(any_match[0], 'x') 242 | any_re = HR().any('xyz', name='x') 243 | any_match = any_re('abacaxi') 244 | self.assertEqual(any_match['x'], 'x') 245 | any_re = HR().any('srt', quantifier=2).U() 246 | any_match = any_re(u'Pêssego') 247 | 248 | any_re = HR() & A('xyz') 249 | any_match = any_re('abacate') 250 | self.assertFalse(bool(any_match)) 251 | any_match = any_re('abacaxi') 252 | self.assertTrue(bool(any_match)) 253 | self.assertEqual(any_match[0], 'x') 254 | any_re = HR() & A('xyz', name='x') 255 | any_match = any_re('abacaxi') 256 | self.assertEqual(any_match['x'], 'x') 257 | any_re = HR() & A('srt', quantifier=2).U() 258 | any_match = any_re(u'Pêssego') 259 | 260 | def test_anything(self): 261 | anything_re = HR().then('@').anything(name='x').then('@') 262 | anything_match = anything_re('dsafh4353') 263 | self.assertFalse(bool(anything_match)) 264 | anything_re = 
HR().then('@').anything(name='x').then('@') 265 | anything_match = anything_re('@dsafh4353@') 266 | self.assertTrue(bool(anything_match)) 267 | self.assertEqual(anything_match[0], '@dsafh4353@') 268 | self.assertEqual(anything_match[1], 'dsafh4353') 269 | self.assertEqual(anything_match['x'], 'dsafh4353') 270 | 271 | anything_re = HR() & T('@') & AT(name='x') & T('@') 272 | anything_match = anything_re('dsafh4353') 273 | self.assertFalse(bool(anything_match)) 274 | anything_re = HR() & T('@') & AT(name='x') & T('@') 275 | anything_match = anything_re('@dsafh4353@') 276 | self.assertTrue(bool(anything_match)) 277 | self.assertEqual(anything_match[0], '@dsafh4353@') 278 | self.assertEqual(anything_match[1], 'dsafh4353') 279 | self.assertEqual(anything_match['x'], 'dsafh4353') 280 | 281 | def test_anything_but(self): 282 | anything_but_re = HR().then('@').anything_but('@#', name='x').then('@') 283 | anything_but_match = anything_but_re('@12@123213213213@') 284 | self.assertTrue(bool(anything_but_match)) 285 | self.assertEqual(anything_but_match[0], '@12@') 286 | self.assertEqual(anything_but_match[1], '12') 287 | self.assertEqual(anything_but_match['x'], '12') 288 | 289 | anything_but_re = HR().then('@') & ATB('@#', name='x') & T('@') 290 | anything_but_match = anything_but_re('@12@123213213213@') 291 | self.assertTrue(bool(anything_but_match)) 292 | self.assertEqual(anything_but_match[0], '@12@') 293 | self.assertEqual(anything_but_match[1], '12') 294 | self.assertEqual(anything_but_match['x'], '12') 295 | 296 | def test_end_of_line(self): 297 | eof_match = HR().then('aaa', name='x').then('xxx', name='y').end_of_line()('aaaxxx') 298 | 299 | self.assertTrue(bool(eof_match)) 300 | self.assertEqual(eof_match[0], 'aaaxxx') 301 | self.assertEqual(eof_match[1], 'aaa') 302 | self.assertEqual(eof_match[2], 'xxx') 303 | self.assertEqual(eof_match['x'], 'aaa') 304 | self.assertEqual(eof_match['y'], 'xxx') 305 | 306 | eof_match = (HR() & T('aaa', name='x') & T('xxx', name='y') 
& EOL())('aaaxxx') 307 | 308 | self.assertTrue(bool(eof_match)) 309 | self.assertEqual(eof_match[0], 'aaaxxx') 310 | self.assertEqual(eof_match[1], 'aaa') 311 | self.assertEqual(eof_match[2], 'xxx') 312 | self.assertEqual(eof_match['x'], 'aaa') 313 | self.assertEqual(eof_match['y'], 'xxx') 314 | 315 | def test_add_hre(self): 316 | add_re = RE(T('@') & T('e') & T('$')) 317 | add_match = add_re('@e$') 318 | self.assertTrue(bool(add_match)) 319 | self.assertEqual(add_match[0], '@e$') 320 | 321 | def test_br(self): 322 | br_re = HR() & T('a') & BR() & T('e') 323 | br_match = br_re('a\ne\nx') 324 | self.assertTrue(bool(br_match)) 325 | self.assertEqual(br_match[0], 'a\ne') 326 | 327 | def test_group(self): 328 | g_re = HR() & T('a') & G(T('A') | T('B')) & T('e') 329 | g_match = g_re('aAe') 330 | self.assertTrue(bool(g_match)) 331 | self.assertEqual(g_match[0], 'aAe') 332 | self.assertEqual(g_match[1], 'A') 333 | 334 | g_match = g_re('aBe') 335 | self.assertTrue(bool(g_match)) 336 | self.assertEqual(g_match[0], 'aBe') 337 | self.assertEqual(g_match[1], 'B') 338 | 339 | g_match = g_re('aABe') 340 | self.assertFalse(bool(g_match)) 341 | self.assertIsNone(g_match[0]) 342 | self.assertIsNone(g_match[1]) 343 | 344 | g_re = HR() & T('a') & G('12345') & T('e') 345 | g_match = g_re('a12345e') 346 | self.assertTrue(bool(g_match)) 347 | self.assertEqual(g_match[0], 'a12345e') 348 | self.assertEqual(g_match[1], '12345') 349 | 350 | def test_int_or_decimal(self): 351 | br_re = HR() & T('a') & ID() & T('e') 352 | br_match = br_re('a3e4x') 353 | self.assertTrue(bool(br_match)) 354 | self.assertEqual(br_match[0], 'a3e') 355 | 356 | br_re = HR() & T('a') & ID() & T('e') 357 | br_match = br_re('a3.5e4x') 358 | self.assertTrue(bool(br_match)) 359 | self.assertEqual(br_match[0], 'a3.5e') 360 | 361 | def test_digit(self): 362 | d_re = HR() & T('a') & D() & T('e') 363 | d_match = d_re('a3e4x') 364 | self.assertTrue(bool(d_match)) 365 | self.assertEqual(d_match[0], 'a3e') 366 | 
self.assertFalse(bool(d_re('a33e4x'))) 367 | 368 | def test_digits(self): 369 | d_re = HR() & T('a') & DS() & T('e') 370 | d_match = d_re('a3e4x') 371 | self.assertTrue(bool(d_match)) 372 | self.assertEqual(d_match[0], 'a3e') 373 | 374 | d_match = d_re('a323e4x') 375 | self.assertTrue(bool(d_match)) 376 | self.assertEqual(d_match[0], 'a323e') 377 | 378 | def test_non_digit(self): 379 | d_re = HR() & T('a') & ND() & T('e') 380 | d_match = d_re('a@e4x') 381 | self.assertTrue(bool(d_match)) 382 | self.assertEqual(d_match[0], 'a@e') 383 | self.assertFalse(bool(d_re('a3e4x'))) 384 | self.assertFalse(bool(d_re('aWEe4x'))) 385 | 386 | def test_non_digits(self): 387 | d_re = HR() & T('a') & NDS() & T('e') 388 | d_match = d_re('aRe4x') 389 | self.assertTrue(bool(d_match)) 390 | self.assertEqual(d_match[0], 'aRe') 391 | 392 | d_match = d_re('aRTYe4x') 393 | self.assertTrue(bool(d_match)) 394 | self.assertEqual(d_match[0], 'aRTYe') 395 | self.assertFalse(bool(d_re('a3e4x'))) 396 | self.assertFalse(bool(d_re('a33e4x'))) 397 | 398 | def test_multiple(self): 399 | m_re = T('@') & MTP() 400 | m_match = m_re('@@@@@@') 401 | self.assertTrue(bool(m_match)) 402 | self.assertEqual(m_match[0], '@@@@@@') 403 | 404 | def test_word(self): 405 | m_re = T('@') & W() 406 | m_match = m_re('@palavra') 407 | self.assertTrue(bool(m_match)) 408 | self.assertEqual(m_match[0], '@palavra') 409 | 410 | def test_non_word(self): 411 | m_re = T('@') & NW() 412 | m_match = m_re('@!!!') 413 | self.assertTrue(bool(m_match)) 414 | self.assertEqual(m_match[0], '@!!!') 415 | 416 | def test_char(self): 417 | m_re = T('@') & C() 418 | m_match = m_re('@p') 419 | self.assertTrue(bool(m_match)) 420 | self.assertEqual(m_match[0], '@p') 421 | 422 | def test_non_char(self): 423 | m_re = T('@') & NC() 424 | m_match = m_re('@!') 425 | self.assertTrue(bool(m_match)) 426 | self.assertEqual(m_match[0], '@!') 427 | 428 | def test_whitespace(self): 429 | w_re = T('@') & WS() & T('orelha') 430 | w_match = w_re('@ orelha') 
431 | self.assertTrue(bool(w_match)) 432 | self.assertEqual(w_match[0], '@ orelha') 433 | 434 | w_match = w_re('@\torelha') 435 | self.assertTrue(bool(w_match)) 436 | self.assertEqual(w_match[0], '@\torelha') 437 | 438 | w_match = w_re('@\norelha') 439 | self.assertTrue(bool(w_match)) 440 | self.assertEqual(w_match[0], '@\norelha') 441 | 442 | def test_non_whitespace(self): 443 | w_re = T('@') & NWS() & T('orelha') 444 | w_match = w_re('@ orelha') 445 | self.assertFalse(bool(w_match)) 446 | 447 | w_match = w_re('@#orelha') 448 | self.assertTrue(bool(w_match)) 449 | self.assertEqual(w_match[0], '@#orelha') 450 | 451 | def test_tab(self): 452 | t_re = T('@') & TAB() & T('orelha') 453 | t_match = t_re('@\torelha') 454 | self.assertTrue(bool(t_match)) 455 | self.assertEqual(t_match[0], '@\torelha') 456 | 457 | def test_range(self): 458 | my_re = HR() & R(['a', 'z']) 459 | my_match = my_re('3s234') 460 | self.assertTrue(bool(my_match)) 461 | self.assertEqual(my_match[0], 's') 462 | 463 | my_re = HR() & R('abcedfghijklmnopqrsvuwxyz') 464 | my_match = my_re('3r234') 465 | self.assertTrue(bool(my_match)) 466 | self.assertEqual(my_match[0], 'r') 467 | 468 | def test_something(self): 469 | my_re = T('x') & STB('123') & T('y') 470 | my_match = my_re('xeewy') 471 | self.assertTrue(bool(my_match)) 472 | self.assertEqual(my_match[0], 'xeewy') 473 | 474 | my_re = T('x') & STB('123') & T('y') 475 | my_match = my_re('x123y') 476 | self.assertFalse(bool(my_match)) 477 | self.assertIsNone(my_match[0]) 478 | 479 | def test_something_but(self): 480 | my_re = T('x') & ST() & T('y') 481 | my_match = my_re('xeewy') 482 | self.assertTrue(bool(my_match)) 483 | self.assertEqual(my_match[0], 'xeewy') 484 | 485 | def test_ranges(self): 486 | az = ['a', 'z'] 487 | AZ = ['A', 'Z'] 488 | _09 = ['0', '9'] 489 | special = '_.+' 490 | my_re = HR() & RS( 491 | AZ, az, 492 | _09, special, 493 | name='account' 494 | ) & T('@') & RS( 495 | AZ, az, _09, 496 | name='provider' 497 | ) & T('.') & AT() 498 | 
my_match = my_re('my_email.1+github@Provider.com') 499 | self.assertTrue(bool(my_match)) 500 | self.assertEqual(my_match['account'], 'my_email.1+github') 501 | self.assertEqual(my_match['provider'], 'Provider') 502 | 503 | def test_add_quantifier(self): 504 | add_re = RE('A', quantifier=1) 505 | self.assertEqual(str(add_re), r'A{1}') 506 | add_re = RE('A', quantifier=25) 507 | self.assertEqual(str(add_re), r'A{25}') 508 | add_re = RE('A', quantifier=(2,)) 509 | self.assertEqual(str(add_re), r'A{2,}') 510 | add_re = RE('A', quantifier=(2, 5)) 511 | self.assertEqual(str(add_re), r'A{2,5}') 512 | 513 | def test_maybe(self): 514 | maybe_re = HR().then('@').maybe('x').then('#') 515 | maybe_match = maybe_re('@x#') 516 | self.assertTrue(bool(maybe_match)) 517 | self.assertEqual(maybe_match[0], '@x#') 518 | maybe_match = maybe_re('@#') 519 | self.assertTrue(bool(maybe_match)) 520 | self.assertEqual(maybe_match[0], '@#') 521 | 522 | maybe_re = HR() & T('@') & MB('x') & T('#') 523 | maybe_match = maybe_re('@x#') 524 | self.assertTrue(bool(maybe_match)) 525 | self.assertEqual(maybe_match[0], '@x#') 526 | maybe_match = maybe_re('@#') 527 | self.assertTrue(bool(maybe_match)) 528 | self.assertEqual(maybe_match[0], '@#') 529 | 530 | 531 | if __name__ == '__main__': 532 | unittest.main() 533 | --------------------------------------------------------------------------------