├── rstr ├── py.typed ├── __init__.py ├── xeger.py └── rstr.py ├── tests ├── __init__.py ├── test_package_level_access.py ├── test_xeger.py └── test_rstr.py ├── MANIFEST.in ├── .gitignore ├── AUTHORS ├── .circleci └── config.yml ├── tox.ini ├── RELEASE_NOTES ├── LICENSE.txt ├── pyproject.toml └── README.rst /rstr/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include rstr/py.typed 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | syntax: glob 2 | 3 | *.pyc 4 | dist/* 5 | *.egg-info 6 | .tox/* 7 | .env/* 8 | .credentials/* 9 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Brendan McCollam 2 | Andy Hayden 3 | MJ Schultz 4 | Tatiana Krikun 5 | 의성 정 6 | Goya Tomohiro 7 | Xiaoqin Zhu 8 | Stéphane Blondon 9 | Pascal Corpet 10 | Mark Mayo 11 | Aurélien Gâteau 12 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | jobs: 4 | test: 5 | docker: 6 | - image: fpob/tox 7 | steps: 8 | - checkout 9 | - run: 10 | name: Run tests 11 | command: tox 12 | 13 | workflows: 14 | test: 15 | jobs: 16 | - test 17 | -------------------------------------------------------------------------------- /tox.ini: 
-------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = pypy39,pypy310,pypy311,py39,py310,py311,py312,py313,linting,typing 3 | skipsdist = true 4 | 5 | [testenv] 6 | commands=python -m unittest {posargs} 7 | 8 | [testenv:linting] 9 | deps = 10 | ruff>=0.9.6 11 | commands = 12 | ruff format --check 13 | ruff check 14 | 15 | 16 | [testenv:typing] 17 | deps = 18 | mypy>=1.6.0,<2 19 | commands = 20 | mypy --strict {posargs: rstr} 21 | -------------------------------------------------------------------------------- /tests/test_package_level_access.py: -------------------------------------------------------------------------------- 1 | import re 2 | import unittest 3 | 4 | import rstr 5 | 6 | 7 | class TestPackageLevelFunctions(unittest.TestCase): 8 | def test_rstr(self) -> None: 9 | assert re.match(r'^[ABC]+$', rstr.rstr('ABC')) 10 | 11 | def test_xeger(self) -> None: 12 | assert re.match(r'^foo[\d]{10}bar$', rstr.xeger(r'^foo[\d]{10}bar$')) 13 | 14 | def test_convenience_function(self) -> None: 15 | assert re.match(r'^[a-zA-Z]+$', rstr.letters()) 16 | -------------------------------------------------------------------------------- /rstr/__init__.py: -------------------------------------------------------------------------------- 1 | from rstr.rstr import Rstr 2 | from rstr.rstr import SameCharacterError as SameCharacterError 3 | from rstr.xeger import Xeger 4 | 5 | _default_xeger = Xeger() 6 | _default_rstr = Rstr() 7 | 8 | rstr = _default_rstr.rstr 9 | xeger = _default_xeger.xeger 10 | 11 | 12 | # This allows convenience methods from rstr to be accessed at the package 13 | # level, without requiring the user to instantiate an Rstr() object. 
14 | printable = _default_rstr.printable 15 | letters = _default_rstr.letters 16 | uppercase = _default_rstr.uppercase 17 | lowercase = _default_rstr.lowercase 18 | digits = _default_rstr.digits 19 | punctuation = _default_rstr.punctuation 20 | nondigits = _default_rstr.nondigits 21 | nonletters = _default_rstr.nonletters 22 | whitespace = _default_rstr.whitespace 23 | nonwhitespace = _default_rstr.nonwhitespace 24 | normal = _default_rstr.normal 25 | word = _default_rstr.word 26 | nonword = _default_rstr.nonword 27 | unambiguous = _default_rstr.unambiguous 28 | postalsafe = _default_rstr.postalsafe 29 | urlsafe = _default_rstr.urlsafe 30 | domainsafe = _default_rstr.domainsafe 31 | -------------------------------------------------------------------------------- /RELEASE_NOTES: -------------------------------------------------------------------------------- 1 | # Release Notes 2 | 3 | ## Unreleased 4 | 5 | - Refactor: Xeger and Rstr are distinct, Xeger no longer inherits from XegerBase 6 | - Default to use SystemRandom() CSPRNG 7 | - Expose star_plus_limit as a property on Xeger (#37) 8 | - Fix error when regex specified a repeat larger than the star_plus limit (#38) 9 | - Behavior change: explicit repeats (e.g. `a{1000}`) are not restricted by star_plus_limit 10 | - Drops support for Pythons 3.7, 3.8, adds support for 3.12, 3.13. 11 | 12 | ## 3.2.2 13 | 14 | - Removes type stubs for `random` to fix type-checking failures 15 | under Python 3.11. 16 | 17 | ## 3.2.1 18 | 19 | - Fixes import failure under Python 3.11 20 | 21 | ## 3.2.0 22 | 23 | - PEP 561 compatible typing 24 | - Fix a bug that made xeger() fail under pypy3.8 25 | 26 | ## 3.1.0 27 | 28 | - Fix a bug where setting end_range but not start_range would fail. 29 | - Statically type the library, using PEP 484 annotations. 30 | - Drops test coverage for Python 3.6 (EoL December 2021). 31 | - Adds test coverage for Python 3.10. 
32 | 33 | ## 3.0.0 34 | 35 | - Drops support for Python 2.7 and 3.5 36 | - Fixes a bug where `include` could make the generated string too long. 37 | - Add new `rstr.unambiguous()` method that omits homographs. 38 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011, Leapfrog Direct Response, LLC 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the Leapfrog Direct Response, LLC, including 12 | its subsidiaries and affiliates nor the names of its 13 | contributors, may be used to endorse or promote products derived 14 | from this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL LEAPFROG DIRECT 20 | RESPONSE, LLC, INCLUDING ITS SUBSIDIARIES AND AFFILIATES, BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 24 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 25 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 26 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 27 | IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.2", "wheel", "setuptools_scm[toml]>=3.4.3"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "rstr" 7 | authors = [{name = "Leapfrog Direct Response LLC", email = "oss@leapfrogdevelopment.com"}] 8 | maintainers = [{name = "Brendan McCollam", email = "rstr@mccoll.am"}] 9 | description = "Generate random strings in Python" 10 | readme = "README.rst" 11 | classifiers = [ 12 | "Development Status :: 5 - Production/Stable", 13 | "Intended Audience :: Developers", 14 | "License :: OSI Approved :: BSD License", 15 | "Operating System :: OS Independent", 16 | "Programming Language :: Python :: 3.9", 17 | "Programming Language :: Python :: 3.10", 18 | "Programming Language :: Python :: 3.11", 19 | "Programming Language :: Python :: 3.12", 20 | "Programming Language :: Python :: 3.13", 21 | "Topic :: Software Development :: Testing", 22 | ] 23 | keywords = [ 24 | "random string", 25 | "reverse regex", 26 | "reverse regular expression", 27 | "testing", 28 | "fuzz testing", 29 | ] 30 | urls = {Homepage = "https://github.com/leapfrogonline/rstr"} 31 | requires-python = ">=3.9" 32 | dynamic = ["version"] 33 | 34 | [tool.ruff] 35 | 
import re
import unittest

from rstr import Xeger


class TestXeger(unittest.TestCase):
    """Round-trip tests: a string generated from a pattern must match that pattern."""

    def setUp(self) -> None:
        self.rs = Xeger()

    def test_literals(self) -> None:
        pattern = r'foo'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_dot(self) -> None:
        """
        Verify that the dot character doesn't produce newlines.
        See: https://bitbucket.org/leapfrogdevelopment/rstr/issue/1/
        """
        pattern = r'.+'
        for _ in range(100):
            result = self.rs.xeger(pattern)
            # re.match() only anchors at the start of the string, so a newline
            # after the first character would still "match"; check explicitly.
            assert '\n' not in result
            assert re.match(pattern, result)

    def test_digit(self) -> None:
        pattern = r'\d'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_nondigits(self) -> None:
        pattern = r'\D'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_literal_with_repeat(self) -> None:
        pattern = r'A{3}'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_literal_with_range_repeat(self) -> None:
        pattern = r'A{2,5}'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_word(self) -> None:
        pattern = r'\w'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_nonword(self) -> None:
        pattern = r'\W'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_or(self) -> None:
        pattern = r'foo|bar'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_or_with_subpattern(self) -> None:
        pattern = r'(foo|bar)'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_range(self) -> None:
        pattern = r'[A-F]'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_character_group(self) -> None:
        pattern = r'[ABC]'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_carot(self) -> None:
        pattern = r'^foo'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_dollarsign(self) -> None:
        pattern = r'foo$'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_not_literal(self) -> None:
        pattern = r'[^a]'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_negation_group(self) -> None:
        pattern = r'[^AEIOU]'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_lookahead(self) -> None:
        pattern = r'foo(?=bar)'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_lookbehind(self) -> None:
        # The lookbehind text precedes the match, so search rather than match.
        pattern = r'(?<=foo)bar'
        assert re.search(pattern, self.rs.xeger(pattern))

    def test_backreference(self) -> None:
        pattern = r'(foo|bar)baz\1'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_zero_or_more_greedy(self) -> None:
        pattern = r'a*'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_zero_or_more_non_greedy(self) -> None:
        pattern = r'a*?'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_exact_repeat_exceeds_star_plus_limit(self) -> None:
        pattern = r'\d{105}'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_range_repeat_exceeds_star_plus_limit(self) -> None:
        pattern = r'\d{102,105}'
        assert re.match(pattern, self.rs.xeger(pattern))

    def test_star_repeat_respects_limit(self) -> None:
        pattern = r'a*'
        for _ in range(100):
            result = self.rs.xeger(pattern)
            assert len(result) <= 100
            assert re.match(pattern, result)

    def test_plus_repeat_respects_limit(self) -> None:
        pattern = r'b+'
        for _ in range(100):
            result = self.rs.xeger(pattern)
            assert len(result) <= 100
            assert re.match(pattern, result)
class Xeger:
    """Generate semi-random strings that match a regular expression.

    Inspired by the Java library Xeger: http://code.google.com/p/xeger/

    The pattern is parsed with the standard library's regex parser and every
    parsed node is expanded through the dispatch table in ``self._cases``.
    Node types missing from that table raise ``KeyError``.
    """

    def __init__(self, random: Random = SYSTEM_RANDOM, *, star_plus_limit: int = 100) -> None:
        """Create a generator.

        ``random`` supplies all randomness. ``star_plus_limit`` is the largest
        number of repetitions produced for open-ended repeats (``*``, ``+``,
        ``{n,}``); it is a plain attribute and may be changed later.
        """
        self._random = random
        self.star_plus_limit = star_plus_limit
        # Text generated for each capturing group, keyed by group number, so
        # that backreferences can repeat it.
        self._cache: dict[int, str] = {}
        _wordchars = string.ascii_letters + string.digits + '_'
        self._categories: dict[str, str] = {
            'category_digit': string.digits,
            'category_not_digit': string.ascii_letters + string.punctuation,
            'category_space': string.whitespace,
            'category_not_space': string.printable.strip(),
            'category_word': _wordchars,
            # Sorted so the candidate order does not depend on set iteration
            # order (which varies between processes for strings).
            'category_not_word': ''.join(sorted(set(string.printable).difference(_wordchars))),
        }
        _any_but_newline = ''.join(string.printable.split('\n'))
        # Maps a lower-cased parser opcode name to a function producing either
        # generated text or, for members of a character set, candidates.
        self._cases: Mapping[str, Callable[..., Any]] = {
            'literal': lambda x: chr(x),
            'not_literal': lambda x: self._random.choice(string.printable.replace(chr(x), '')),
            'at': lambda x: '',
            'in': lambda x: self._handle_in(x),
            'any': lambda x: self._random.choice(_any_but_newline),
            'range': lambda x: [chr(i) for i in range(x[0], x[1] + 1)],
            'category': lambda x: self._categories[x],
            'branch': lambda x: ''.join(self._handle_state(i) for i in self._random.choice(x[1])),
            'subpattern': lambda x: self._handle_group(x),
            'assert': lambda x: ''.join(self._handle_state(i) for i in x[1]),
            'assert_not': lambda x: '',
            'groupref': lambda x: self._cache[x],
            'min_repeat': lambda x: self._handle_repeat(*x),
            'max_repeat': lambda x: self._handle_repeat(*x),
            # Marker placed first in a negated character set; see _handle_in.
            'negate': lambda x: [False],
        }

    def xeger(self, string_or_regex: Union[str, Pattern[str]]) -> str:
        """Generate a random string from a regular expression

        Accepts either a pattern string or a compiled pattern.

        By default, * and + metacharacters (and open-ended repeats such as
        ``{3,}``) will generate a maximum of 100 repetitions of the character
        or group of characters that they modify for each occurrence in the
        regular expression. Set ``star_plus_limit`` on the instance to change
        this limit. Explicit upper bounds such as ``a{1000}`` are used as
        written.

        """
        if isinstance(string_or_regex, str):
            pattern = string_or_regex
        else:
            pattern = string_or_regex.pattern

        parsed = sre_parse.parse(pattern)
        try:
            return self._build_string(parsed)
        finally:
            # Group captures belong to a single call; drop them even when
            # generation fails part-way through.
            self._cache.clear()

    def _build_string(self, parsed: Any) -> str:
        """Expand every top-level node of the parsed pattern and concatenate."""
        return ''.join(self._handle_state(state) for state in parsed)

    def _handle_state(self, state: Any) -> Any:
        """Dispatch one ``(opcode, value)`` node to its handler in ``self._cases``."""
        opcode, value = state
        opcode = opcode.name.lower()
        if opcode == 'category':
            # Category values are named constants too; use the name as the key.
            value = value.name.lower()
        return self._cases[opcode](value)

    def _handle_group(self, value: Sequence[Any]) -> str:
        """Expand a group and remember its text when it is a capturing group."""
        result = ''.join(self._handle_state(i) for i in value[-1])
        if value[0]:
            self._cache[value[0]] = result
        return result

    def _handle_in(self, value: Any) -> Any:
        """Pick one character from a character set, honouring negation."""
        candidates = list(chain(*(self._handle_state(i) for i in value)))
        if candidates[0] is False:
            # Negated set: choose among printable characters not listed.
            candidates = sorted(set(string.printable).difference(candidates[1:]))
        return self._random.choice(candidates)

    def _handle_repeat(self, start_range: int, end_range: int, value: Any) -> str:
        """Expand ``value`` a random number of times within the repeat bounds."""
        if end_range == MAXREPEAT:
            # Open-ended repeat: cap it at star_plus_limit, but never below the
            # required minimum -- otherwise 'a{150,}' would ask randint() for
            # an empty range and raise ValueError.
            end_range = max(start_range, self.star_plus_limit)

        times = self._random.randint(start_range, end_range)
        return ''.join(self._handle_state(state) for _ in range(times) for state in value)
import random
import re
import unittest

from rstr import Rstr, SameCharacterError


def assert_matches(pattern: str, value: str) -> None:
    """Assert that ``value`` matches ``pattern``, reporting both on failure."""
    errmsg = f'{value} does not match {pattern}'
    assert re.match(pattern, value), errmsg


class TestRstr(unittest.TestCase):
    def setUp(self) -> None:
        self.rs = Rstr()

    def test_specific_length(self) -> None:
        assert_matches('^A{5}$', self.rs.rstr('A', 5))

    def test_length_range(self) -> None:
        assert_matches('^A{11,20}$', self.rs.rstr('A', 11, 20))

    def test_end_range_no_start_range(self) -> None:
        assert_matches('^A{1,20}$', self.rs.rstr('A', end_range=20))

    def test_custom_alphabet(self) -> None:
        assert_matches('^A{1,10}$', self.rs.rstr('AA'))

    def test_alphabet_as_list(self) -> None:
        assert_matches('^A{1,10}$', self.rs.rstr(['A', 'A']))

    def test_include(self) -> None:
        assert_matches('^[ABC]*@[ABC]*$', self.rs.rstr('ABC', include='@'))

    def test_include_specific_length(self) -> None:
        """
        Verify including characters doesn't make the string longer than intended.
        """
        assert_matches('^[ABC@]{5}$', self.rs.rstr('ABC', 5, include='@'))

    def test_exclude(self) -> None:
        for _ in range(0, 100):
            assert 'C' not in self.rs.rstr('ABC', exclude='C')

    def test_include_as_list(self) -> None:
        assert_matches('^[ABC]*@[ABC]*$', self.rs.rstr('ABC', include=['@']))

    def test_exclude_as_list(self) -> None:
        for _ in range(0, 100):
            assert 'C' not in self.rs.rstr('ABC', exclude=['C'])

    def test_raise_exception_if_include_and_exclude_parameters_contain_same_character(
        self,
    ) -> None:
        single = r'include and exclude parameters contain same character \(B\)'
        # One call per context manager: the block exits at the first raise, so
        # a second call inside the same block would never run.
        with self.assertRaisesRegex(SameCharacterError, single):
            self.rs.rstr('A', include='B', exclude='B')
        with self.assertRaisesRegex(SameCharacterError, single):
            self.rs.rstr('A', include=['B'], exclude=['B'])
        with self.assertRaisesRegex(
            SameCharacterError,
            r'include and exclude parameters contain same characters \(., .\)',
        ):
            self.rs.rstr('A', include='BC', exclude='BC')


class TestSystemRandom(TestRstr):
    """Re-run the TestRstr cases with an explicitly supplied SystemRandom."""

    def setUp(self) -> None:
        self.rs = Rstr(random.SystemRandom())


class TestDigits(unittest.TestCase):
    def setUp(self) -> None:
        self.rs = Rstr()

    def test_all_digits(self) -> None:
        assert_matches(r'^\d{1,10}$', self.rs.digits())

    def test_digits_include(self) -> None:
        assert_matches(r'^\d*@\d*$', self.rs.digits(include='@'))

    def test_digits_exclude(self) -> None:
        for _ in range(0, 100):
            assert '5' not in self.rs.digits(exclude='5')


class TestNondigits(unittest.TestCase):
    def setUp(self) -> None:
        self.rs = Rstr()

    def test_nondigits(self) -> None:
        assert_matches(r'^\D{1,10}$', self.rs.nondigits())

    def test_nondigits_include(self) -> None:
        assert_matches(r'^\D*@\D*$', self.rs.nondigits(include='@'))

    def test_nondigits_exclude(self) -> None:
        for _ in range(0, 100):
            assert 'A' not in self.rs.nondigits(exclude='A')


class TestLetters(unittest.TestCase):
    def setUp(self) -> None:
        self.rs = Rstr()

    def test_letters(self) -> None:
        assert_matches(r'^[a-zA-Z]{1,10}$', self.rs.letters())

    def test_letters_include(self) -> None:
        assert_matches(r'^[a-zA-Z]*@[a-zA-Z]*$', self.rs.letters(include='@'))

    def test_letters_exclude(self) -> None:
        for _ in range(0, 100):
            assert 'A' not in self.rs.letters(exclude='A')


class TestUnambiguous(unittest.TestCase):
    def setUp(self) -> None:
        self.rs = Rstr()

    def test_unambiguous(self) -> None:
        assert_matches('^[a-km-zA-HJ-NP-Z2-9]{1,10}$', self.rs.unambiguous())

    def test_unambiguous_include(self) -> None:
        assert_matches('^[a-km-zA-HJ-NP-Z2-9@]{1,10}$', self.rs.unambiguous(include='@'))

    def test_unambiguous_exclude(self) -> None:
        for _ in range(0, 100):
            assert 'A' not in self.rs.unambiguous(exclude='A')


class TestCustomAlphabets(unittest.TestCase):
    def test_alphabet_at_instantiation(self) -> None:
        rs = Rstr(vowels='AEIOU')
        assert_matches('^[AEIOU]{1,10}$', rs.vowels())

    def test_add_alphabet(self) -> None:
        rs = Rstr()
        rs.add_alphabet('evens', '02468')
        assert_matches('^[02468]{1,10}$', rs.evens())


def main() -> None:
    unittest.main()


if __name__ == '__main__':
    main()
image:: https://circleci.com/gh/leapfrogonline/rstr.svg?style=svg 6 | :target: https://circleci.com/gh/leapfrogonline/rstr 7 | 8 | rstr is a helper module for easily generating random strings of various types. 9 | It could be useful for fuzz testing, generating dummy data, or other 10 | applications. 11 | 12 | It has no dependencies outside the standard library. 13 | 14 | Use 15 | --- 16 | 17 | The basic method of rstr is ``rstr()``. At a minimum, it requires one argument, 18 | an alphabet of characters from which to create a string. 19 | 20 | :: 21 | 22 | >>> import rstr 23 | >>> rstr.rstr('ABC') 24 | 'AACAACCB' 25 | 26 | By default, it will return a string between 1 and 10 characters in length. You 27 | may specify an exact length by including it as a second argument: 28 | 29 | :: 30 | 31 | >>> rstr.rstr('ABC', 4) 32 | 'ACBC' 33 | 34 | You can also generate a range of lengths by adding two arguments. In the following 35 | case, rstr will return a string with a randomly selected length between 5 and 10 36 | characters. 37 | 38 | :: 39 | 40 | >>> rstr.rstr('ABC', 5, 10) 41 | 'CBCCCABAA' 42 | 43 | It's also possible to include particular characters in your string. This is useful 44 | when testing a validator to make sure that certain characters are rejected. 45 | Characters listed in the 'include' argument will *always* be present somewhere 46 | in the resulting string. 47 | 48 | :: 49 | 50 | >>> rstr.rstr('ABC', include='&') 51 | 'CA&A' 52 | 53 | Conversely, you can exclude particular characters from the generated string. This is 54 | helpful when starting with a pre-defined population of characters. 
55 | 56 | :: 57 | 58 | >>> import string 59 | >>> rstr.rstr(string.digits, exclude='5') 60 | '8661442' 61 | 62 | Note that any of the arguments that accept strings can also 63 | accept lists or tuples of strings: 64 | 65 | :: 66 | 67 | >>> rstr.rstr(['A', 'B', 'C'], include = ['@'], exclude=('C',)) 68 | 'BAABBA@BAA' 69 | 70 | Other methods 71 | ------------- 72 | 73 | The other methods provided by rstr, besides ``rstr()`` and ``xeger()``, are convenience 74 | methods that can be called without arguments, and provide a pre-defined alphabet. 75 | They accept the same arguments as ``rstr()`` for purposes of 76 | specifying lengths and including or excluding particular characters. 77 | 78 | letters() 79 | The characters provided by string.ascii_letters in the standard library. 80 | 81 | uppercase() 82 | The characters provided by string.ascii_uppercase in the standard library. 83 | 84 | lowercase() 85 | The characters provided by string.ascii_lowercase in the standard library. 86 | 87 | printable() 88 | The characters provided by string.printable in the standard library. 89 | 90 | punctuation() 91 | The characters provided by string.punctuation in the standard library. 92 | 93 | nonwhitespace() 94 | The characters provided by string.printable in the standard library, except 95 | for those representing whitespace: tab, space, etc. 96 | 97 | digits() 98 | The characters provided by string.digits in the standard library. 99 | 100 | nondigits() 101 | The characters provided by the concatenation of string.ascii_letters and 102 | string.punctuation in the standard library. 103 | 104 | nonletters() 105 | The characters provided by the concatenation of string.digits and 106 | string.punctuation in the standard library. 107 | 108 | normal() 109 | Characters commonly accepted in text input, equivalent to string.digits + 110 | string.ascii_letters + ' ' (the space character). 
111 | 112 | unambiguous() 113 | The characters provided by the concatenation of string.digits and 114 | string.ascii_letters except characters which are similar: 1, l and I, etc. 115 | 116 | postalsafe() 117 | Characters that are safe for use in postal addresses in the United States: 118 | upper- and lower-case letters, digits, spaces, and the punctuation marks period, 119 | hash (#), hyphen, and forward-slash. 120 | 121 | urlsafe() 122 | Characters safe (unreserved) for use in URLs: letters, digits, hyphen, period, underscore, 123 | and tilde. 124 | 125 | domainsafe() 126 | Characters that are allowed for use in hostnames, and consequently, in internet domains: letters, 127 | digits, and the hyphen. 128 | 129 | Xeger 130 | ----- 131 | 132 | Inspired by the Java library of the same name, the ``xeger()`` method allows users to 133 | create a random string from a regular expression. 134 | 135 | For example to generate a postal code that fits the Canadian format: 136 | 137 | >>> import rstr 138 | >>> rstr.xeger(r'[A-Z]\d[A-Z] \d[A-Z]\d') 139 | 'R6M 1W5' 140 | 141 | xeger works fine with most simple regular expressions, but it doesn't support all 142 | Python regular expression features. 143 | 144 | Custom Alphabets 145 | ---------------- 146 | 147 | If you have custom alphabets of characters that you would like to use with a method 148 | shortcut, you can specify them by keyword when instantiating an Rstr object: 149 | 150 | >>> from rstr import Rstr 151 | >>> rs = Rstr(vowels='AEIOU') 152 | >>> rs.vowels() 153 | 'AEEUU' 154 | 155 | You can also add an alphabet to an existing instance with the add_alphabet() method: 156 | 157 | >>> rs.add_alphabet('odds', '13579') 158 | >>> rs.odds() 159 | '339599519' 160 | 161 | Examples 162 | -------- 163 | 164 | You can combine rstr with Python's built-in string formatting to produce strings 165 | that fit a variety of templates. 
166 | 167 | An email address: 168 | 169 | :: 170 | 171 | '{0}@{1}.{2}'.format(rstr.nonwhitespace(exclude='@'), 172 | rstr.domainsafe(), 173 | rstr.letters(3)) 174 | 175 | A URL: 176 | 177 | :: 178 | 179 | 'http://{0}.{1}/{2}/?{3}'.format(rstr.domainsafe(), 180 | rstr.letters(3), 181 | rstr.urlsafe(), 182 | rstr.urlsafe()) 183 | 184 | A postal address: 185 | 186 | :: 187 | 188 | """{0} {1} 189 | {2} {3} 190 | {4}, {5} {6} 191 | """.format(rstr.letters(4, 8).title(), 192 | rstr.letters(4, 8).title(), 193 | rstr.digits(3, 5), 194 | rstr.letters(4, 10).title(), 195 | rstr.letters(4, 15).title(), 196 | rstr.uppercase(2), 197 | rstr.digits(5), 198 | ) 199 | 200 | .. _SystemRandom: https://docs.python.org/3/library/random.html#random.SystemRandom 201 | -------------------------------------------------------------------------------- /rstr/rstr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011, Leapfrog Direct Response, LLC 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of the Leapfrog Direct Response, LLC, including 12 | # its subsidiaries and affiliates nor the names of its 13 | # contributors, may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LEAPFROG DIRECT
# RESPONSE, LLC, INCLUDING ITS SUBSIDIARIES AND AFFILIATES, BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
# IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import itertools
import string
import typing
from functools import partial
from random import Random, SystemRandom
from typing import Iterable, List, Optional, Sequence, TypeVar

_T = TypeVar('_T')


if typing.TYPE_CHECKING:
    from typing import Protocol

    class _PartialRstrFunc(Protocol):
        # Signature of the alphabet shortcuts (letters(), digits(), ...): the
        # same as Rstr.rstr() with the alphabet already bound, so include and
        # exclude accept any sequence of strings, not only str.
        def __call__(
            self,
            start_range: Optional[int] = ...,
            end_range: Optional[int] = ...,
            include: Sequence[str] = ...,
            exclude: Sequence[str] = ...,
        ) -> str: ...


SYSTEM_RANDOM = SystemRandom()

# Built-in alphabets, exposed as methods of the same name on every Rstr
# instance. Alphabets derived from sets are sorted so their character order
# does not depend on set iteration order.
DEFAULT_ALPHABETS: dict[str, str] = {
    'printable': string.printable,
    'letters': string.ascii_letters,
    'uppercase': string.ascii_uppercase,
    'lowercase': string.ascii_lowercase,
    'digits': string.digits,
    'punctuation': string.punctuation,
    'nondigits': string.ascii_letters + string.punctuation,
    'nonletters': string.digits + string.punctuation,
    'whitespace': string.whitespace,
    'nonwhitespace': string.printable.strip(),
    'normal': string.ascii_letters + string.digits + ' ',
    'word': string.ascii_letters + string.digits + '_',
    'nonword': ''.join(
        sorted(set(string.printable).difference(string.ascii_letters + string.digits + '_'))
    ),
    'unambiguous': ''.join(
        sorted(set(string.ascii_letters + string.digits).difference('0O1lI'))
    ),
    'postalsafe': string.ascii_letters + string.digits + ' .-#/',
    'urlsafe': string.ascii_letters + string.digits + '-._~',
    'domainsafe': string.ascii_letters + string.digits + '-',
}


class Rstr:
    """Create random strings from a variety of alphabets.

    The alphabets for printable(), digits(), and punctuation() are equivalent
    to the constants by those same names in the standard library string
    module; letters(), uppercase() and lowercase() use string.ascii_letters,
    string.ascii_uppercase and string.ascii_lowercase.

    nondigits() uses an alphabet of string.ascii_letters + string.punctuation

    nonletters() uses an alphabet of string.digits + string.punctuation

    nonwhitespace() uses an alphabet of string.printable.strip()

    normal() uses an alphabet of string.ascii_letters + string.digits + ' '
    (the space character)

    postalsafe() is based on USPS Publication 28 - Postal Addressing Standards:
    http://pe.usps.com/text/pub28/pub28c2.html
    The characters allowed in postal addresses are letters and digits, periods,
    slashes, the pound sign, and the hyphen.

    urlsafe() uses an alphabet of unreserved characters safe for use in URLs.
    From section 2.3 of RFC 3986: "Characters that are allowed in a URI but
    do not have a reserved purpose are called unreserved. These include
    uppercase and lowercase letters, decimal digits, hyphen, period,
    underscore, and tilde."

    domainsafe() uses an alphabet of characters allowed in hostnames, and
    consequently, in internet domains: letters, digits, and the hyphen.

    """

    def __init__(self, random: Random = SYSTEM_RANDOM, **custom_alphabets: str) -> None:
        """Create a generator drawing randomness from ``random``.

        Each keyword argument registers an extra alphabet under that name.
        """
        self._random = random
        self._alphabets = DEFAULT_ALPHABETS.copy()
        for alpha_name, alphabet in custom_alphabets.items():
            self.add_alphabet(alpha_name, alphabet)

    def add_alphabet(self, alpha_name: str, characters: str) -> None:
        """Add an additional alphabet to an Rstr instance and make it available
        via method calls.

        The alphabet is reached through __getattr__, so a name that matches a
        real attribute or method of the instance (e.g. 'rstr') is not exposed.

        """
        self._alphabets[alpha_name] = characters

    def __getattr__(self, attr: str) -> '_PartialRstrFunc':
        """Expose every registered alphabet as a method bound to that alphabet."""
        # Read the registry through __dict__ rather than self._alphabets: on an
        # instance created without __init__ (copy and pickle do this and then
        # probe it with hasattr/getattr) the attribute is missing, and
        # self._alphabets would re-enter __getattr__ until RecursionError.
        alphabets: dict[str, str] = self.__dict__.get('_alphabets', {})
        if attr in alphabets:
            return partial(self.rstr, alphabets[attr])
        message = f'Rstr instance has no attribute: {attr}'
        raise AttributeError(message)

    def sample_wr(self, population: Sequence[str], k: int) -> List[str]:
        """Samples k random elements (with replacement) from a population"""
        return [self._random.choice(population) for _ in range(k)]

    def rstr(
        self,
        alphabet: Iterable[str],
        start_range: Optional[int] = None,
        end_range: Optional[int] = None,
        include: Sequence[str] = '',
        exclude: Sequence[str] = '',
    ) -> str:
        """Generate a random string containing elements from 'alphabet'

        By default, rstr() will return a string between 1 and 10 characters.
        You can specify a second argument to get an exact length of string.

        If you want a string in a range of lengths, specify the start and end
        of that range as the second and third arguments. When only end_range
        is given, the range starts at 1.

        If you want to make certain that particular characters appear in the
        generated string, specify them as "include". They count towards the
        requested length; if there are more of them than the requested length,
        the result consists of exactly the included characters.

        If you want to *prevent* certain characters from appearing, pass them
        as 'exclude'.

        Raises SameCharacterError if a character is both included and excluded.

        """
        if same_characters := frozenset(include).intersection(exclude):
            plural = 's' if len(same_characters) > 1 else ''
            # Sorted so the message is the same on every run.
            chars = ', '.join(sorted(same_characters))
            message = f'include and exclude parameters contain same character{plural} ({chars})'
            raise SameCharacterError(message)

        # Sorted so that, with a seeded Random, the result does not depend on
        # set iteration order.
        popul = tuple(sorted(frozenset(alphabet).difference(exclude)))

        if end_range is not None:
            # Compare against None, not truthiness: end_range=0 is a valid
            # upper bound and must not skip the length computation.
            lower = 1 if start_range is None else start_range
            k = self._random.randint(lower, end_range)
        elif start_range is not None:
            k = start_range
        else:
            k = self._random.randint(1, 10)

        # Make sure we don't generate too long a string
        # when adding 'include' to it:
        k = max(k - len(include), 0)

        result = self.sample_wr(popul, k) + list(include)
        self._random.shuffle(result)
        return ''.join(result)


class SameCharacterError(ValueError):
    """Raised by Rstr.rstr() when include and exclude share a character."""