├── rstr
    ├── py.typed
    ├── __init__.py
    ├── xeger.py
    └── rstr.py
├── tests
    ├── __init__.py
    ├── test_package_level_access.py
    ├── test_xeger.py
    └── test_rstr.py
├── MANIFEST.in
├── .gitignore
├── AUTHORS
├── .circleci
    └── config.yml
├── tox.ini
├── RELEASE_NOTES
├── LICENSE.txt
├── pyproject.toml
└── README.rst


/rstr/py.typed:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE.txt
2 | include rstr/py.typed
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | syntax: glob
2 | 
3 | *.pyc
4 | dist/*
5 | *.egg-info
6 | .tox/*
7 | .env/*
8 | .credentials/*
9 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
 1 | Brendan McCollam
 2 | Andy Hayden
 3 | MJ Schultz
 4 | Tatiana Krikun
 5 | 의성 정
 6 | Goya Tomohiro
 7 | Xiaoqin Zhu
 8 | Stéphane Blondon
 9 | Pascal Corpet
10 | Mark Mayo
11 | Aurélien Gâteau
12 | 


--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | version: 2.1
 2 | 
 3 | jobs:
 4 |     test:
 5 |         docker:
 6 |             - image: fpob/tox
 7 |         steps:
 8 |             - checkout
 9 |             - run:
10 |                 name: Run tests
11 |                 command: tox
12 | 
13 | workflows:
14 |     test:
15 |         jobs:
16 |             - test
17 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist = pypy39,pypy310,pypy311,py39,py310,py311,py312,py313,linting,typing
 3 | skipsdist = true
 4 | 
 5 | [testenv]
 6 | commands=python -m unittest {posargs}
 7 | 
 8 | [testenv:linting]
 9 | deps =
10 |     ruff>=0.9.6
11 | commands =
12 |     ruff format --check
13 |     ruff check
14 | 
15 | 
16 | [testenv:typing]
17 | deps =
18 |     mypy>=1.6.0,<2
19 | commands =
20 |     mypy --strict {posargs: rstr}
21 | 


--------------------------------------------------------------------------------
/tests/test_package_level_access.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import unittest
 3 | 
 4 | import rstr
 5 | 
 6 | 
 7 | class TestPackageLevelFunctions(unittest.TestCase):
 8 |     def test_rstr(self) -> None:
 9 |         assert re.match(r'^[ABC]+$', rstr.rstr('ABC'))
10 | 
11 |     def test_xeger(self) -> None:
12 |         assert re.match(r'^foo[\d]{10}bar$', rstr.xeger(r'^foo[\d]{10}bar$'))
13 | 
14 |     def test_convenience_function(self) -> None:
15 |         assert re.match(r'^[a-zA-Z]+$', rstr.letters())
16 | 


--------------------------------------------------------------------------------
/rstr/__init__.py:
--------------------------------------------------------------------------------
 1 | from rstr.rstr import Rstr
 2 | from rstr.rstr import SameCharacterError as SameCharacterError
 3 | from rstr.xeger import Xeger
 4 | 
 5 | _default_xeger = Xeger()
 6 | _default_rstr = Rstr()
 7 | 
 8 | rstr = _default_rstr.rstr
 9 | xeger = _default_xeger.xeger
10 | 
11 | 
12 | # This allows convenience methods from rstr to be accessed at the package
13 | # level, without requiring the user to instantiate an Rstr() object.
14 | printable = _default_rstr.printable
15 | letters = _default_rstr.letters
16 | uppercase = _default_rstr.uppercase
17 | lowercase = _default_rstr.lowercase
18 | digits = _default_rstr.digits
19 | punctuation = _default_rstr.punctuation
20 | nondigits = _default_rstr.nondigits
21 | nonletters = _default_rstr.nonletters
22 | whitespace = _default_rstr.whitespace
23 | nonwhitespace = _default_rstr.nonwhitespace
24 | normal = _default_rstr.normal
25 | word = _default_rstr.word
26 | nonword = _default_rstr.nonword
27 | unambiguous = _default_rstr.unambiguous
28 | postalsafe = _default_rstr.postalsafe
29 | urlsafe = _default_rstr.urlsafe
30 | domainsafe = _default_rstr.domainsafe
31 | 


--------------------------------------------------------------------------------
/RELEASE_NOTES:
--------------------------------------------------------------------------------
 1 | # Release Notes
 2 | 
 3 | ## Unreleased
 4 | 
 5 | - Refactor: Xeger and Rstr are distinct, Xeger no longer inherits from XegerBase
 6 | - Default to use SystemRandom() CSPRNG
 7 | - Expose star_plus_limit as a property on Xeger (#37)
 8 | - Fix error when regex specified a repeat larger than the star_plus limit (#38)
 9 | - Behavior change: explicit repeats (e.g. `a{1000}`) are not restricted by star_plus_limit
10 | - Drops support for Pythons 3.7, 3.8, adds support for 3.12, 3.13.
11 | 
12 | ## 3.2.2
13 | 
14 | - Removes type stubs for `random` to fix type-checking failures
15 |   under Python 3.11.
16 | 
17 | ## 3.2.1
18 | 
19 | - Fixes import failure under Python 3.11
20 | 
21 | ## 3.2.0
22 | 
23 | - PEP 561 compatible typing
24 | - Fix a bug that made xeger() fail under pypy3.8
25 | 
26 | ## 3.1.0
27 | 
28 | - Fix a bug where setting end_range but not start_range would fail.
29 | - Statically type the library, using PEP 484 annotations.
30 | - Drops test coverage for Python 3.6 (EoL December 2021).
31 | - Adds test coverage for Python 3.10.
32 | 
33 | ## 3.0.0
34 | 
35 | - Drops support for Python 2.7 and 3.5
36 | - Fixes a bug where `include` could make the generated string too long.
37 | - Add new `rstr.unambiguous()` method that omits homographs.
38 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2011, Leapfrog Direct Response, LLC
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 |     * Redistributions of source code must retain the above copyright
 7 |       notice, this list of conditions and the following disclaimer.
 8 |     * Redistributions in binary form must reproduce the above copyright
 9 |       notice, this list of conditions and the following disclaimer in the
10 |       documentation and/or other materials provided with the distribution.
11 |     * Neither the name of the Leapfrog Direct Response, LLC, including
12 |       its subsidiaries and affiliates nor the names of its
13 |       contributors, may be used to endorse or promote products derived
14 |       from this software without specific prior written permission.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LEAPFROG DIRECT
20 | RESPONSE, LLC, INCLUDING ITS SUBSIDIARIES AND AFFILIATES, BE LIABLE
21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27 | IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=61.2", "wheel", "setuptools_scm[toml]>=3.4.3"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "rstr"
 7 | authors = [{name = "Leapfrog Direct Response LLC", email = "oss@leapfrogdevelopment.com"}]
 8 | maintainers = [{name = "Brendan McCollam", email = "rstr@mccoll.am"}]
 9 | description = "Generate random strings in Python"
10 | readme = "README.rst"
11 | classifiers = [
12 |     "Development Status :: 5 - Production/Stable",
13 |     "Intended Audience :: Developers",
14 |     "License :: OSI Approved :: BSD License",
15 |     "Operating System :: OS Independent",
16 |     "Programming Language :: Python :: 3.9",
17 |     "Programming Language :: Python :: 3.10",
18 |     "Programming Language :: Python :: 3.11",
19 |     "Programming Language :: Python :: 3.12",
20 |     "Programming Language :: Python :: 3.13",
21 |     "Topic :: Software Development :: Testing",
22 | ]
23 | keywords = [
24 |     "random string",
25 |     "reverse regex",
26 |     "reverse regular expression",
27 |     "testing",
28 |     "fuzz testing",
29 | ]
30 | urls = {Homepage = "https://github.com/leapfrogonline/rstr"}
 31 | requires-python = ">=3.9"
32 | dynamic = ["version"]
33 | 
34 | [tool.ruff]
35 | extend-exclude = ["build"]
36 | line-length = 100
37 | 
38 | [tool.ruff.format]
39 | quote-style = "single"
40 | 
41 | [tool.ruff.lint]
42 | select = [
43 |     "E",  # pycodestyle
44 |     "F",  # Pyflakes
45 |     "UP", # pyupgrade
46 |     "B",  # flake8-bugbear
47 |     "SIM",# flake8-simplify
48 |     "I",  # isort
49 | ]
50 | 
51 | [tool.setuptools]
52 | package-dir = {}
53 | include-package-data = true
54 | 
55 | [tool.setuptools.packages.find]
56 | include = ["rstr", "rstr.*"]
57 | 
58 | [tool.setuptools_scm]
59 | 


--------------------------------------------------------------------------------
/tests/test_xeger.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import unittest
  3 | 
  4 | from rstr import Xeger
  5 | 
  6 | 
  7 | class TestXeger(unittest.TestCase):
  8 |     def setUp(self) -> None:
  9 |         self.rs = Xeger()
 10 | 
 11 |     def test_literals(self) -> None:
 12 |         pattern = r'foo'
 13 |         assert re.match(pattern, self.rs.xeger(pattern))
 14 | 
 15 |     def test_dot(self) -> None:
 16 |         """
 17 |         Verify that the dot character doesn't produce newlines.
 18 |         See: https://bitbucket.org/leapfrogdevelopment/rstr/issue/1/
 19 |         """
 20 |         pattern = r'.+'
 21 |         for _ in range(100):
 22 |             assert re.match(pattern, self.rs.xeger(pattern))
 23 | 
 24 |     def test_digit(self) -> None:
 25 |         pattern = r'\d'
 26 |         assert re.match(pattern, self.rs.xeger(pattern))
 27 | 
 28 |     def test_nondigits(self) -> None:
 29 |         pattern = r'\D'
 30 |         assert re.match(pattern, self.rs.xeger(pattern))
 31 | 
 32 |     def test_literal_with_repeat(self) -> None:
 33 |         pattern = r'A{3}'
 34 |         assert re.match(pattern, self.rs.xeger(pattern))
 35 | 
 36 |     def test_literal_with_range_repeat(self) -> None:
 37 |         pattern = r'A{2,5}'
 38 |         assert re.match(pattern, self.rs.xeger(pattern))
 39 | 
 40 |     def test_word(self) -> None:
 41 |         pattern = r'\w'
 42 |         assert re.match(pattern, self.rs.xeger(pattern))
 43 | 
 44 |     def test_nonword(self) -> None:
 45 |         pattern = r'\W'
 46 |         assert re.match(pattern, self.rs.xeger(pattern))
 47 | 
 48 |     def test_or(self) -> None:
 49 |         pattern = r'foo|bar'
 50 |         assert re.match(pattern, self.rs.xeger(pattern))
 51 | 
 52 |     def test_or_with_subpattern(self) -> None:
 53 |         pattern = r'(foo|bar)'
 54 |         assert re.match(pattern, self.rs.xeger(pattern))
 55 | 
 56 |     def test_range(self) -> None:
 57 |         pattern = r'[A-F]'
 58 |         assert re.match(pattern, self.rs.xeger(pattern))
 59 | 
 60 |     def test_character_group(self) -> None:
 61 |         pattern = r'[ABC]'
 62 |         assert re.match(pattern, self.rs.xeger(pattern))
 63 | 
 64 |     def test_carot(self) -> None:
 65 |         pattern = r'^foo'
 66 |         assert re.match(pattern, self.rs.xeger(pattern))
 67 | 
 68 |     def test_dollarsign(self) -> None:
 69 |         pattern = r'foo$'
 70 |         assert re.match(pattern, self.rs.xeger(pattern))
 71 | 
 72 |     def test_not_literal(self) -> None:
 73 |         pattern = r'[^a]'
 74 |         assert re.match(pattern, self.rs.xeger(pattern))
 75 | 
 76 |     def test_negation_group(self) -> None:
 77 |         pattern = r'[^AEIOU]'
 78 |         assert re.match(pattern, self.rs.xeger(pattern))
 79 | 
 80 |     def test_lookahead(self) -> None:
 81 |         pattern = r'foo(?=bar)'
 82 |         assert re.match(pattern, self.rs.xeger(pattern))
 83 | 
 84 |     def test_lookbehind(self) -> None:
 85 |         pattern = r'(?<=foo)bar'
 86 |         assert re.search(pattern, self.rs.xeger(pattern))
 87 | 
 88 |     def test_backreference(self) -> None:
 89 |         pattern = r'(foo|bar)baz\1'
 90 |         assert re.match(pattern, self.rs.xeger(pattern))
 91 | 
 92 |     def test_zero_or_more_greedy(self) -> None:
 93 |         pattern = r'a*'
 94 |         assert re.match(pattern, self.rs.xeger(pattern))
 95 | 
 96 |     def test_zero_or_more_non_greedy(self) -> None:
 97 |         pattern = r'a*?'
 98 |         assert re.match(pattern, self.rs.xeger(pattern))
 99 | 
100 |     def test_exact_repeat_exceeds_star_plus_limit(self) -> None:
101 |         pattern = r'\d{105}'
102 |         assert re.match(pattern, self.rs.xeger(pattern))
103 | 
104 |     def test_range_repeat_exceeds_star_plus_limit(self) -> None:
105 |         pattern = r'\d{102,105}'
106 |         assert re.match(pattern, self.rs.xeger(pattern))
107 | 
108 |     def test_star_repeat_respects_limit(self) -> None:
109 |         pattern = r'a*'
110 |         for _ in range(100):
111 |             result = self.rs.xeger(pattern)
112 |             assert len(result) <= 100
113 |             assert re.match(pattern, result)
114 | 
115 |     def test_plus_repeat_respects_limit(self) -> None:
116 |         pattern = r'b+'
117 |         for _ in range(100):
118 |             result = self.rs.xeger(pattern)
119 |             assert len(result) <= 100
120 |             assert re.match(pattern, result)
121 | 


--------------------------------------------------------------------------------
/rstr/xeger.py:
--------------------------------------------------------------------------------
  1 | import string
  2 | import typing
  3 | from itertools import chain
  4 | from random import Random, SystemRandom
  5 | from typing import Any, Callable, Mapping, Pattern, Sequence, Union
  6 | 
  7 | try:
  8 |     import re._parser as sre_parse  # type: ignore[import-not-found]
  9 |     from re._constants import MAXREPEAT  # type: ignore[import-not-found]
 10 | except ImportError:  # Python < 3.11
 11 |     import sre_parse
 12 |     from sre_constants import MAXREPEAT
 13 | 
 14 | SYSTEM_RANDOM = SystemRandom()
 15 | 
 16 | 
 17 | class Xeger:
 18 |     """Inspired by the Java library Xeger: http://code.google.com/p/xeger/
 19 |     Allows users to generate a semi-random string from a regular expression."""
 20 | 
 21 |     def __init__(self, random: Random = SYSTEM_RANDOM, *, star_plus_limit: int = 100) -> None:
 22 |         self._random = random
 23 |         self.star_plus_limit = star_plus_limit
 24 |         self._cache: dict[str, str] = {}
 25 |         _wordchars = string.ascii_letters + string.digits + '_'
 26 |         self._categories: dict[str, str] = {
 27 |             'category_digit': string.digits,
 28 |             'category_not_digit': string.ascii_letters + string.punctuation,
 29 |             'category_space': string.whitespace,
 30 |             'category_not_space': string.printable.strip(),
 31 |             'category_word': _wordchars,
 32 |             'category_not_word': ''.join(set(string.printable).difference(_wordchars)),
 33 |         }
 34 |         _any_but_newline = ''.join(string.printable.split('\n'))
 35 |         self._cases: Mapping[str, Callable[..., Any]] = {
 36 |             'literal': lambda x: chr(x),
 37 |             'not_literal': lambda x: self._random.choice(string.printable.replace(chr(x), '')),
 38 |             'at': lambda x: '',
 39 |             'in': lambda x: self._handle_in(x),
 40 |             'any': lambda x: self._random.choice(_any_but_newline),
 41 |             'range': lambda x: [chr(i) for i in range(x[0], x[1] + 1)],
 42 |             'category': lambda x: self._categories[x],
 43 |             'branch': lambda x: ''.join(self._handle_state(i) for i in self._random.choice(x[1])),
 44 |             'subpattern': lambda x: self._handle_group(x),
 45 |             'assert': lambda x: ''.join(self._handle_state(i) for i in x[1]),
 46 |             'assert_not': lambda x: '',
 47 |             'groupref': lambda x: self._cache[x],
 48 |             'min_repeat': lambda x: self._handle_repeat(*x),
 49 |             'max_repeat': lambda x: self._handle_repeat(*x),
 50 |             'negate': lambda x: [False],
 51 |         }
 52 | 
 53 |     def xeger(self, string_or_regex: Union[str, Pattern[str]]) -> str:
 54 |         """Generate a random string from a regular expression
 55 | 
 56 |         By default, * and + metacharacters will generate a maximum of 100
 57 |         repetitions of the character or group of characters that they modify
 58 |         for each occurance in the regular expression. You can provide a second
 59 |         argument to change this limit (note that the maximum amount of repeats
 60 |         in Python is 65535).
 61 | 
 62 |         """
 63 |         try:
 64 |             pattern = typing.cast(Pattern[str], string_or_regex).pattern
 65 |         except AttributeError:
 66 |             pattern = typing.cast(str, string_or_regex)
 67 | 
 68 |         parsed = sre_parse.parse(pattern)
 69 |         result = self._build_string(parsed)
 70 |         self._cache.clear()
 71 |         return result
 72 | 
 73 |     def _build_string(self, parsed: Any) -> str:
 74 |         newstr = []
 75 |         for state in parsed:
 76 |             newstr.append(self._handle_state(state))
 77 |         return ''.join(newstr)
 78 | 
 79 |     def _handle_state(self, state: Any) -> Any:
 80 |         opcode, value = state
 81 |         opcode = opcode.name.lower()
 82 |         if opcode == 'category':
 83 |             value = value.name.lower()
 84 |         return self._cases[opcode](value)
 85 | 
 86 |     def _handle_group(self, value: Sequence[Any]) -> str:
 87 |         result = ''.join(self._handle_state(i) for i in value[-1])
 88 |         if value[0]:
 89 |             self._cache[value[0]] = result
 90 |         return result
 91 | 
 92 |     def _handle_in(self, value: Any) -> Any:
 93 |         candidates = list(chain(*(self._handle_state(i) for i in value)))
 94 |         if candidates[0] is False:
 95 |             candidates = list(set(string.printable).difference(candidates[1:]))
 96 |         return self._random.choice(candidates)
 97 | 
 98 |     def _handle_repeat(self, start_range: int, end_range: int, value: str) -> str:
 99 |         result = []
100 |         if end_range is MAXREPEAT:
101 |             end_range = self.star_plus_limit
102 | 
103 |         times = self._random.randint(start_range, end_range)
104 |         for i in range(times):
105 |             result.append(''.join(self._handle_state(i) for i in value))
106 |         return ''.join(result)
107 | 


--------------------------------------------------------------------------------
/tests/test_rstr.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import re
  3 | import unittest
  4 | 
  5 | from rstr import Rstr, SameCharacterError
  6 | 
  7 | 
  8 | def assert_matches(pattern: str, value: str) -> None:
  9 |     errmsg = f'{value} does not match {pattern}'
 10 |     assert re.match(pattern, value), errmsg
 11 | 
 12 | 
 13 | class TestRstr(unittest.TestCase):
 14 |     def setUp(self) -> None:
 15 |         self.rs = Rstr()
 16 | 
 17 |     def test_specific_length(self) -> None:
 18 |         assert_matches('^A{5}$', self.rs.rstr('A', 5))
 19 | 
 20 |     def test_length_range(self) -> None:
 21 |         assert_matches('^A{11,20}$', self.rs.rstr('A', 11, 20))
 22 | 
 23 |     def test_end_range_no_start_range(self) -> None:
 24 |         assert_matches('^A{1,20}$', self.rs.rstr('A', end_range=20))
 25 | 
 26 |     def test_custom_alphabet(self) -> None:
 27 |         assert_matches('^A{1,10}$', self.rs.rstr('AA'))
 28 | 
 29 |     def test_alphabet_as_list(self) -> None:
 30 |         assert_matches('^A{1,10}$', self.rs.rstr(['A', 'A']))
 31 | 
 32 |     def test_include(self) -> None:
 33 |         assert_matches('^[ABC]*@[ABC]*$', self.rs.rstr('ABC', include='@'))
 34 | 
 35 |     def test_include_specific_length(self) -> None:
 36 |         """
 37 |         Verify including characters doesn't make the string longer than intended.
 38 |         """
 39 |         assert_matches('^[ABC@]{5}$', self.rs.rstr('ABC', 5, include='@'))
 40 | 
 41 |     def test_exclude(self) -> None:
 42 |         for _ in range(0, 100):
 43 |             assert 'C' not in self.rs.rstr('ABC', exclude='C')
 44 | 
 45 |     def test_include_as_list(self) -> None:
 46 |         assert_matches('^[ABC]*@[ABC]*$', self.rs.rstr('ABC', include=['@']))
 47 | 
 48 |     def test_exclude_as_list(self) -> None:
 49 |         for _ in range(0, 100):
 50 |             assert 'C' not in self.rs.rstr('ABC', exclude=['C'])
 51 | 
 52 |     def test_raise_exception_if_include_and_exclude_parameters_contain_same_character(
 53 |         self,
 54 |     ) -> None:
 55 |         with self.assertRaisesRegex(
 56 |             SameCharacterError,
 57 |             r'include and exclude parameters contain same character \(B\)',
 58 |         ):
 59 |             self.rs.rstr('A', include='B', exclude='B')
 60 |             self.rs.rstr('A', include=['B'], exclude=['B'])
 61 |         with self.assertRaisesRegex(
 62 |             SameCharacterError,
 63 |             r'include and exclude parameters contain same characters \(., .\)',
 64 |         ):
 65 |             self.rs.rstr('A', include='BC', exclude='BC')
 66 | 
 67 | 
 68 | class TestSystemRandom(TestRstr):
 69 |     def setUp(self) -> None:
 70 |         self.rs = Rstr(random.SystemRandom())
 71 | 
 72 | 
 73 | class TestDigits(unittest.TestCase):
 74 |     def setUp(self) -> None:
 75 |         self.rs = Rstr()
 76 | 
 77 |     def test_all_digits(self) -> None:
 78 |         assert_matches(r'^\d{1,10}$', self.rs.digits())
 79 | 
 80 |     def test_digits_include(self) -> None:
 81 |         assert_matches(r'^\d*@\d*$', self.rs.digits(include='@'))
 82 | 
 83 |     def test_digits_exclude(self) -> None:
 84 |         for _ in range(0, 100):
 85 |             assert '5' not in self.rs.digits(exclude='5')
 86 | 
 87 | 
 88 | class TestNondigits(unittest.TestCase):
 89 |     def setUp(self) -> None:
 90 |         self.rs = Rstr()
 91 | 
 92 |     def test_nondigits(self) -> None:
 93 |         assert_matches(r'^\D{1,10}$', self.rs.nondigits())
 94 | 
 95 |     def test_nondigits_include(self) -> None:
 96 |         assert_matches(r'^\D*@\D*$', self.rs.nondigits(include='@'))
 97 | 
 98 |     def test_nondigits_exclude(self) -> None:
 99 |         for _ in range(0, 100):
100 |             assert 'A' not in self.rs.nondigits(exclude='A')
101 | 
102 | 
class TestLetters(unittest.TestCase):
    """Checks for the ``letters`` convenience method."""

    def setUp(self) -> None:
        self.rs = Rstr()

    def test_letters(self) -> None:
        generated = self.rs.letters()
        assert_matches(r'^[a-zA-Z]{1,10}$', generated)

    def test_letters_include(self) -> None:
        generated = self.rs.letters(include='@')
        assert_matches(r'^[a-zA-Z]*@[a-zA-Z]*$', generated)

    def test_letters_exclude(self) -> None:
        # Repeat to make an accidental pass unlikely.
        for _ in range(100):
            generated = self.rs.letters(exclude='A')
            assert 'A' not in generated
116 | 
117 | 
class TestUnambiguous(unittest.TestCase):
    """Checks for the ``unambiguous`` convenience method."""

    def setUp(self) -> None:
        self.rs = Rstr()

    def test_unambiguous(self) -> None:
        generated = self.rs.unambiguous()
        assert_matches('^[a-km-zA-HJ-NP-Z2-9]{1,10}$', generated)

    def test_unambiguous_include(self) -> None:
        generated = self.rs.unambiguous(include='@')
        assert_matches('^[a-km-zA-HJ-NP-Z2-9@]{1,10}$', generated)

    def test_unambiguous_exclude(self) -> None:
        # Repeat to make an accidental pass unlikely.
        for _ in range(100):
            generated = self.rs.unambiguous(exclude='A')
            assert 'A' not in generated
131 | 
132 | 
class TestCustomAlphabets(unittest.TestCase):
    """Checks that user-defined alphabets become callable by name."""

    def test_alphabet_at_instantiation(self) -> None:
        # Alphabet supplied as a keyword argument to the constructor.
        with_vowels = Rstr(vowels='AEIOU')
        assert_matches('^[AEIOU]{1,10}$', with_vowels.vowels())

    def test_add_alphabet(self) -> None:
        # Alphabet registered on an already constructed instance.
        with_evens = Rstr()
        with_evens.add_alphabet('evens', '02468')
        assert_matches('^[02468]{1,10}$', with_evens.evens())
142 | 
143 | 
def main() -> None:
    """Run this module's tests through the unittest command-line runner."""
    unittest.main()


# Only run the tests when this file is executed as a script.
if __name__ == '__main__':
    main()
150 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | ===============================
  2 | rstr = Random Strings in Python
  3 | ===============================
  4 | 
  5 | .. image:: https://circleci.com/gh/leapfrogonline/rstr.svg?style=svg
  6 |     :target: https://circleci.com/gh/leapfrogonline/rstr
  7 | 
  8 | rstr is a helper module for easily generating random strings of various types.
  9 | It could be useful for fuzz testing, generating dummy data, or other
 10 | applications.
 11 | 
 12 | It has no dependencies outside the standard library.
 13 | 
 14 | Use
 15 | ---
 16 | 
 17 | The basic method of rstr is ``rstr()``. At a minimum, it requires one argument,
 18 | an alphabet of characters from which to create a string.
 19 | 
 20 | ::
 21 | 
 22 |     >>> import rstr
 23 |     >>> rstr.rstr('ABC')
 24 |     'AACAACCB'
 25 | 
 26 | By default, it will return a string between 1 and 10 characters in length. You
 27 | may specify an exact length by including it as a second argument:
 28 | 
 29 | ::
 30 | 
 31 |     >>> rstr.rstr('ABC', 4)
 32 |     'ACBC'
 33 | 
 34 | You can also generate a range of lengths by adding two arguments. In the following
 35 | case, rstr will return a string with a randomly selected length between 5 and 10
 36 | characters.
 37 | 
 38 | ::
 39 | 
 40 |     >>> rstr.rstr('ABC', 5, 10)
 41 |     'CBCCCABAA'
 42 | 
 43 | It's also possible to include particular characters in your string. This is useful
 44 | when testing a validator to make sure that certain characters are rejected.
 45 | Characters listed in the 'include' argument will *always* be present somewhere
 46 | in the resulting string.
 47 | 
 48 | ::
 49 | 
 50 |     >>> rstr.rstr('ABC', include='&')
 51 |     'CA&A'
 52 | 
 53 | Conversely, you can exclude particular characters from the generated string. This is
 54 | helpful when starting with a pre-defined population of characters.
 55 | 
 56 | ::
 57 | 
 58 |     >>> import string
 59 |     >>> rstr.rstr(string.digits, exclude='5')
 60 |     '8661442'
 61 | 
 62 | Note that any of the arguments that accept strings can also
 63 | accept lists or tuples of strings:
 64 | 
 65 | ::
 66 | 
 67 |     >>> rstr.rstr(['A', 'B', 'C'], include = ['@'], exclude=('C',))
 68 |     'BAAABBA@BAA'
 69 | 
 70 | Other methods
 71 | -------------
 72 | 
 73 | The other methods provided by rstr, besides ``rstr()`` and ``xeger()``, are convenience
 74 | methods that can be called without arguments, and provide a pre-defined alphabet.
 75 | They accept the same arguments as ``rstr()`` for purposes of
 76 | specifying lengths and including or excluding particular characters.
 77 | 
 78 | letters()
 79 |     The characters provided by string.ascii_letters in the standard library.
 80 | 
 81 | uppercase()
 82 |     The characters provided by string.ascii_uppercase in the standard library.
 83 | 
 84 | lowercase()
 85 |     The characters provided by string.ascii_lowercase in the standard library.
 86 | 
 87 | printable()
 88 |     The characters provided by string.printable in the standard library.
 89 | 
 90 | punctuation()
 91 |     The characters provided by string.punctuation in the standard library.
 92 | 
 93 | nonwhitespace()
 94 |     The characters provided by string.printable in the standard library, except
 95 |     for those representing whitespace: tab, space, etc.
 96 | 
 97 | digits()
 98 |     The characters provided by string.digits in the standard library.
 99 | 
100 | nondigits()
101 |     The characters provided by the concatenation of string.ascii_letters and
102 |     string.punctuation in the standard library.
103 | 
104 | nonletters()
105 |     The characters provided by the concatenation of string.digits and
106 |     string.punctuation in the standard library.
107 | 
108 | normal()
109 |     Characters commonly accepted in text input, equivalent to string.digits +
110 |     string.ascii_letters + ' ' (the space character).
111 | 
112 | unambiguous()
113 |     The characters provided by the concatenation of string.digits and
114 |     string.ascii_letters except characters which are similar: 1, l and I, etc.
115 | 
116 | postalsafe()
117 |     Characters that are safe for use in postal addresses in the United States:
118 |     upper- and lower-case letters, digits, spaces, and the punctuation marks period,
119 |     hash (#), hyphen, and forward-slash.
120 | 
121 | urlsafe()
122 |     Characters safe (unreserved) for use in URLs: letters, digits, hyphen, period, underscore,
123 |     and tilde.
124 | 
125 | domainsafe()
126 |     Characters that are allowed for use in hostnames, and consequently, in internet domains: letters,
127 |     digits, and the hyphen.
128 | 
129 | Xeger
130 | -----
131 | 
132 | Inspired by the Java library of the same name, the ``xeger()`` method allows users to
133 | create a random string from a regular expression.
134 | 
135 | For example to generate a postal code that fits the Canadian format:
136 | 
137 |     >>> import rstr
138 |     >>> rstr.xeger(r'[A-Z]\d[A-Z] \d[A-Z]\d')
139 |     'R6M 1W5'
140 | 
141 | xeger works fine with most simple regular expressions, but it doesn't support all
142 | Python regular expression features.
143 | 
144 | Custom Alphabets
145 | ----------------
146 | 
147 | If you have custom alphabets of characters that you would like to use with a method
148 | shortcut, you can specify them by keyword when instantiating an Rstr object:
149 | 
150 |     >>> from rstr import Rstr
151 |     >>> rs = Rstr(vowels='AEIOU')
152 |     >>> rs.vowels()
153 |     'AEEUU'
154 | 
155 | You can also add an alphabet to an existing instance with the add_alphabet() method:
156 | 
157 |     >>> rs.add_alphabet('odds', '13579')
158 |     >>> rs.odds()
159 |     '339599519'
160 | 
161 | Examples
162 | --------
163 | 
164 | You can combine rstr with Python's built-in string formatting to produce strings
165 | that fit a variety of templates.
166 | 
167 | An email address:
168 | 
169 | ::
170 | 
171 |     '{0}@{1}.{2}'.format(rstr.nonwhitespace(exclude='@'),
172 |                          rstr.domainsafe(),
173 |                          rstr.letters(3))
174 | 
175 | A URL:
176 | 
177 | ::
178 | 
179 |     'http://{0}.{1}/{2}/?{3}'.format(rstr.domainsafe(),
180 |                                     rstr.letters(3),
181 |                                     rstr.urlsafe(),
182 |                                     rstr.urlsafe())
183 | 
184 | A postal address:
185 | 
186 | ::
187 | 
188 |     """{0} {1}
189 |     {2} {3}
190 |     {4}, {5} {6}
191 |     """.format(rstr.letters(4, 8).title(),
192 |                rstr.letters(4, 8).title(),
193 |                rstr.digits(3, 5),
194 |                rstr.letters(4, 10).title(),
195 |                rstr.letters(4, 15).title(),
196 |                rstr.uppercase(2),
197 |                rstr.digits(5),
198 |                )
199 | 
200 | .. _SystemRandom: https://docs.python.org/3/library/random.html#random.SystemRandom
201 | 


--------------------------------------------------------------------------------
/rstr/rstr.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2011, Leapfrog Direct Response, LLC
  2 | # All rights reserved.
  3 | #
  4 | # Redistribution and use in source and binary forms, with or without
  5 | # modification, are permitted provided that the following conditions are met:
  6 | #    * Redistributions of source code must retain the above copyright
  7 | #      notice, this list of conditions and the following disclaimer.
  8 | #    * Redistributions in binary form must reproduce the above copyright
  9 | #      notice, this list of conditions and the following disclaimer in the
 10 | #      documentation and/or other materials provided with the distribution.
 11 | #    * Neither the name of the Leapfrog Direct Response, LLC, including
 12 | #      its subsidiaries and affiliates nor the names of its
 13 | #      contributors, may be used to endorse or promote products derived
 14 | #      from this software without specific prior written permission.
 15 | #
 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LEAPFROG DIRECT
 20 | # RESPONSE, LLC, INCLUDING ITS SUBSIDIARIES AND AFFILIATES, BE LIABLE
 21 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 22 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 23 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 24 | # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 25 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 26 | # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 27 | # IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28 | 
 29 | 
 30 | import itertools
 31 | import string
 32 | import typing
 33 | from functools import partial
 34 | from random import Random, SystemRandom
 35 | from typing import Iterable, List, Optional, Sequence, TypeVar
 36 | 
# Generic type variable. It is not referenced anywhere else in this module;
# presumably kept for importers -- verify before removing.
_T = TypeVar('_T')


# Typing-only declarations: this branch is skipped at runtime, so Protocol is
# imported and the class below is defined only while a static type checker
# analyses the module.
if typing.TYPE_CHECKING:
    from typing import Protocol

    # Call signature of the objects returned by Rstr.__getattr__: a partial of
    # Rstr.rstr with the alphabet already bound, leaving only the length
    # bounds and the include/exclude character sets for the caller to supply.
    class _PartialRstrFunc(Protocol):
        def __call__(
            self,
            start_range: Optional[int] = ...,
            end_range: Optional[int] = ...,
            include: str = ...,
            exclude: str = ...,
        ) -> str: ...
 51 | 
 52 | 
 53 | SYSTEM_RANDOM = SystemRandom()
 54 | 
 55 | DEFAULT_ALPHABETS: dict[str, str] = {
 56 |     'printable': string.printable,
 57 |     'letters': string.ascii_letters,
 58 |     'uppercase': string.ascii_uppercase,
 59 |     'lowercase': string.ascii_lowercase,
 60 |     'digits': string.digits,
 61 |     'punctuation': string.punctuation,
 62 |     'nondigits': string.ascii_letters + string.punctuation,
 63 |     'nonletters': string.digits + string.punctuation,
 64 |     'whitespace': string.whitespace,
 65 |     'nonwhitespace': string.printable.strip(),
 66 |     'normal': string.ascii_letters + string.digits + ' ',
 67 |     'word': string.ascii_letters + string.digits + '_',
 68 |     'nonword': ''.join(
 69 |         set(string.printable).difference(string.ascii_letters + string.digits + '_')
 70 |     ),
 71 |     'unambiguous': ''.join(set(string.ascii_letters + string.digits).difference('0O1lI')),
 72 |     'postalsafe': string.ascii_letters + string.digits + ' .-#/',
 73 |     'urlsafe': string.ascii_letters + string.digits + '-._~',
 74 |     'domainsafe': string.ascii_letters + string.digits + '-',
 75 | }
 76 | 
 77 | 
 78 | class Rstr:
 79 |     """Create random strings from a variety of alphabets.
 80 | 
 81 |     The alphabets for printable(), uppercase(), lowercase(), digits(), and
 82 |     punctuation() are equivalent to the constants by those same names in the
 83 |     standard library string module.
 84 | 
 85 |     nondigits() uses an alphabet of string.letters + string.punctuation
 86 | 
 87 |     nonletters() uses an alphabet of string.digits + string.punctuation
 88 | 
 89 |     nonwhitespace() uses an alphabet of string.printable.strip()
 90 | 
 91 |     normal() uses an alphabet of string.letters + string.digits + ' ' (the
 92 |     space character)
 93 | 
 94 |     postalsafe() is based on USPS Publication 28 - Postal Addressing Standards:
 95 |     http://pe.usps.com/text/pub28/pub28c2.html
 96 |     The characters allowed in postal addresses are letters and digits, periods,
 97 |     slashes, the pound sign, and the hyphen.
 98 | 
 99 |     urlsafe() uses an alphabet of unreserved characters safe for use in URLs.
100 |     From section 2.3 of RFC 3986: "Characters that are allowed in a URI but
101 |     do not have a reserved purpose are called unreserved. These include
102 |     uppercase and lowercase letters, decimal digits, hyphen, period,
103 |     underscore, and tilde.
104 | 
105 |     domainsafe() uses an alphabet of characters allowed in hostnames, and
106 |     consequently, in internet domains: letters, digits, and the hyphen.
107 | 
108 |     """
109 | 
110 |     def __init__(self, random: Random = SYSTEM_RANDOM, **custom_alphabets: str) -> None:
111 |         self._random = random
112 |         self._alphabets = DEFAULT_ALPHABETS.copy()
113 |         for alpha_name, alphabet in custom_alphabets.items():
114 |             self.add_alphabet(alpha_name, alphabet)
115 | 
116 |     def add_alphabet(self, alpha_name: str, characters: str) -> None:
117 |         """Add an additional alphabet to an Rstr instance and make it available
118 |         via method calls.
119 | 
120 |         """
121 |         self._alphabets[alpha_name] = characters
122 | 
123 |     def __getattr__(self, attr: str) -> '_PartialRstrFunc':
124 |         if attr in self._alphabets:
125 |             return partial(self.rstr, self._alphabets[attr])
126 |         message = f'Rstr instance has no attribute: {attr}'
127 |         raise AttributeError(message)
128 | 
129 |     def sample_wr(self, population: Sequence[str], k: int) -> List[str]:
130 |         """Samples k random elements (with replacement) from a population"""
131 |         return [self._random.choice(population) for i in itertools.repeat(None, k)]
132 | 
133 |     def rstr(
134 |         self,
135 |         alphabet: Iterable[str],
136 |         start_range: Optional[int] = None,
137 |         end_range: Optional[int] = None,
138 |         include: Sequence[str] = '',
139 |         exclude: Sequence[str] = '',
140 |     ) -> str:
141 |         """Generate a random string containing elements from 'alphabet'
142 | 
143 |         By default, rstr() will return a string between 1 and 10 characters.
144 |         You can specify a second argument to get an exact length of string.
145 | 
146 |         If you want a string in a range of lengths, specify the start and end
147 |         of that range as the second and third arguments.
148 | 
149 |         If you want to make certain that particular characters appear in the
150 |         generated string, specify them as "include".
151 | 
152 |         If you want to *prevent* certain characters from appearing, pass them
153 |         as 'exclude'.
154 | 
155 |         """
156 |         if same_characters := frozenset(include).intersection(exclude):
157 |             plural = 's' if len(same_characters) > 1 else ''
158 |             chars = ', '.join(same_characters)
159 |             message = f'include and exclude parameters contain same character{plural} ({chars})'
160 |             raise SameCharacterError(message)
161 | 
162 |         popul = tuple(frozenset(alphabet).difference(exclude))
163 | 
164 |         if end_range is None:
165 |             if start_range is None:
166 |                 start_range, end_range = (1, 10)
167 |             else:
168 |                 k = start_range
169 |         elif start_range is None:
170 |             start_range = 1
171 | 
172 |         if end_range:
173 |             k = self._random.randint(start_range, end_range)
174 |         # Make sure we don't generate too long a string
175 |         # when adding 'include' to it:
176 |         k = k - len(include)
177 | 
178 |         result = self.sample_wr(popul, k) + list(include)
179 |         self._random.shuffle(result)
180 |         return ''.join(result)
181 | 
182 | 
class SameCharacterError(ValueError):
    """Raised when the 'include' and 'exclude' arguments share a character."""
185 | 


--------------------------------------------------------------------------------