├── sqids
│   ├── py.typed
│   ├── __init__.py
│   ├── sqids.py
│   └── constants.py
├── .github
│   ├── dependabot.yaml
│   └── workflows
│       └── tests.yml
├── tox.ini
├── tests
│   ├── test_alphabet.py
│   ├── test_round_trip.py
│   ├── test_encoding.py
│   ├── test_blocklist.py
│   └── test_minlength.py
├── LICENSE
├── assets
│   ├── performance.py
│   └── filter_blocklist.py
├── pyproject.toml
├── CHANGELOG.md
├── README.md
└── .gitignore
/sqids/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sqids/__init__.py: -------------------------------------------------------------------------------- 1 | from .sqids import Sqids 2 | 3 | __all__ = ["Sqids"] 4 | -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | groups: 8 | github-actions: 9 | patterns: 10 | - "*" 11 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = 3 | coverage_erase 4 | py{3.13, 3.12, 3.11, 3.10, 3.9, 3.8, 3.7, 3.6} 5 | coverage_report 6 | 7 | skip_missing_interpreters = True 8 | isolated_build = True 9 | 10 | 11 | [testenv] 12 | depends = 13 | py{3.13, 3.12, 3.11, 3.10, 3.9, 3.8, 3.7, 3.6}: coverage_erase 14 | deps = 15 | pytest 16 | hypothesis 17 | coverage[toml] 18 | commands = 19 | coverage run -m pytest 20 | 21 | 22 | [testenv:coverage_erase] 23 | skipsdist = true 24 | skip_install = true 25 | deps = 26 | coverage[toml] 27 | commands = coverage erase 28 | 29 | 30 | [testenv:coverage_report] 31 | depends = 32 | py{3.13, 3.12, 3.11, 3.10, 3.9, 3.8, 3.7, 3.6} 33 | skipsdist = true 34 | skip_install = true 35 | deps = 36 | coverage[toml] 37 | commands_pre = 38 | coverage combine 39 | coverage html --fail-under=0 40 | commands = coverage report 41 | -------------------------------------------------------------------------------- /tests/test_alphabet.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from sqids import Sqids 3 | 4 | 5 | def test_simple(): 6 | sqids = Sqids(alphabet="0123456789abcdef") 7 | 8 | numbers = [1, 2, 3] 9 | id = "489158" 10 | 11 | assert sqids.encode(numbers) == id 12 | assert sqids.decode(id) == numbers 13 | 14 | 15 | def test_short_alphabet(): 16 | sqids = Sqids(alphabet="abc") 17 | 18 | numbers = [1, 2, 3] 19 | assert sqids.decode(sqids.encode(numbers)) == numbers 20 | 21 | 22 | def test_long_alphabet(): 23 | sqids = Sqids( 24 | alphabet="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()-_+|{}[];:'\"/?.>,<`~" 25 | ) 26 | 27 | numbers = [1, 2, 3] 28 | assert sqids.decode(sqids.encode(numbers)) == numbers 29 | 30 | 31 | def test_multibyte_alphabet(): 32 | with pytest.raises(Exception): 33 | Sqids(alphabet="ë1092") 34 | 35 | 36 | def test_repeating_alphabet_characters(): 37 | with pytest.raises(Exception): 38 | Sqids(alphabet="aabcdefg") 39 | 40 | 41 | def test_too_short_alphabet(): 42 | with pytest.raises(Exception): 43 | Sqids(alphabet="ab") 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | 
Copyright (c) 2023-present Sqids maintainers. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | tests: 13 | runs-on: ubuntu-22.04 14 | strategy: 15 | matrix: 16 | python-version: 17 | - "3.7" 18 | - "3.8" 19 | - "3.9" 20 | - "3.10" 21 | - "3.11" 22 | - "3.12" 23 | - "3.13" 24 | steps: 25 | - uses: actions/checkout@v4 26 | - name: Set up Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v5 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | allow-prereleases: true 31 | - name: Install dependencies 32 | run: | 33 | python -m pip install --upgrade pip 34 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 35 | - name: Lint with Ruff 36 | run: | 37 | pip install ruff 38 | # stop the build if there are Python syntax errors or undefined names 39 | ruff check --output-format=github --select=E9,F63,F7,F82 . 40 | # default set of ruff rules with GitHub Annotations 41 | ruff check --output-format=github . 42 | - name: Test with tox 43 | run: | 44 | pip install tox 45 | tox run -e py${{ matrix.python-version }},coverage_report 46 | -------------------------------------------------------------------------------- /tests/test_round_trip.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import sqids 4 | 5 | from hypothesis import given, target, assume 6 | import hypothesis.strategies as st 7 | 8 | 9 | lists_of_integers = st.lists( 10 | elements=st.integers(min_value=0, max_value=sys.maxsize), 11 | min_size=1, 12 | ) 13 | min_lengths = st.integers(min_value=0, max_value=255) 14 | alphabets = st.text( 15 | alphabet=st.characters(min_codepoint=0, max_codepoint=0x7F), 16 | min_size=3, 17 | ) 18 | 19 | 20 | @given(numbers=lists_of_integers, min_length=min_lengths, alphabet=alphabets) 21 | def test_round_trip_encoding(numbers, min_length, alphabet): 22 | # Encourage hypothesis to find ideal alphabets 23 | # by giving unique alphabets a score of 1.0 24 | # and non-unique alphabets a lower score. 25 | target(len(set(alphabet)) / len(alphabet)) 26 | # Reject non-unique alphabets without failing the test. 
27 | assume(len(set(alphabet)) == len(alphabet)) 28 | 29 | sqid_1 = sqids.Sqids(min_length=min_length, alphabet=alphabet, blocklist=[]) 30 | id_1 = sqid_1.encode(numbers) 31 | assert sqid_1.decode(id_1) == numbers 32 | 33 | # If the ID is long enough, use it as a blocklist word and ensure it is blocked. 34 | if len(id_1) >= 3: # pragma: nocover 35 | sqid_2 = sqids.Sqids(min_length=min_length, alphabet=alphabet, blocklist=[id_1]) 36 | id_2 = sqid_2.encode(numbers) 37 | assert id_1 != id_2 38 | assert sqid_2.decode(id_2) == numbers 39 | -------------------------------------------------------------------------------- /assets/performance.py: -------------------------------------------------------------------------------- 1 | import sqids 2 | import timeit 3 | 4 | number = 100_000 5 | 6 | print(f"Iterations: {number:,d}") 7 | 8 | print( 9 | "{0:<20s} {1:7.3f}".format( 10 | "Instantiate:", 11 | timeit.timeit( 12 | stmt="sqids.Sqids()", 13 | globals={"sqids": sqids}, 14 | number=number, 15 | ) 16 | ) 17 | ) 18 | 19 | print( 20 | "{0:<20s} {1:7.3f}".format( 21 | "Encode [0]:", # [0] -> 'bM' 22 | timeit.timeit( 23 | stmt="squid.encode([0])", 24 | globals={"squid": sqids.Sqids()}, 25 | number=number, 26 | ) 27 | ) 28 | ) 29 | 30 | print( 31 | "{0:<20s} {1:7.3f}".format( 32 | "Encode [0, 1, 2]:", # [0, 1, 2] -> 'rSCtlB' 33 | timeit.timeit( 34 | stmt="squid.encode([0, 1, 2])", 35 | globals={"squid": sqids.Sqids()}, 36 | number=number, 37 | ) 38 | ) 39 | ) 40 | 41 | print( 42 | "{0:<20s} {1:7.3f}".format( 43 | "Decode 'bM':", # 'bM' -> [0] 44 | timeit.timeit( 45 | stmt="squid.decode('bM')", 46 | globals={"squid": sqids.Sqids()}, 47 | number=number, 48 | ) 49 | ) 50 | ) 51 | 52 | print( 53 | "{0:<20s} {1:7.3f}".format( 54 | "Decode 'rSCtlB':", # 'rSCtlB' -> [0, 1, 2] 55 | timeit.timeit( 56 | stmt="squid.decode('rSCtlB')", 57 | globals={"squid": sqids.Sqids()}, 58 | number=number, 59 | ) 60 | ), 61 | ) 62 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "sqids" 3 | description = "Generate YouTube-like ids from numbers." 
4 | version = "0.5.2" 5 | readme = "README.md" 6 | authors = [] 7 | license = { file = "LICENSE" } 8 | classifiers = [ 9 | "License :: OSI Approved :: MIT License", 10 | "Programming Language :: Python :: 3.6", 11 | "Programming Language :: Python :: 3.7", 12 | "Programming Language :: Python :: 3.8", 13 | "Programming Language :: Python :: 3.9", 14 | "Programming Language :: Python :: 3.10", 15 | "Programming Language :: Python :: 3.11", 16 | "Programming Language :: Python :: 3.12", 17 | "Programming Language :: Python :: 3.13", 18 | ] 19 | keywords = ["sqids", "encode", "generate", "ids", "hashids"] 20 | requires-python = ">=3.6" 21 | 22 | [project.urls] 23 | Homepage = "https://sqids.org/python" 24 | 25 | [build-system] 26 | requires = ["setuptools", "setuptools-scm"] 27 | build-backend = "setuptools.build_meta" 28 | 29 | [tool.setuptools] 30 | packages = ["sqids"] 31 | 32 | [tool.setuptools.package-data] 33 | sqids = ["py.typed"] 34 | 35 | [tool.coverage.run] 36 | relative_files = true 37 | parallel = true 38 | branch = true 39 | source = [ 40 | "sqids", 41 | "tests", 42 | ] 43 | 44 | [tool.coverage.paths] 45 | source = [ 46 | "sqids", 47 | "*/site-packages", 48 | ] 49 | 50 | [tool.coverage.report] 51 | skip_covered = true 52 | fail_under = 100 53 | 54 | [tool.coverage.html] 55 | directory = "htmlcov/" 56 | skip_covered = false 57 | 58 | [tool.pytest.ini_options] 59 | addopts = "--color=yes" 60 | filterwarnings = [ 61 | "error", 62 | ] 63 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | **v0.5.2** 4 | - Export Sqids from sqids module ([PR #28](https://github.com/sqids/sqids-python/pull/28), thanks to [@Popkornium18](https://github.com/Popkornium18)). 5 | 6 | **v0.5.1** 7 | - Replace `numbers: List[int]` with `numbers: Sequence[int]` in encode function ([PR #25](https://github.com/sqids/sqids-python/pull/25), thanks to [@aliwo](https://github.com/aliwo)). 8 | 9 | **v0.5.0** 10 | - Support Python 3.12 and 3.13. 11 | - Speed up encoding by ~85% by optimizing blocklist checks ([PR #23](https://github.com/sqids/sqids-python/pull/23), thanks to [@kurtmckee](https://github.com/kurtmckee)). 12 | This improvement requires more calculation when the `Sqids` class is instantiated, 13 | so users are encouraged to instantiate `Sqids` once and always reuse the instance. 14 | 15 | **v0.4.1** 16 | - Compatibility with Python 3.6 (not officially supported) 17 | 18 | **v0.4.0** 19 | - Use double underscore convention to specify private methods 20 | - Separate single module into a package for better readability 21 | - Add [PEP 561](https://peps.python.org/pep-0561/) compatible type hints 22 | 23 | **v0.3.0:** **⚠️ BREAKING CHANGE** 24 | - **Breaking change**: IDs change. 
The algorithm has been fine-tuned for better performance [[Issue #11](https://github.com/sqids/sqids-spec/issues/11)] 25 | - `alphabet` cannot contain multibyte characters 26 | - `min_length` upper limit has increased from alphabet length to `255` 27 | - Max blocklist re-encoding attempts have been capped at the length of the alphabet - 1 28 | - Minimum alphabet length has changed from 5 to 3 29 | - `min_value()` and `max_value()` functions have been removed 30 | - Max integer encoding value is `sys.maxsize` 31 | 32 | **v0.2.1:** 33 | - Bug fix: spec update: blocklist filtering in uppercase-only alphabet [[PR #7](https://github.com/sqids/sqids-spec/pull/7)] 34 | - Lower uniques test from 1_000_000 to 10_000 35 | 36 | **v0.2.0:** 37 | - Bug fix: test for decoding an invalid ID with a [repeating reserved character](https://github.com/sqids/sqids-spec/commit/f52b57836b0463097018f984f853b284e50a5ce4) 38 | - Build cleanup 39 | 40 | **v0.1.0:** 41 | - First implementation of [the spec](https://github.com/sqids/sqids-spec) 42 | -------------------------------------------------------------------------------- /tests/test_encoding.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sys 3 | from sqids import Sqids 4 | 5 | 6 | def test_simple(): 7 | sqids = Sqids() 8 | numbers = [1, 2, 3] 9 | id_str = "86Rf07" 10 | assert sqids.encode(numbers) == id_str 11 | assert sqids.decode(id_str) == numbers 12 | 13 | 14 | def test_different_inputs(): 15 | sqids = Sqids() 16 | numbers = [0, 0, 0, 1, 2, 3, 100, 1_000, 100_000, 1_000_000, sys.maxsize] 17 | assert sqids.decode(sqids.encode(numbers)) == numbers 18 | 19 | 20 | @pytest.mark.parametrize( 21 | "id_str, numbers", 22 | ( 23 | ("bM", [0]), 24 | ("Uk", [1]), 25 | ("gb", [2]), 26 | ("Ef", [3]), 27 | ("Vq", [4]), 28 | ("uw", [5]), 29 | ("OI", [6]), 30 | ("AX", [7]), 31 | ("p6", [8]), 32 | ("nJ", [9]), 33 | ) 34 | ) 35 | def test_incremental_numbers(id_str, numbers): 36 | sqids = Sqids() 37 | assert sqids.encode(numbers) == id_str 38 | assert sqids.decode(id_str) == numbers 39 | 40 | 41 | @pytest.mark.parametrize( 42 | "id_str, numbers", 43 | ( 44 | ("SvIz", [0, 0]), 45 | ("n3qa", [0, 1]), 46 | ("tryF", [0, 2]), 47 | ("eg6q", [0, 3]), 48 | ("rSCF", [0, 4]), 49 | ("sR8x", [0, 5]), 50 | ("uY2M", [0, 6]), 51 | ("74dI", [0, 7]), 52 | ("30WX", [0, 8]), 53 | ("moxr", [0, 9]), 54 | ) 55 | ) 56 | def test_incremental_numbers_same_index_0(id_str, numbers): 57 | sqids = Sqids() 58 | assert sqids.encode(numbers) == id_str 59 | assert sqids.decode(id_str) == numbers 60 | 61 | 62 | @pytest.mark.parametrize( 63 | "id_str, numbers", 64 | ( 65 | ("SvIz", [0, 0]), 66 | ("nWqP", [1, 0]), 67 | ("tSyw", [2, 0]), 68 | ("eX68", [3, 0]), 69 | ("rxCY", [4, 0]), 70 | ("sV8a", [5, 0]), 71 | ("uf2K", [6, 0]), 72 | ("7Cdk", [7, 0]), 73 | ("3aWP", [8, 0]), 74 | ("m2xn", [9, 0]), 75 | ) 76 | ) 77 | def test_incremental_numbers_same_index_1(id_str, numbers): 78 | sqids = Sqids() 79 | assert sqids.encode(numbers) == id_str 80 | assert sqids.decode(id_str) == numbers 81 | 82 | 83 | def test_multi_input(): 84 | sqids = Sqids() 85 | numbers = list(range(100)) 86 | output = sqids.decode(sqids.encode(numbers)) 87 | assert numbers == output 88 | 89 | 90 | def test_encoding_no_numbers(): 91 | sqids = Sqids() 92 | assert sqids.encode([]) == "" 93 | 94 | 95 | def test_decoding_empty_string(): 96 | sqids = Sqids() 97 | assert sqids.decode("") == [] 98 | 99 | 100 | def test_decoding_invalid_character(): 101 | sqids = Sqids() 102 | assert sqids.decode("*") == [] 103 | 104 | 105 | def test_encode_out_of_range_numbers(): 106 | sqids = Sqids() 107 | with pytest.raises(ValueError): 108 | sqids.encode([-1]) 109 | with pytest.raises(ValueError): 110 | sqids.encode([sys.maxsize + 1]) 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [Sqids Python](https://sqids.org/python) 2 | 3 | [![PyPI package](https://badge.fury.io/py/sqids.svg)](https://pypi.org/project/sqids/) 4 | [![GitHub Actions](https://img.shields.io/github/actions/workflow/status/sqids/sqids-python/tests.yml)](https://github.com/sqids/sqids-python/actions) 5 | [![Downloads](https://img.shields.io/pypi/dm/sqids)](https://pypi.org/project/sqids/) 6 | 7 | [Sqids](https://sqids.org/python) (*pronounced "squids"*) is a small library that lets you **generate unique IDs from numbers**. It's good for link shortening, fast & URL-safe ID generation and decoding back into numbers for quicker database lookups. 8 | 9 | Features: 10 | 11 | - **Encode multiple numbers** - generate short IDs from one or several non-negative numbers 12 | - **Quick decoding** - easily decode IDs back into numbers 13 | - **Unique IDs** - generate unique IDs by shuffling the alphabet once 14 | - **ID padding** - provide minimum length to make IDs more uniform 15 | - **URL safe** - auto-generated IDs do not contain common profanity 16 | - **Randomized output** - sequential input produces nonconsecutive IDs 17 | - **Many implementations** - support for [40+ programming languages](https://sqids.org/) 18 | 19 | ## 🧰 Use-cases 20 | 21 | Good for: 22 | 23 | - Generating IDs for public URLs (eg: link shortening) 24 | - Generating IDs for internal systems (eg: event tracking) 25 | - Decoding for quicker database lookups (eg: by primary keys) 26 | 27 | Not good for: 28 | 29 | - Sensitive data (this is not an encryption library) 30 | - User IDs (can be decoded revealing user count) 31 | 32 | ## 🚀 Getting started 33 | 34 | Install the package from PyPI, e.g. with pip: 35 | 36 | ```bash 37 | pip install sqids 38 | ``` 39 | 40 | Import the `Sqids` class from the `sqids` package: 41 | 42 | ```python 43 | from sqids import Sqids 44 | sqids = Sqids() 45 | ``` 46 | 47 | ## 👩‍💻 Examples 48 | 49 | Simple encode & decode: 50 | 51 | ```python 52 | sqids = Sqids() 53 | id = sqids.encode([1, 2, 3]) # "86Rf07" 54 | numbers = sqids.decode(id) # [1, 2, 3] 55 | ``` 56 | 57 | > **Note** 58 | > 🚧 Because of the algorithm's design, **multiple IDs can decode back into the same sequence of numbers**. If it's important to your design that IDs are canonical, you have to manually re-encode decoded numbers and check that the generated ID matches. 
59 | 60 | Enforce a *minimum* length for IDs: 61 | 62 | ```python 63 | sqids = Sqids(min_length=10) 64 | id = sqids.encode([1, 2, 3]) # "86Rf07xd4z" 65 | numbers = sqids.decode(id) # [1, 2, 3] 66 | ``` 67 | 68 | Randomize IDs by providing a custom alphabet: 69 | 70 | ```python 71 | sqids = Sqids(alphabet="FxnXM1kBN6cuhsAvjW3Co7l2RePyY8DwaU04Tzt9fHQrqSVKdpimLGIJOgb5ZE") 72 | id = sqids.encode([1, 2, 3]) # "B4aajs" 73 | numbers = sqids.decode(id) # [1, 2, 3] 74 | ``` 75 | 76 | Prevent specific words from appearing anywhere in the auto-generated IDs: 77 | 78 | ```python 79 | sqids = Sqids(blocklist=["86Rf07"]) 80 | id = sqids.encode([1, 2, 3]) # "se8ojk" 81 | numbers = sqids.decode(id) # [1, 2, 3] 82 | ``` 83 | 84 | ## 📝 License 85 | 86 | [MIT](LICENSE) 87 | -------------------------------------------------------------------------------- /assets/filter_blocklist.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import sys 3 | from typing import Set, Tuple 4 | 5 | 6 | repo_root = pathlib.Path(__file__).parent.parent 7 | this_file = pathlib.Path(__file__).relative_to(repo_root) 8 | constants_path = repo_root / "sqids/constants.py" 9 | import sqids.constants # noqa 10 | 11 | 12 | DIGITS = set("0123456789") 13 | 14 | 15 | def filter_blocklist() -> Tuple[Set[str], Set[str], Set[str]]: 16 | """Pre-filter the blocklist and update the constants file.""" 17 | 18 | exact_match = set() 19 | match_at_ends = set() 20 | match_anywhere = set() 21 | 22 | for word in sqids.constants.DEFAULT_BLOCKLIST: 23 | if len(word) == 3: 24 | exact_match.add(word) 25 | elif set(word) & DIGITS: 26 | match_at_ends.add(word) 27 | else: 28 | match_anywhere.add(word) 29 | 30 | return exact_match, match_at_ends, match_anywhere 31 | 32 | 33 | def generate_new_constants_file( 34 | exact_match: Set[str], 35 | match_at_ends: Set[str], 36 | match_anywhere: Set[str], 37 | ) -> str: 38 | """Generate the text of a new constants file.""" 39 | 40 | lines = [ 41 | f'DEFAULT_ALPHABET = "{sqids.constants.DEFAULT_ALPHABET}"', 42 | f"DEFAULT_MIN_LENGTH = {sqids.constants.DEFAULT_MIN_LENGTH}", 43 | "", 44 | "# =======", 45 | "# NOTE", 46 | "# =======", 47 | "#", 48 | f"# When updating the blocklist, run {this_file} to pre-filter constants.", 49 | "# This is critical for performance.", 50 | "#", 51 | "", 52 | "DEFAULT_BLOCKLIST = [", 53 | ] 54 | # Output a sorted blocklist. 55 | for word in sorted(sqids.constants.DEFAULT_BLOCKLIST): 56 | lines.append(f' "{word}",') 57 | lines.append("]") 58 | 59 | # Output exact-match blocklist words. 60 | lines.append("") 61 | lines.append("_exact_match = {") 62 | for word in sorted(exact_match): 63 | lines.append(f' "{word}",') 64 | lines.append("}") 65 | 66 | # Output match-at-ends blocklist words. 67 | lines.append("") 68 | lines.append("_match_at_ends = (") 69 | for word in sorted(match_at_ends): 70 | lines.append(f' "{word}",') 71 | lines.append(")") 72 | 73 | # Output match-anywhere blocklist words. 74 | lines.append("") 75 | lines.append("_match_anywhere = {") 76 | for word in sorted(match_anywhere): 77 | lines.append(f' "{word}",') 78 | lines.append("}") 79 | 80 | return "\n".join(lines).rstrip() + "\n" # Include a trailing newline. 
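# Note: the three buckets produced by filter_blocklist() above mirror how
# Sqids matches blocked words (examples drawn from DEFAULT_BLOCKLIST):
# "ass" is exactly three characters, so it can only block an ID that
# matches it exactly; "aho1e" contains a digit, so it only blocks IDs that
# start or end with it; "ahole" is longer than three characters and
# digit-free, so it blocks IDs containing it anywhere.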
81 | 82 | 83 | def main() -> int: 84 | text = constants_path.read_text() 85 | 86 | exact_match, match_at_ends, match_anywhere = filter_blocklist() 87 | new_text = generate_new_constants_file(exact_match, match_at_ends, match_anywhere) 88 | 89 | if text == new_text: 90 | print("No changes necessary") 91 | return 0 92 | 93 | print(f"Updating {constants_path.relative_to(repo_root)}") 94 | constants_path.write_text(new_text, newline="\n", encoding="utf-8") 95 | return 1 96 | 97 | 98 | if __name__ == "__main__": 99 | sys.exit(main()) 100 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | .ruff_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/#use-with-ide 111 | .pdm.toml 112 | 113 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | 150 | # pytype static type analyzer 151 | .pytype/ 152 | 153 | # Cython debug symbols 154 | cython_debug/ 155 | 156 | # PyCharm 157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 159 | # and can be added to the global gitignore or merged into this file. For a more nuclear 160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 161 | #.idea/ 162 | -------------------------------------------------------------------------------- /tests/test_blocklist.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import sys 3 | 4 | import pytest 5 | from sqids import Sqids 6 | 7 | 8 | def test_default_blocklist(): 9 | sqids = Sqids() 10 | 11 | assert sqids.decode("aho1e") == [4572721] 12 | assert sqids.encode([4572721]) == "JExTR" 13 | 14 | 15 | def test_empty_blocklist(): 16 | sqids = Sqids(blocklist=set()) 17 | 18 | assert sqids.decode("aho1e") == [4572721] 19 | assert sqids.encode([4572721]) == "aho1e" 20 | 21 | 22 | def test_custom_blocklist(): 23 | sqids = Sqids(blocklist={"ArUO"}) 24 | 25 | assert sqids.decode("aho1e") == [4572721] 26 | assert sqids.encode([4572721]) == "aho1e" 27 | 28 | assert sqids.decode("ArUO") == [100000] 29 | assert sqids.encode([100000]) == "QyG4" 30 | assert sqids.decode("QyG4") == [100000] 31 | 32 | 33 | def test_blocklist(): 34 | sqids = Sqids( 35 | blocklist={ 36 | "JSwXFaosAN", # normal result of 1st encoding, block that word on purpose 37 | "OCjV9JK64o", # result of 2nd encoding 38 | "rBHf", # result of 3rd encoding is `4rBHfOiqd3`, let's block a substring 39 | "79SM", # result of 4th encoding is `dyhgw479SM`, let's block the postfix 40 | "7tE6", # result of 4th encoding is `7tE6jdAHLe`, let's block the prefix 41 | } 42 | ) 43 | 44 | assert sqids.encode([1000000, 2000000]) == "1aYeB7bRUt" 45 | assert sqids.decode("1aYeB7bRUt") == [1000000, 2000000] 46 | 47 | 48 | def test_decoding_blocklist_words(): 49 | sqids = Sqids(blocklist={"86Rf07", "se8ojk", "ARsz1p", "Q8AI49", "5sQRZO"}) 50 | 51 | assert sqids.decode("86Rf07") == [1, 2, 3] 52 | assert sqids.decode("se8ojk") == [1, 2, 3] 53 | assert sqids.decode("ARsz1p") == [1, 2, 3] 54 | assert sqids.decode("Q8AI49") == [1, 2, 3] 55 | assert sqids.decode("5sQRZO") == [1, 2, 3] 56 | 57 | 58 | def test_match_against_short_blocklist_word(): 59 | sqids = Sqids(blocklist={"pnd"}) 60 | 61 | assert sqids.decode(sqids.encode([1000])) == [1000] 62 | 63 | 64 | def test_blocklist_filtering_in_constructor(): 65 | # lowercase blocklist in only-uppercase alphabet 66 | sqids = Sqids(alphabet="ABCDEFGHIJKLMNOPQRSTUVWXYZ", blocklist={"sxnzkl"}) 67 | 68 | id = sqids.encode([1, 2, 3]) 69 | numbers = sqids.decode(id) 70 | 71 | # without blocklist, would've been "SXNZKL" 72 | assert id == "IBSHOZ" 73 
| assert numbers == [1, 2, 3] 74 | 75 | 76 | @pytest.mark.parametrize("word", ("ab!", "abc!", "xyz")) 77 | def test_alphabet_is_not_superset_of_blocklist_word_characters(word): 78 | """Verify that a non-subset blocklist word is ignored.""" 79 | 80 | sqids = Sqids(alphabet="abc", blocklist=[word]) 81 | assert sqids._Sqids__blocklist_exact_match == set() 82 | assert sqids._Sqids__blocklist_match_at_ends == tuple() 83 | assert sqids._Sqids__blocklist_match_anywhere == set() 84 | 85 | 86 | def test_max_encoding_attempts(): 87 | alphabet = "abc" 88 | min_length = 3 89 | blocklist = {"cab", "abc", "bca"} 90 | 91 | sqids = Sqids(alphabet, min_length, blocklist) 92 | 93 | assert min_length == len(alphabet) 94 | assert min_length == len(blocklist) 95 | 96 | with pytest.raises(Exception): 97 | sqids.encode([0]) 98 | 99 | 100 | def test_small_words_are_ignored(): 101 | """Blocklist words shorter than 3 characters must be ignored.""" 102 | 103 | id_ = Sqids().encode([0]) 104 | assert id_ == "bM" 105 | id_ = Sqids(blocklist=[id_]).encode([0]) 106 | assert id_ == "bM" 107 | 108 | 109 | def test_constants_file_is_pristine(): 110 | """Verify the constants file is pristine.""" 111 | 112 | repo_root = pathlib.Path(__file__).parent.parent 113 | sys.path.append(str(repo_root / "assets")) 114 | import filter_blocklist 115 | 116 | sets = filter_blocklist.filter_blocklist() 117 | new_text = filter_blocklist.generate_new_constants_file(*sets) 118 | error_message = "You must run assets/filter_blocklist.py!" 119 | assert filter_blocklist.constants_path.read_text() == new_text, error_message 120 | -------------------------------------------------------------------------------- /tests/test_minlength.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sys 3 | from sqids import Sqids 4 | from sqids.constants import DEFAULT_ALPHABET 5 | 6 | 7 | def test_simple(): 8 | sqids = Sqids(min_length=len(DEFAULT_ALPHABET)) 9 | 10 | numbers = [1, 2, 3] 11 | id_str = "86Rf07xd4zBmiJXQG6otHEbew02c3PWsUOLZxADhCpKj7aVFv9I8RquYrNlSTM" 12 | 13 | assert sqids.encode(numbers) == id_str 14 | assert sqids.decode(id_str) == numbers 15 | 16 | 17 | @pytest.mark.parametrize( 18 | "min_length, expected_id", 19 | ( 20 | (6, "86Rf07"), 21 | (7, "86Rf07x"), 22 | (8, "86Rf07xd"), 23 | (9, "86Rf07xd4"), 24 | (10, "86Rf07xd4z"), 25 | (11, "86Rf07xd4zB"), 26 | (12, "86Rf07xd4zBm"), 27 | (13, "86Rf07xd4zBmi"), 28 | ( 29 | len(DEFAULT_ALPHABET) + 0, 30 | "86Rf07xd4zBmiJXQG6otHEbew02c3PWsUOLZxADhCpKj7aVFv9I8RquYrNlSTM", 31 | ), 32 | ( 33 | len(DEFAULT_ALPHABET) + 1, 34 | "86Rf07xd4zBmiJXQG6otHEbew02c3PWsUOLZxADhCpKj7aVFv9I8RquYrNlSTMy", 35 | ), 36 | ( 37 | len(DEFAULT_ALPHABET) + 2, 38 | "86Rf07xd4zBmiJXQG6otHEbew02c3PWsUOLZxADhCpKj7aVFv9I8RquYrNlSTMyf", 39 | ), 40 | ( 41 | len(DEFAULT_ALPHABET) + 3, 42 | "86Rf07xd4zBmiJXQG6otHEbew02c3PWsUOLZxADhCpKj7aVFv9I8RquYrNlSTMyf1", 43 | ), 44 | ) 45 | ) 46 | def test_incremental(min_length, expected_id): 47 | numbers = [1, 2, 3] 48 | sqids = Sqids(min_length=min_length) 49 | 50 | assert sqids.encode(numbers) == expected_id 51 | assert len(sqids.encode(numbers)) == min_length 52 | assert sqids.decode(expected_id) == numbers 53 | 54 | 55 | @pytest.mark.parametrize( 56 | "id_str, numbers", 57 | ( 58 | ("SvIzsqYMyQwI3GWgJAe17URxX8V924Co0DaTZLtFjHriEn5bPhcSkfmvOslpBu", [0, 0]), 59 | ("n3qafPOLKdfHpuNw3M61r95svbeJGk7aAEgYn4WlSjXURmF8IDqZBy0CT2VxQc", [0, 1]), 60 | ("tryFJbWcFMiYPg8sASm51uIV93GXTnvRzyfLleh06CpodJD42B7OraKtkQNxUZ", [0, 2]), 61 | 
("eg6ql0A3XmvPoCzMlB6DraNGcWSIy5VR8iYup2Qk4tjZFKe1hbwfgHdUTsnLqE", [0, 3]), 62 | ("rSCFlp0rB2inEljaRdxKt7FkIbODSf8wYgTsZM1HL9JzN35cyoqueUvVWCm4hX", [0, 4]), 63 | ("sR8xjC8WQkOwo74PnglH1YFdTI0eaf56RGVSitzbjuZ3shNUXBrqLxEJyAmKv2", [0, 5]), 64 | ("uY2MYFqCLpgx5XQcjdtZK286AwWV7IBGEfuS9yTmbJvkzoUPeYRHr4iDs3naN0", [0, 6]), 65 | ("74dID7X28VLQhBlnGmjZrec5wTA1fqpWtK4YkaoEIM9SRNiC3gUJH0OFvsPDdy", [0, 7]), 66 | ("30WXpesPhgKiEI5RHTY7xbB1GnytJvXOl2p0AcUjdF6waZDo9Qk8VLzMuWrqCS", [0, 8]), 67 | ("moxr3HqLAK0GsTND6jowfZz3SUx7cQ8aC54Pl1RbIvFXmEJuBMYVeW9yrdOtin", [0, 9]), 68 | ) 69 | ) 70 | def test_incremental_numbers(id_str, numbers): 71 | sqids = Sqids(min_length=len(DEFAULT_ALPHABET)) 72 | 73 | assert sqids.encode(numbers) == id_str 74 | assert sqids.decode(id_str) == numbers 75 | 76 | 77 | @pytest.mark.parametrize("min_length", (0, 1, 5, 10, len(DEFAULT_ALPHABET))) 78 | @pytest.mark.parametrize( 79 | "numbers", 80 | ( 81 | [0], 82 | [0, 0, 0, 0, 0], 83 | [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 84 | [100, 200, 300], 85 | [1000, 2000, 3000], 86 | [1000000], 87 | [sys.maxsize], 88 | ), 89 | ) 90 | def test_min_lengths(min_length, numbers): 91 | sqids = Sqids(min_length=min_length) 92 | 93 | id_str = sqids.encode(numbers) 94 | assert len(id_str) >= min_length 95 | assert sqids.decode(id_str) == numbers 96 | 97 | 98 | def test_out_of_range_invalid_min_length(): 99 | with pytest.raises(ValueError): 100 | Sqids(min_length=-1) 101 | 102 | with pytest.raises(ValueError): 103 | Sqids(min_length=256) 104 | 105 | 106 | @pytest.mark.parametrize("min_length", ("bogus", {}, True)) 107 | def test_min_length_type(min_length): 108 | """Verify that non-integer min_length values are rejected. 109 | 110 | `True` is a unique case; Python bools are subclasses of int. 111 | """ 112 | 113 | with pytest.raises(TypeError, match="must be an int"): 114 | Sqids(min_length=min_length) 115 | -------------------------------------------------------------------------------- /sqids/sqids.py: -------------------------------------------------------------------------------- 1 | from typing import List, Sequence, Set, Tuple 2 | import sys 3 | from .constants import ( 4 | DEFAULT_ALPHABET, 5 | DEFAULT_BLOCKLIST, 6 | DEFAULT_MIN_LENGTH, 7 | _exact_match, 8 | _match_at_ends, 9 | _match_anywhere, 10 | ) 11 | 12 | DIGITS = set("0123456789") 13 | 14 | 15 | class Sqids: 16 | def __init__( 17 | self, 18 | alphabet: str = DEFAULT_ALPHABET, 19 | min_length: int = DEFAULT_MIN_LENGTH, 20 | blocklist: List[str] = DEFAULT_BLOCKLIST, 21 | ): 22 | if any(ord(char) > 127 for char in alphabet): 23 | raise ValueError("Alphabet cannot contain multibyte characters") 24 | 25 | if len(alphabet) < 3: 26 | raise ValueError("Alphabet length must be at least 3") 27 | 28 | if len(set(alphabet)) != len(alphabet): 29 | raise ValueError("Alphabet must contain unique characters") 30 | 31 | if not isinstance(min_length, int) or isinstance(min_length, bool): 32 | raise TypeError("Minimum length must be an int") 33 | 34 | MIN_LENGTH_LIMIT = 255 35 | if min_length < 0 or min_length > MIN_LENGTH_LIMIT: 36 | raise ValueError( 37 | f"Minimum length has to be between 0 and {MIN_LENGTH_LIMIT}" 38 | ) 39 | 40 | # When the blocklist and alphabet are defaults, use pre-filtered blocklists. 41 | if blocklist is DEFAULT_BLOCKLIST and alphabet is DEFAULT_ALPHABET: 42 | self.__blocklist_exact_match: Set[str] = _exact_match 43 | self.__blocklist_match_at_ends: Tuple[str, ...] 
= _match_at_ends 44 | self.__blocklist_match_anywhere: Set[str] = _match_anywhere 45 | else: 46 | alphabet_lower = set(alphabet.lower()) 47 | exact_match: Set[str] = set() 48 | match_at_ends: Set[str] = set() 49 | match_anywhere: Set[str] = set() 50 | for word in blocklist: 51 | if len(word) < 3: 52 | continue 53 | word_lower = word.lower() 54 | word_lower_set = set(word_lower) 55 | if word_lower_set & alphabet_lower != word_lower_set: 56 | continue 57 | 58 | if len(word) == 3: 59 | exact_match.add(word.lower()) 60 | elif word_lower_set & DIGITS: 61 | match_at_ends.add(word_lower) 62 | else: 63 | match_anywhere.add(word_lower) 64 | 65 | self.__blocklist_exact_match = exact_match 66 | # When matching at the ends, `.startswith()` and `.endswith()` need a tuple. 67 | self.__blocklist_match_at_ends = tuple(match_at_ends) 68 | self.__blocklist_match_anywhere = match_anywhere 69 | 70 | self.__alphabet = self.__shuffle(alphabet) 71 | self.__min_length = min_length 72 | 73 | def encode(self, numbers: Sequence[int]) -> str: 74 | if not numbers: 75 | return "" 76 | 77 | if not all(0 <= number <= sys.maxsize for number in numbers): 78 | raise ValueError(f"Encoding supports numbers between 0 and {sys.maxsize}") 79 | 80 | return self.__encode_numbers(numbers, 0) 81 | 82 | def __encode_numbers(self, numbers: Sequence[int], increment: int = 0) -> str: 83 | if increment > len(self.__alphabet): 84 | raise ValueError("Reached max attempts to re-generate the ID") 85 | 86 | offset = sum( 87 | ( 88 | ord(self.__alphabet[v % len(self.__alphabet)]) + i 89 | for i, v in enumerate(numbers) 90 | ) 91 | ) 92 | offset = (offset + len(numbers)) % len(self.__alphabet) 93 | offset = (offset + increment) % len(self.__alphabet) 94 | alphabet = self.__alphabet[offset:] + self.__alphabet[:offset] 95 | prefix = alphabet[0] 96 | alphabet = alphabet[::-1] 97 | 98 | ret = [prefix] 99 | 100 | for i, num in enumerate(numbers): 101 | ret.append(self.__to_id(num, alphabet[1:])) 102 | 103 | if i >= len(numbers) - 1: 104 | continue 105 | 106 | ret.append(alphabet[0]) 107 | alphabet = self.__shuffle(alphabet) 108 | 109 | id_ = "".join(ret) 110 | 111 | if self.__min_length > len(id_): 112 | id_ += alphabet[0] 113 | 114 | while self.__min_length - len(id_) > 0: 115 | alphabet = self.__shuffle(alphabet) 116 | id_ += alphabet[: min(self.__min_length - len(id_), len(alphabet))] 117 | 118 | if len(id_) >= 3 and self.__is_blocked_id(id_): 119 | id_ = self.__encode_numbers(numbers, increment + 1) 120 | 121 | return id_ 122 | 123 | def decode(self, id_: str) -> List[int]: 124 | ret: List[int] = [] 125 | 126 | if not id_: 127 | return ret 128 | 129 | alphabet_chars = list(self.__alphabet) 130 | if any(c not in alphabet_chars for c in id_): 131 | return ret 132 | 133 | prefix = id_[0] 134 | offset = self.__alphabet.index(prefix) 135 | alphabet = self.__alphabet[offset:] + self.__alphabet[:offset] 136 | alphabet = alphabet[::-1] 137 | id_ = id_[1:] 138 | 139 | while id_: 140 | separator = alphabet[0] 141 | chunks = id_.split(separator) 142 | if not chunks[0]: 143 | return ret 144 | 145 | ret.append(self.__to_number(chunks[0], alphabet[1:])) 146 | if len(chunks) > 1: 147 | alphabet = self.__shuffle(alphabet) 148 | 149 | id_ = separator.join(chunks[1:]) 150 | 151 | return ret 152 | 153 | def __shuffle(self, alphabet: str) -> str: 154 | chars = list(alphabet) 155 | 156 | i = 0 157 | j = len(chars) - 1 158 | while j > 0: 159 | r = (i * j + ord(chars[i]) + ord(chars[j])) % len(chars) 160 | chars[i], chars[r] = chars[r], chars[i] 161 | i += 1 162 | j 
-= 1 163 | 164 | return "".join(chars) 165 | 166 | def __to_id(self, num: int, alphabet: str) -> str: 167 | id_chars: List[str] = [] 168 | result = num 169 | alphabet_length = len(alphabet) 170 | 171 | while True: 172 | id_chars.insert(0, alphabet[result % alphabet_length]) 173 | result = result // alphabet_length 174 | if not result: 175 | break 176 | 177 | return "".join(id_chars) 178 | 179 | def __to_number(self, id_: str, alphabet: str) -> int: 180 | chars = list(alphabet) 181 | return sum(chars.index(c) * (len(chars) ** i) for i, c in enumerate(id_[::-1])) 182 | 183 | def __is_blocked_id(self, id_: str) -> bool: 184 | id_ = id_.lower() 185 | 186 | if len(id_) == 3: 187 | return id_ in self.__blocklist_exact_match 188 | 189 | if ( 190 | id_.startswith(self.__blocklist_match_at_ends) 191 | or id_.endswith(self.__blocklist_match_at_ends) 192 | ): 193 | return True 194 | 195 | for word in self.__blocklist_match_anywhere: 196 | if word in id_: 197 | return True 198 | 199 | return False 200 | -------------------------------------------------------------------------------- /sqids/constants.py: -------------------------------------------------------------------------------- 1 | DEFAULT_ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" 2 | DEFAULT_MIN_LENGTH = 0 3 | 4 | # ======= 5 | # NOTE 6 | # ======= 7 | # 8 | # When updating the blocklist, run assets/filter_blocklist.py to pre-filter constants. 9 | # This is critical for performance. 10 | # 11 | 12 | DEFAULT_BLOCKLIST = [ 13 | "0rgasm", 14 | "1d10t", 15 | "1d1ot", 16 | "1di0t", 17 | "1diot", 18 | "1eccacu10", 19 | "1eccacu1o", 20 | "1eccacul0", 21 | "1eccaculo", 22 | "1mbec11e", 23 | "1mbec1le", 24 | "1mbeci1e", 25 | "1mbecile", 26 | "a11upat0", 27 | "a11upato", 28 | "a1lupat0", 29 | "a1lupato", 30 | "aand", 31 | "ah01e", 32 | "ah0le", 33 | "aho1e", 34 | "ahole", 35 | "al1upat0", 36 | "al1upato", 37 | "allupat0", 38 | "allupato", 39 | "ana1", 40 | "ana1e", 41 | "anal", 42 | "anale", 43 | "anus", 44 | "arrapat0", 45 | "arrapato", 46 | "arsch", 47 | "arse", 48 | "ass", 49 | "b00b", 50 | "b00be", 51 | "b01ata", 52 | "b0ceta", 53 | "b0iata", 54 | "b0ob", 55 | "b0obe", 56 | "b0sta", 57 | "b1tch", 58 | "b1te", 59 | "b1tte", 60 | "ba1atkar", 61 | "balatkar", 62 | "bastard0", 63 | "bastardo", 64 | "batt0na", 65 | "battona", 66 | "bitch", 67 | "bite", 68 | "bitte", 69 | "bo0b", 70 | "bo0be", 71 | "bo1ata", 72 | "boceta", 73 | "boiata", 74 | "boob", 75 | "boobe", 76 | "bosta", 77 | "bran1age", 78 | "bran1er", 79 | "bran1ette", 80 | "bran1eur", 81 | "bran1euse", 82 | "branlage", 83 | "branler", 84 | "branlette", 85 | "branleur", 86 | "branleuse", 87 | "c0ck", 88 | "c0g110ne", 89 | "c0g11one", 90 | "c0g1i0ne", 91 | "c0g1ione", 92 | "c0gl10ne", 93 | "c0gl1one", 94 | "c0gli0ne", 95 | "c0glione", 96 | "c0na", 97 | "c0nnard", 98 | "c0nnasse", 99 | "c0nne", 100 | "c0u111es", 101 | "c0u11les", 102 | "c0u1l1es", 103 | "c0u1lles", 104 | "c0ui11es", 105 | "c0ui1les", 106 | "c0uil1es", 107 | "c0uilles", 108 | "c11t", 109 | "c11t0", 110 | "c11to", 111 | "c1it", 112 | "c1it0", 113 | "c1ito", 114 | "cabr0n", 115 | "cabra0", 116 | "cabrao", 117 | "cabron", 118 | "caca", 119 | "cacca", 120 | "cacete", 121 | "cagante", 122 | "cagar", 123 | "cagare", 124 | "cagna", 125 | "cara1h0", 126 | "cara1ho", 127 | "caracu10", 128 | "caracu1o", 129 | "caracul0", 130 | "caraculo", 131 | "caralh0", 132 | "caralho", 133 | "cazz0", 134 | "cazz1mma", 135 | "cazzata", 136 | "cazzimma", 137 | "cazzo", 138 | "ch00t1a", 139 | "ch00t1ya", 140 | "ch00tia", 141 | 
"ch00tiya", 142 | "ch0d", 143 | "ch0ot1a", 144 | "ch0ot1ya", 145 | "ch0otia", 146 | "ch0otiya", 147 | "ch1asse", 148 | "ch1avata", 149 | "ch1er", 150 | "ch1ng0", 151 | "ch1ngadaz0s", 152 | "ch1ngadazos", 153 | "ch1ngader1ta", 154 | "ch1ngaderita", 155 | "ch1ngar", 156 | "ch1ngo", 157 | "ch1ngues", 158 | "ch1nk", 159 | "chatte", 160 | "chiasse", 161 | "chiavata", 162 | "chier", 163 | "ching0", 164 | "chingadaz0s", 165 | "chingadazos", 166 | "chingader1ta", 167 | "chingaderita", 168 | "chingar", 169 | "chingo", 170 | "chingues", 171 | "chink", 172 | "cho0t1a", 173 | "cho0t1ya", 174 | "cho0tia", 175 | "cho0tiya", 176 | "chod", 177 | "choot1a", 178 | "choot1ya", 179 | "chootia", 180 | "chootiya", 181 | "cl1t", 182 | "cl1t0", 183 | "cl1to", 184 | "clit", 185 | "clit0", 186 | "clito", 187 | "cock", 188 | "cog110ne", 189 | "cog11one", 190 | "cog1i0ne", 191 | "cog1ione", 192 | "cogl10ne", 193 | "cogl1one", 194 | "cogli0ne", 195 | "coglione", 196 | "cona", 197 | "connard", 198 | "connasse", 199 | "conne", 200 | "cou111es", 201 | "cou11les", 202 | "cou1l1es", 203 | "cou1lles", 204 | "coui11es", 205 | "coui1les", 206 | "couil1es", 207 | "couilles", 208 | "cracker", 209 | "crap", 210 | "cu10", 211 | "cu1att0ne", 212 | "cu1attone", 213 | "cu1er0", 214 | "cu1ero", 215 | "cu1o", 216 | "cul0", 217 | "culatt0ne", 218 | "culattone", 219 | "culer0", 220 | "culero", 221 | "culo", 222 | "cum", 223 | "cunt", 224 | "d11d0", 225 | "d11do", 226 | "d1ck", 227 | "d1ld0", 228 | "d1ldo", 229 | "damn", 230 | "de1ch", 231 | "deich", 232 | "depp", 233 | "di1d0", 234 | "di1do", 235 | "dick", 236 | "dild0", 237 | "dildo", 238 | "dyke", 239 | "encu1e", 240 | "encule", 241 | "enema", 242 | "enf01re", 243 | "enf0ire", 244 | "enfo1re", 245 | "enfoire", 246 | "estup1d0", 247 | "estup1do", 248 | "estupid0", 249 | "estupido", 250 | "etr0n", 251 | "etron", 252 | "f0da", 253 | "f0der", 254 | "f0ttere", 255 | "f0tters1", 256 | "f0ttersi", 257 | "f0tze", 258 | "f0utre", 259 | "f1ca", 260 | "f1cker", 261 | "f1ga", 262 | "fag", 263 | "fica", 264 | "ficker", 265 | "figa", 266 | "foda", 267 | "foder", 268 | "fottere", 269 | "fotters1", 270 | "fottersi", 271 | "fotze", 272 | "foutre", 273 | "fr0c10", 274 | "fr0c1o", 275 | "fr0ci0", 276 | "fr0cio", 277 | "fr0sc10", 278 | "fr0sc1o", 279 | "fr0sci0", 280 | "fr0scio", 281 | "froc10", 282 | "froc1o", 283 | "froci0", 284 | "frocio", 285 | "frosc10", 286 | "frosc1o", 287 | "frosci0", 288 | "froscio", 289 | "fuck", 290 | "g00", 291 | "g0o", 292 | "g0u1ne", 293 | "g0uine", 294 | "gandu", 295 | "go0", 296 | "goo", 297 | "gou1ne", 298 | "gouine", 299 | "gr0gnasse", 300 | "grognasse", 301 | "haram1", 302 | "harami", 303 | "haramzade", 304 | "hund1n", 305 | "hundin", 306 | "id10t", 307 | "id1ot", 308 | "idi0t", 309 | "idiot", 310 | "imbec11e", 311 | "imbec1le", 312 | "imbeci1e", 313 | "imbecile", 314 | "j1zz", 315 | "jerk", 316 | "jizz", 317 | "k1ke", 318 | "kam1ne", 319 | "kamine", 320 | "kike", 321 | "leccacu10", 322 | "leccacu1o", 323 | "leccacul0", 324 | "leccaculo", 325 | "m1erda", 326 | "m1gn0tta", 327 | "m1gnotta", 328 | "m1nch1a", 329 | "m1nchia", 330 | "m1st", 331 | "mam0n", 332 | "mamahuev0", 333 | "mamahuevo", 334 | "mamon", 335 | "masturbat10n", 336 | "masturbat1on", 337 | "masturbate", 338 | "masturbati0n", 339 | "masturbation", 340 | "merd0s0", 341 | "merd0so", 342 | "merda", 343 | "merde", 344 | "merdos0", 345 | "merdoso", 346 | "mierda", 347 | "mign0tta", 348 | "mignotta", 349 | "minch1a", 350 | "minchia", 351 | "mist", 352 | "musch1", 353 | "muschi", 354 | "n1gger", 355 | "neger", 356 
| "negr0", 357 | "negre", 358 | "negro", 359 | "nerch1a", 360 | "nerchia", 361 | "nigger", 362 | "orgasm", 363 | "p00p", 364 | "p011a", 365 | "p01la", 366 | "p0l1a", 367 | "p0lla", 368 | "p0mp1n0", 369 | "p0mp1no", 370 | "p0mpin0", 371 | "p0mpino", 372 | "p0op", 373 | "p0rca", 374 | "p0rn", 375 | "p0rra", 376 | "p0uff1asse", 377 | "p0uffiasse", 378 | "p1p1", 379 | "p1pi", 380 | "p1r1a", 381 | "p1rla", 382 | "p1sc10", 383 | "p1sc1o", 384 | "p1sci0", 385 | "p1scio", 386 | "p1sser", 387 | "pa11e", 388 | "pa1le", 389 | "pal1e", 390 | "palle", 391 | "pane1e1r0", 392 | "pane1e1ro", 393 | "pane1eir0", 394 | "pane1eiro", 395 | "panele1r0", 396 | "panele1ro", 397 | "paneleir0", 398 | "paneleiro", 399 | "patakha", 400 | "pec0r1na", 401 | "pec0rina", 402 | "pecor1na", 403 | "pecorina", 404 | "pen1s", 405 | "pendej0", 406 | "pendejo", 407 | "penis", 408 | "pip1", 409 | "pipi", 410 | "pir1a", 411 | "pirla", 412 | "pisc10", 413 | "pisc1o", 414 | "pisci0", 415 | "piscio", 416 | "pisser", 417 | "po0p", 418 | "po11a", 419 | "po1la", 420 | "pol1a", 421 | "polla", 422 | "pomp1n0", 423 | "pomp1no", 424 | "pompin0", 425 | "pompino", 426 | "poop", 427 | "porca", 428 | "porn", 429 | "porra", 430 | "pouff1asse", 431 | "pouffiasse", 432 | "pr1ck", 433 | "prick", 434 | "pussy", 435 | "put1za", 436 | "puta", 437 | "puta1n", 438 | "putain", 439 | "pute", 440 | "putiza", 441 | "puttana", 442 | "queca", 443 | "r0mp1ba11e", 444 | "r0mp1ba1le", 445 | "r0mp1bal1e", 446 | "r0mp1balle", 447 | "r0mpiba11e", 448 | "r0mpiba1le", 449 | "r0mpibal1e", 450 | "r0mpiballe", 451 | "rand1", 452 | "randi", 453 | "rape", 454 | "recch10ne", 455 | "recch1one", 456 | "recchi0ne", 457 | "recchione", 458 | "retard", 459 | "romp1ba11e", 460 | "romp1ba1le", 461 | "romp1bal1e", 462 | "romp1balle", 463 | "rompiba11e", 464 | "rompiba1le", 465 | "rompibal1e", 466 | "rompiballe", 467 | "ruff1an0", 468 | "ruff1ano", 469 | "ruffian0", 470 | "ruffiano", 471 | "s1ut", 472 | "sa10pe", 473 | "sa1aud", 474 | "sa1ope", 475 | "sacanagem", 476 | "sal0pe", 477 | "salaud", 478 | "salope", 479 | "saugnapf", 480 | "sb0rr0ne", 481 | "sb0rra", 482 | "sb0rrone", 483 | "sbattere", 484 | "sbatters1", 485 | "sbattersi", 486 | "sborr0ne", 487 | "sborra", 488 | "sborrone", 489 | "sc0pare", 490 | "sc0pata", 491 | "sch1ampe", 492 | "sche1se", 493 | "sche1sse", 494 | "scheise", 495 | "scheisse", 496 | "schlampe", 497 | "schwachs1nn1g", 498 | "schwachs1nnig", 499 | "schwachsinn1g", 500 | "schwachsinnig", 501 | "schwanz", 502 | "scopare", 503 | "scopata", 504 | "sexy", 505 | "sh1t", 506 | "shit", 507 | "slut", 508 | "sp0mp1nare", 509 | "sp0mpinare", 510 | "spomp1nare", 511 | "spompinare", 512 | "str0nz0", 513 | "str0nza", 514 | "str0nzo", 515 | "stronz0", 516 | "stronza", 517 | "stronzo", 518 | "stup1d", 519 | "stupid", 520 | "succh1am1", 521 | "succh1ami", 522 | "succhiam1", 523 | "succhiami", 524 | "sucker", 525 | "t0pa", 526 | "tapette", 527 | "test1c1e", 528 | "test1cle", 529 | "testic1e", 530 | "testicle", 531 | "tette", 532 | "topa", 533 | "tr01a", 534 | "tr0ia", 535 | "tr0mbare", 536 | "tr1ng1er", 537 | "tr1ngler", 538 | "tring1er", 539 | "tringler", 540 | "tro1a", 541 | "troia", 542 | "trombare", 543 | "turd", 544 | "twat", 545 | "vaffancu10", 546 | "vaffancu1o", 547 | "vaffancul0", 548 | "vaffanculo", 549 | "vag1na", 550 | "vagina", 551 | "verdammt", 552 | "verga", 553 | "w1chsen", 554 | "wank", 555 | "wichsen", 556 | "x0ch0ta", 557 | "x0chota", 558 | "xana", 559 | "xoch0ta", 560 | "xochota", 561 | "z0cc01a", 562 | "z0cc0la", 563 | "z0cco1a", 564 | "z0ccola", 565 | 
"z1z1", 566 | "z1zi", 567 | "ziz1", 568 | "zizi", 569 | "zocc01a", 570 | "zocc0la", 571 | "zocco1a", 572 | "zoccola", 573 | ] 574 | 575 | _exact_match = { 576 | "ass", 577 | "cum", 578 | "fag", 579 | "g00", 580 | "g0o", 581 | "go0", 582 | "goo", 583 | } 584 | 585 | _match_at_ends = ( 586 | "0rgasm", 587 | "1d10t", 588 | "1d1ot", 589 | "1di0t", 590 | "1diot", 591 | "1eccacu10", 592 | "1eccacu1o", 593 | "1eccacul0", 594 | "1eccaculo", 595 | "1mbec11e", 596 | "1mbec1le", 597 | "1mbeci1e", 598 | "1mbecile", 599 | "a11upat0", 600 | "a11upato", 601 | "a1lupat0", 602 | "a1lupato", 603 | "ah01e", 604 | "ah0le", 605 | "aho1e", 606 | "al1upat0", 607 | "al1upato", 608 | "allupat0", 609 | "ana1", 610 | "ana1e", 611 | "arrapat0", 612 | "b00b", 613 | "b00be", 614 | "b01ata", 615 | "b0ceta", 616 | "b0iata", 617 | "b0ob", 618 | "b0obe", 619 | "b0sta", 620 | "b1tch", 621 | "b1te", 622 | "b1tte", 623 | "ba1atkar", 624 | "bastard0", 625 | "batt0na", 626 | "bo0b", 627 | "bo0be", 628 | "bo1ata", 629 | "bran1age", 630 | "bran1er", 631 | "bran1ette", 632 | "bran1eur", 633 | "bran1euse", 634 | "c0ck", 635 | "c0g110ne", 636 | "c0g11one", 637 | "c0g1i0ne", 638 | "c0g1ione", 639 | "c0gl10ne", 640 | "c0gl1one", 641 | "c0gli0ne", 642 | "c0glione", 643 | "c0na", 644 | "c0nnard", 645 | "c0nnasse", 646 | "c0nne", 647 | "c0u111es", 648 | "c0u11les", 649 | "c0u1l1es", 650 | "c0u1lles", 651 | "c0ui11es", 652 | "c0ui1les", 653 | "c0uil1es", 654 | "c0uilles", 655 | "c11t", 656 | "c11t0", 657 | "c11to", 658 | "c1it", 659 | "c1it0", 660 | "c1ito", 661 | "cabr0n", 662 | "cabra0", 663 | "cara1h0", 664 | "cara1ho", 665 | "caracu10", 666 | "caracu1o", 667 | "caracul0", 668 | "caralh0", 669 | "cazz0", 670 | "cazz1mma", 671 | "ch00t1a", 672 | "ch00t1ya", 673 | "ch00tia", 674 | "ch00tiya", 675 | "ch0d", 676 | "ch0ot1a", 677 | "ch0ot1ya", 678 | "ch0otia", 679 | "ch0otiya", 680 | "ch1asse", 681 | "ch1avata", 682 | "ch1er", 683 | "ch1ng0", 684 | "ch1ngadaz0s", 685 | "ch1ngadazos", 686 | "ch1ngader1ta", 687 | "ch1ngaderita", 688 | "ch1ngar", 689 | "ch1ngo", 690 | "ch1ngues", 691 | "ch1nk", 692 | "ching0", 693 | "chingadaz0s", 694 | "chingader1ta", 695 | "cho0t1a", 696 | "cho0t1ya", 697 | "cho0tia", 698 | "cho0tiya", 699 | "choot1a", 700 | "choot1ya", 701 | "cl1t", 702 | "cl1t0", 703 | "cl1to", 704 | "clit0", 705 | "cog110ne", 706 | "cog11one", 707 | "cog1i0ne", 708 | "cog1ione", 709 | "cogl10ne", 710 | "cogl1one", 711 | "cogli0ne", 712 | "cou111es", 713 | "cou11les", 714 | "cou1l1es", 715 | "cou1lles", 716 | "coui11es", 717 | "coui1les", 718 | "couil1es", 719 | "cu10", 720 | "cu1att0ne", 721 | "cu1attone", 722 | "cu1er0", 723 | "cu1ero", 724 | "cu1o", 725 | "cul0", 726 | "culatt0ne", 727 | "culer0", 728 | "d11d0", 729 | "d11do", 730 | "d1ck", 731 | "d1ld0", 732 | "d1ldo", 733 | "de1ch", 734 | "di1d0", 735 | "di1do", 736 | "dild0", 737 | "encu1e", 738 | "enf01re", 739 | "enf0ire", 740 | "enfo1re", 741 | "estup1d0", 742 | "estup1do", 743 | "estupid0", 744 | "etr0n", 745 | "f0da", 746 | "f0der", 747 | "f0ttere", 748 | "f0tters1", 749 | "f0ttersi", 750 | "f0tze", 751 | "f0utre", 752 | "f1ca", 753 | "f1cker", 754 | "f1ga", 755 | "fotters1", 756 | "fr0c10", 757 | "fr0c1o", 758 | "fr0ci0", 759 | "fr0cio", 760 | "fr0sc10", 761 | "fr0sc1o", 762 | "fr0sci0", 763 | "fr0scio", 764 | "froc10", 765 | "froc1o", 766 | "froci0", 767 | "frosc10", 768 | "frosc1o", 769 | "frosci0", 770 | "g0u1ne", 771 | "g0uine", 772 | "gou1ne", 773 | "gr0gnasse", 774 | "haram1", 775 | "hund1n", 776 | "id10t", 777 | "id1ot", 778 | "idi0t", 779 | "imbec11e", 780 | "imbec1le", 
781 | "imbeci1e", 782 | "j1zz", 783 | "k1ke", 784 | "kam1ne", 785 | "leccacu10", 786 | "leccacu1o", 787 | "leccacul0", 788 | "m1erda", 789 | "m1gn0tta", 790 | "m1gnotta", 791 | "m1nch1a", 792 | "m1nchia", 793 | "m1st", 794 | "mam0n", 795 | "mamahuev0", 796 | "masturbat10n", 797 | "masturbat1on", 798 | "masturbati0n", 799 | "merd0s0", 800 | "merd0so", 801 | "merdos0", 802 | "mign0tta", 803 | "minch1a", 804 | "musch1", 805 | "n1gger", 806 | "negr0", 807 | "nerch1a", 808 | "p00p", 809 | "p011a", 810 | "p01la", 811 | "p0l1a", 812 | "p0lla", 813 | "p0mp1n0", 814 | "p0mp1no", 815 | "p0mpin0", 816 | "p0mpino", 817 | "p0op", 818 | "p0rca", 819 | "p0rn", 820 | "p0rra", 821 | "p0uff1asse", 822 | "p0uffiasse", 823 | "p1p1", 824 | "p1pi", 825 | "p1r1a", 826 | "p1rla", 827 | "p1sc10", 828 | "p1sc1o", 829 | "p1sci0", 830 | "p1scio", 831 | "p1sser", 832 | "pa11e", 833 | "pa1le", 834 | "pal1e", 835 | "pane1e1r0", 836 | "pane1e1ro", 837 | "pane1eir0", 838 | "pane1eiro", 839 | "panele1r0", 840 | "panele1ro", 841 | "paneleir0", 842 | "pec0r1na", 843 | "pec0rina", 844 | "pecor1na", 845 | "pen1s", 846 | "pendej0", 847 | "pip1", 848 | "pir1a", 849 | "pisc10", 850 | "pisc1o", 851 | "pisci0", 852 | "po0p", 853 | "po11a", 854 | "po1la", 855 | "pol1a", 856 | "pomp1n0", 857 | "pomp1no", 858 | "pompin0", 859 | "pouff1asse", 860 | "pr1ck", 861 | "put1za", 862 | "puta1n", 863 | "r0mp1ba11e", 864 | "r0mp1ba1le", 865 | "r0mp1bal1e", 866 | "r0mp1balle", 867 | "r0mpiba11e", 868 | "r0mpiba1le", 869 | "r0mpibal1e", 870 | "r0mpiballe", 871 | "rand1", 872 | "recch10ne", 873 | "recch1one", 874 | "recchi0ne", 875 | "romp1ba11e", 876 | "romp1ba1le", 877 | "romp1bal1e", 878 | "romp1balle", 879 | "rompiba11e", 880 | "rompiba1le", 881 | "rompibal1e", 882 | "ruff1an0", 883 | "ruff1ano", 884 | "ruffian0", 885 | "s1ut", 886 | "sa10pe", 887 | "sa1aud", 888 | "sa1ope", 889 | "sal0pe", 890 | "sb0rr0ne", 891 | "sb0rra", 892 | "sb0rrone", 893 | "sbatters1", 894 | "sborr0ne", 895 | "sc0pare", 896 | "sc0pata", 897 | "sch1ampe", 898 | "sche1se", 899 | "sche1sse", 900 | "schwachs1nn1g", 901 | "schwachs1nnig", 902 | "schwachsinn1g", 903 | "sh1t", 904 | "sp0mp1nare", 905 | "sp0mpinare", 906 | "spomp1nare", 907 | "str0nz0", 908 | "str0nza", 909 | "str0nzo", 910 | "stronz0", 911 | "stup1d", 912 | "succh1am1", 913 | "succh1ami", 914 | "succhiam1", 915 | "t0pa", 916 | "test1c1e", 917 | "test1cle", 918 | "testic1e", 919 | "tr01a", 920 | "tr0ia", 921 | "tr0mbare", 922 | "tr1ng1er", 923 | "tr1ngler", 924 | "tring1er", 925 | "tro1a", 926 | "vaffancu10", 927 | "vaffancu1o", 928 | "vaffancul0", 929 | "vag1na", 930 | "w1chsen", 931 | "x0ch0ta", 932 | "x0chota", 933 | "xoch0ta", 934 | "z0cc01a", 935 | "z0cc0la", 936 | "z0cco1a", 937 | "z0ccola", 938 | "z1z1", 939 | "z1zi", 940 | "ziz1", 941 | "zocc01a", 942 | "zocc0la", 943 | "zocco1a", 944 | ) 945 | 946 | _match_anywhere = { 947 | "aand", 948 | "ahole", 949 | "allupato", 950 | "anal", 951 | "anale", 952 | "anus", 953 | "arrapato", 954 | "arsch", 955 | "arse", 956 | "balatkar", 957 | "bastardo", 958 | "battona", 959 | "bitch", 960 | "bite", 961 | "bitte", 962 | "boceta", 963 | "boiata", 964 | "boob", 965 | "boobe", 966 | "bosta", 967 | "branlage", 968 | "branler", 969 | "branlette", 970 | "branleur", 971 | "branleuse", 972 | "cabrao", 973 | "cabron", 974 | "caca", 975 | "cacca", 976 | "cacete", 977 | "cagante", 978 | "cagar", 979 | "cagare", 980 | "cagna", 981 | "caraculo", 982 | "caralho", 983 | "cazzata", 984 | "cazzimma", 985 | "cazzo", 986 | "chatte", 987 | "chiasse", 988 | "chiavata", 989 | "chier", 990 
| "chingadazos", 991 | "chingaderita", 992 | "chingar", 993 | "chingo", 994 | "chingues", 995 | "chink", 996 | "chod", 997 | "chootia", 998 | "chootiya", 999 | "clit", 1000 | "clito", 1001 | "cock", 1002 | "coglione", 1003 | "cona", 1004 | "connard", 1005 | "connasse", 1006 | "conne", 1007 | "couilles", 1008 | "cracker", 1009 | "crap", 1010 | "culattone", 1011 | "culero", 1012 | "culo", 1013 | "cunt", 1014 | "damn", 1015 | "deich", 1016 | "depp", 1017 | "dick", 1018 | "dildo", 1019 | "dyke", 1020 | "encule", 1021 | "enema", 1022 | "enfoire", 1023 | "estupido", 1024 | "etron", 1025 | "fica", 1026 | "ficker", 1027 | "figa", 1028 | "foda", 1029 | "foder", 1030 | "fottere", 1031 | "fottersi", 1032 | "fotze", 1033 | "foutre", 1034 | "frocio", 1035 | "froscio", 1036 | "fuck", 1037 | "gandu", 1038 | "gouine", 1039 | "grognasse", 1040 | "harami", 1041 | "haramzade", 1042 | "hundin", 1043 | "idiot", 1044 | "imbecile", 1045 | "jerk", 1046 | "jizz", 1047 | "kamine", 1048 | "kike", 1049 | "leccaculo", 1050 | "mamahuevo", 1051 | "mamon", 1052 | "masturbate", 1053 | "masturbation", 1054 | "merda", 1055 | "merde", 1056 | "merdoso", 1057 | "mierda", 1058 | "mignotta", 1059 | "minchia", 1060 | "mist", 1061 | "muschi", 1062 | "neger", 1063 | "negre", 1064 | "negro", 1065 | "nerchia", 1066 | "nigger", 1067 | "orgasm", 1068 | "palle", 1069 | "paneleiro", 1070 | "patakha", 1071 | "pecorina", 1072 | "pendejo", 1073 | "penis", 1074 | "pipi", 1075 | "pirla", 1076 | "piscio", 1077 | "pisser", 1078 | "polla", 1079 | "pompino", 1080 | "poop", 1081 | "porca", 1082 | "porn", 1083 | "porra", 1084 | "pouffiasse", 1085 | "prick", 1086 | "pussy", 1087 | "puta", 1088 | "putain", 1089 | "pute", 1090 | "putiza", 1091 | "puttana", 1092 | "queca", 1093 | "randi", 1094 | "rape", 1095 | "recchione", 1096 | "retard", 1097 | "rompiballe", 1098 | "ruffiano", 1099 | "sacanagem", 1100 | "salaud", 1101 | "salope", 1102 | "saugnapf", 1103 | "sbattere", 1104 | "sbattersi", 1105 | "sborra", 1106 | "sborrone", 1107 | "scheise", 1108 | "scheisse", 1109 | "schlampe", 1110 | "schwachsinnig", 1111 | "schwanz", 1112 | "scopare", 1113 | "scopata", 1114 | "sexy", 1115 | "shit", 1116 | "slut", 1117 | "spompinare", 1118 | "stronza", 1119 | "stronzo", 1120 | "stupid", 1121 | "succhiami", 1122 | "sucker", 1123 | "tapette", 1124 | "testicle", 1125 | "tette", 1126 | "topa", 1127 | "tringler", 1128 | "troia", 1129 | "trombare", 1130 | "turd", 1131 | "twat", 1132 | "vaffanculo", 1133 | "vagina", 1134 | "verdammt", 1135 | "verga", 1136 | "wank", 1137 | "wichsen", 1138 | "xana", 1139 | "xochota", 1140 | "zizi", 1141 | "zoccola", 1142 | } 1143 | --------------------------------------------------------------------------------