├── .coveragerc
├── .github
└── workflows
│ ├── pypi-publish.yml
│ └── python-package.yml
├── .gitignore
├── .readthedocs.yml
├── LICENSE
├── README.md
├── docs
├── coverage.svg
├── mkdocs.yml
├── pages
│ ├── cli.md
│ ├── css
│ │ └── extra.css
│ ├── demos
│ │ ├── using-bases.gif
│ │ ├── using-codext.gif
│ │ └── using-debase.gif
│ ├── enc
│ │ ├── base.md
│ │ ├── binary.md
│ │ ├── common.md
│ │ ├── compressions.md
│ │ ├── crypto.md
│ │ ├── hashing.md
│ │ ├── languages.md
│ │ ├── others.md
│ │ ├── stegano.md
│ │ └── web.md
│ ├── features.md
│ ├── guessing.md
│ ├── howto.md
│ ├── img
│ │ ├── banner.png
│ │ ├── icon.png
│ │ └── logo.png
│ ├── index.md
│ └── manipulations.md
└── requirements.txt
├── pyproject.toml
├── pytest.ini
├── requirements.txt
├── src
└── codext
│ ├── VERSION.txt
│ ├── __common__.py
│ ├── __info__.py
│ ├── __init__.py
│ ├── base
│ ├── __init__.py
│ ├── _base.py
│ ├── _base2n.py
│ ├── base100.py
│ ├── base122.py
│ ├── base45.py
│ ├── base85.py
│ ├── base91.py
│ └── baseN.py
│ ├── binary
│ ├── __init__.py
│ ├── baudot.py
│ ├── bcd.py
│ ├── excess3.py
│ ├── gray.py
│ ├── manchester.py
│ └── rotate.py
│ ├── common
│ ├── __init__.py
│ ├── a1z26.py
│ ├── cases.py
│ ├── dummy.py
│ ├── octal.py
│ └── ordinal.py
│ ├── compressions
│ ├── __init__.py
│ ├── gzipp.py
│ ├── lz77.py
│ ├── lz78.py
│ └── pkzip.py
│ ├── crypto
│ ├── __init__.py
│ ├── affine.py
│ ├── atbash.py
│ ├── bacon.py
│ ├── barbie.py
│ ├── citrix.py
│ ├── railfence.py
│ ├── rot.py
│ ├── scytale.py
│ ├── shift.py
│ └── xor.py
│ ├── hashing
│ ├── __init__.py
│ ├── blake.py
│ ├── checksums.py
│ ├── crypt.py
│ ├── md.py
│ ├── sha.py
│ └── shake.py
│ ├── languages
│ ├── __init__.py
│ ├── braille.py
│ ├── galactic.py
│ ├── ipsum.py
│ ├── leetspeak.py
│ ├── morse.py
│ ├── navajo.py
│ ├── radio.py
│ ├── southpark.py
│ ├── tap.py
│ └── tomtom.py
│ ├── macros.json
│ ├── others
│ ├── __init__.py
│ ├── dna.py
│ ├── kbshift.py
│ ├── letters.py
│ ├── markdown.py
│ └── uuencode.py
│ ├── stegano
│ ├── __init__.py
│ ├── hexagram.py
│ ├── klopf.py
│ ├── resistor.py
│ ├── rick.py
│ ├── sms.py
│ └── whitespace.py
│ └── web
│ ├── __init__.py
│ ├── html.py
│ └── url.py
└── tests
├── __init__.py
├── test_base.py
├── test_common.py
├── test_generated.py
└── test_manual.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | source = codext
3 | omit =
4 | src/codext/__info__.py
5 | src/codext/**/__init__.py
6 |
7 | [report]
8 | exclude_lines =
9 | pragma: no cover
10 | if.*?__name__.*?==.*?.__main__.:
11 | def main\(\)\:
12 | def __stdin_pipe\(\)\:
13 | for line in __stdin_pipe\(\)\:
14 | def __format_list\(items, include\=True\)\:
15 | def __print_tabular\(lst, space\=4\)\:
16 | except ImportError:
17 | except NameError:
18 | raise NotImplementedError
19 | def _detect\(text\)\:
20 | def _lang\(lang\)\:
21 | if stopfunc\.LANG_BACKEND\:
22 | def _validate\(stop_function, lang_backend\=\"none\"\)\:
23 | except KeyboardInterrupt\:
24 | if alt and len\(t\) \% 2 \=\= 1\:
25 |
--------------------------------------------------------------------------------
/.github/workflows/pypi-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will deploy the Python package to PyPi.org
2 |
3 | name: deploy
4 |
5 | env:
6 | package: codext
7 |
8 | on:
9 | push:
10 | branches:
11 | - main
12 | paths:
13 | - '**/VERSION.txt'
14 | workflow_run:
15 | workflows: ["build"]
16 | types: [completed]
17 |
18 | jobs:
19 | deploy:
20 | runs-on: ubuntu-latest
21 | if: ${{ github.event.workflow_run.conclusion == 'success' }}
22 | steps:
23 | - uses: actions/checkout@v3
24 | with:
25 | fetch-depth: 0
26 | - name: Cleanup README
27 | run: |
28 | sed -ri 's/^(##*)\s*:.*:\s*/\1 /g' README.md
29 | awk '{if (match($0,"## Supporters")) exit; print}' README.md > README
30 | mv -f README README.md
31 | - run: python3 -m pip install --upgrade build && python3 -m build
32 | - name: Upload ${{ env.package }} to PyPI
33 | uses: pypa/gh-action-pypi-publish@release/v1
34 | with:
35 | password: ${{ secrets.PYPI_API_TOKEN }}
36 | verbose: true
37 | verify_metadata: false
38 |
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3 |
4 | name: build
5 |
6 | env:
7 | package: codext
8 |
9 | on:
10 | push:
11 | branches: [ "main" ]
12 | pull_request:
13 | branches: [ "main" ]
14 |
15 | jobs:
16 | build:
17 | runs-on: ${{ matrix.os }}
18 | strategy:
19 | fail-fast: false
20 | matrix:
21 | os: [ubuntu-latest]
22 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
23 | steps:
24 | - uses: actions/checkout@v3
25 | - name: Set up Python ${{ matrix.python-version }}
26 | uses: actions/setup-python@v4
27 | with:
28 | python-version: ${{ matrix.python-version }}
29 | - name: Install ${{ env.package }}
30 | run: |
31 | python -m pip install --upgrade pip
32 | python -m pip install flake8 pytest pytest-cov pytest-pythonpath coverage
33 | pip install -r requirements.txt
34 | pip install .
35 | - name: Lint with flake8
36 | run: |
37 | # stop the build if there are Python syntax errors or undefined names
38 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
39 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
40 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
41 | - name: Test ${{ env.package }} with pytest
42 | run: |
43 | pytest --cov=$package
44 | coverage:
45 | needs: build
46 | runs-on: ubuntu-latest
47 | env:
48 | cov_badge_path: docs/coverage.svg
49 | steps:
50 | - uses: actions/checkout@v3
51 | - name: Install ${{ env.package }}
52 | run: |
53 | python -m pip install --upgrade pip
54 | python -m pip install pytest pytest-cov pytest-pythonpath
55 | pip install -r requirements.txt
56 | pip install .
57 | - name: Make coverage badge for ${{ env.package }}
58 | run: |
59 | pip install genbadge[coverage]
60 | pytest --cov=$package --cov-report=xml
61 | genbadge coverage -i coverage.xml -o $cov_badge_path
62 | - name: Verify Changed files
63 | uses: tj-actions/verify-changed-files@v17
64 | id: changed_files
65 | with:
66 | files: ${{ env.cov_badge_path }}
67 | - name: Commit files
68 | if: steps.changed_files.outputs.files_changed == 'true'
69 | run: |
70 | git config --local user.email "github-actions[bot]@users.noreply.github.com"
71 | git config --local user.name "github-actions[bot]"
72 | git add $cov_badge_path
73 | git commit -m "Updated coverage.svg"
74 | - name: Push changes
75 | if: steps.changed_files.outputs.files_changed == 'true'
76 | uses: ad-m/github-push-action@master
77 | with:
78 | github_token: ${{ secrets.github_token }}
79 | branch: ${{ github.ref }}
80 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Temp files
2 | *~
3 | *.backup
4 | .DS_Store
5 |
6 | # Byte-compiled / optimized / DLL files
7 | __pycache__/
8 | *.py[cod]
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | env/
16 | build/
17 | .build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib64/
24 | parts/
25 | sdist/
26 | reinstall.sh
27 | test.sh
28 | update.sh
29 | version.py
30 |
31 | var/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 | MANIFEST
42 |
43 | # Installer logs
44 | pip-log.txt
45 | pip-delete-this-directory.txt
46 |
47 | # Unit test / coverage reports
48 | htmlcov/
49 | coverage/
50 | .tox/
51 | .coverage
52 | .coverage.*
53 | .coveralls.*
54 | .cache
55 | nosetests.xml
56 | coverage.xml
57 | *,cover
58 |
59 | # Translations
60 | *.mo
61 | *.pot
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # Project artifacts
70 | .idea
71 | .vagrant
72 | .test
73 | .pytest_cache
74 | tmp
75 | TODO
76 | script.py
77 | tool.py
78 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 | os: "ubuntu-22.04"
5 | tools:
6 | python: "3.11"
7 |
8 | mkdocs:
9 | configuration: docs/mkdocs.yml
10 |
11 | python:
12 | install:
13 | - requirements: docs/requirements.txt
14 |
--------------------------------------------------------------------------------
/docs/coverage.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_author: dhondta
2 | site_name: "Codext - Extension of native codecs for Python"
3 | repo_url: https://github.com/dhondta/python-codext
4 | copyright: Copyright © 2021-2023 Alexandre D'Hondt
5 | docs_dir: pages
6 | nav:
7 | - Introduction: index.md
8 | - Features: features.md
9 | - 'Guess mode': guessing.md
10 | - Encodings:
11 | - Base: enc/base.md
12 | - Binary: enc/binary.md
13 | - Common: enc/common.md
14 | - Compressions: enc/compressions.md
15 | - Cryptography: enc/crypto.md
16 | - Hashing: enc/hashing.md
17 | - Languages: enc/languages.md
18 | - Others: enc/others.md
19 | - Steganography: enc/stegano.md
20 | - 'String manipulations': manipulations.md
21 | - 'CLI tool': cli.md
22 | - 'Create your codec': howto.md
23 | extra:
24 | generator: false
25 | social:
26 | - icon: fontawesome/solid/paper-plane
27 | link: mailto:alexandre.dhondt@gmail.com
28 | name: Contact Alex
29 | - icon: fontawesome/brands/github
30 | link: https://github.com/dhondta
31 | name: Alex on GitHub
32 | - icon: fontawesome/brands/linkedin
33 | link: https://www.linkedin.com/in/alexandre-d-2ab2aa14/
34 | name: Alex on LinkedIn
35 | - icon: fontawesome/brands/twitter
36 | link: https://twitter.com/alex_dhondt
37 | name: Alex on Twitter
38 | extra_css:
39 | - css/extra.css
40 | theme:
41 | name: material
42 | palette:
43 | - scheme: default
44 | toggle:
45 | icon: material/brightness-7
46 | name: Switch to dark mode
47 | - scheme: slate
48 | toggle:
49 | icon: material/brightness-4
50 | name: Switch to light mode
51 | logo: img/logo.png
52 | favicon: img/icon.png
53 | use_directory_urls: false
54 | markdown_extensions:
55 | - toc:
56 | permalink: true
57 | - admonition
58 |
--------------------------------------------------------------------------------
/docs/pages/cli.md:
--------------------------------------------------------------------------------
1 | `codext` has a Command-Line Interface tool.
2 |
3 | -----
4 |
5 | ### Using Codext from the terminal
6 |
7 | The help message describes everything to know:
8 |
9 | ```sh
10 | usage: codext [-h] [-i INFILE] [-o OUTFILE] [-s] {encode,decode,guess,search} ...
11 |
12 | Codecs Extension (CodExt) 1.8.1
13 |
14 | Author : Alexandre D'Hondt (alexandre.dhondt@gmail.com)
15 | Copyright: © 2019-2021 A. D'Hondt
16 | License : GPLv3 (https://www.gnu.org/licenses/gpl-3.0.fr.html)
17 | Source : https://github.com/dhondta/python-codext
18 |
19 | This tool allows to encode/decode input strings/files with an extended set of codecs.
20 |
21 | positional arguments:
22 | {encode,decode,guess,search}
23 | command to be executed
24 | encode encode input using the specified codecs
25 | decode decode input using the specified codecs
26 | guess try guessing the decoding codecs
27 | search search for codecs
28 |
29 | optional arguments:
30 | -h, --help show this help message and exit
31 | -i INFILE, --input-file INFILE
32 | input file (if none, take stdin as input)
33 | -o OUTFILE, --output-file OUTFILE
34 | output file (if none, display result to stdout)
35 | -s, --strip-newlines strip newlines from input
36 |
37 | usage examples:
38 | - codext search bitcoin
39 | - codext decode base32 -i file.b32
40 | - codext encode morse < to_be_encoded.txt
41 | - echo "test" | codext encode base100
42 | - echo -en "test" | codext encode braille -o test.braille
43 | - codext encode base64 < to_be_encoded.txt > text.b64
44 | - echo -en "test" | codext encode base64 | codext encode base32
45 | - echo -en "mrdvm6teie6t2cq=" | codext encode upper | codext decode base32 | codext decode base64
46 | - echo -en "test" | codext encode upper reverse base32 | codext decode base32 reverse lower
47 | - echo -en "test" | codext encode upper reverse base32 base64 morse
48 | - echo -en "test" | codext encode base64 gzip | codext guess
49 | - echo -en "test" | codext encode base64 gzip | codext guess gzip -c base
50 | ```
51 |
52 | !!! note "Input/output"
53 |
54 | STDIN can be used as shown in an example from the help message, like when using the common Linux tool `base64`.
55 |
56 | Unless an output file is specified, the result is displayed in STDOUT.
57 |
58 | !!! note "Encodings chaining"
59 |
60 | Encodings can be chained as shown in the last examples of the help message. This can be practical for quickly manipulating data.
61 |
62 | ### Execution examples
63 |
64 | **Scenario 1**: 2-stages encoded flag
65 |
66 | Creating the payload:
67 |
68 | ```session
69 | $ echo "A somewhat weird F1@9 !" | codext encode rotate-3 base58
70 | pwTDSWRUbXTuMQs5EDgKpjgW8MiJVw1
71 | ```
72 |
73 | From this point, the only thing we know is that we are searching for "*flag*" (possibly with other characters, e.g. leetspeak).
74 |
75 | ```session
76 | $ echo "A somewhat weird F1@9 !" | codext encode rotate-3 base58 | codext guess -f flag
77 | Codecs: base58, rotate-3
78 | A somewhat weird F1@9 !
79 | ```
80 |
81 | Executing the previous command will take a few tens of seconds. With few stages to be guessed, using the scoring heuristic can be far quicker to get to the right output. The following takes less than a second.
82 |
83 | ```session
84 | $ echo "A somewhat weird F1@9 !" | codext encode rotate-3 base58 | codext guess -f flag --heuristic
85 | Codecs: base58, rotate-3
86 | A somewhat weird F1@9 !
87 | ```
88 |
89 | **Scenario 2**: Multi-stage-encoded flag
90 |
91 | Creating the payload:
92 |
93 | ```session
94 | $ echo "A somewhat weird F1@9 !" | codext encode barbie-1 base32 morse
95 | .... -.-- --.- --. -- ....- - -.- -- . ..... -..- --. ..--- .-.. .. . .- ..... .-- -.-. ..... -.. --- -. --.- --.- . --. -- .-. --... ..-. ..- --.- -.-. -- -...- -...- -...-
96 | ```
97 |
98 | When looking at the string, it is easy to figure out it is morse. The problem, at this point, is that this codec is case-insensitive and always returns lowercase characters, as shown hereafter.
99 |
100 | ```session
101 | $ echo "A somewhat weird F1@9 !" | codext encode barbie-1 base32 morse | codext decode morse
102 | hyqgm4tkme5xg2liea5wc5donqqegmr7fuqcm===
103 | ```
104 |
105 | In order to get it guessed as Base32, it is necessary to put it back to uppercase (in other words, decode from lowercase).
106 |
107 | ```session
108 | $ echo "A somewhat weird F1@9 !" | codext encode barbie-1 base32 morse | codext decode morse lowercase
109 | HYQGM4TKME5XG2LIEA5WC5DONQQEGMR7FUQCM===
110 | ```
111 |
112 | Now that we know we are searching for something with "*flag*" (possibly with other characters), we can use the predefined "`flag`" stop function.
113 |
114 | ```session
115 | $ echo "A somewhat weird F1@9 !" | codext encode barbie-1 base32 morse | codext decode morse lowercase | codext guess -f flag
116 | Codecs: base32, barbie
117 | A somewhat weird F1@9 !
118 | ```
119 |
120 | **Scenario 3**: Base-encoded rotated shifted secret (English) message
121 |
122 | Creating the payload:
123 |
124 | ```session
125 | $ echo "My super secret string" | codext encode shift-1 rotate-2 base58 base64
126 | NDNxaFdieXh0Z29XOVZpWWpjRGNpRWgyZE44Z2FNU0g=
127 | ```
128 |
129 | First, we shall simplify as much as possible; we can easily guess that Base64 was used as the first encoding scheme:
130 |
131 | ```session
132 | $ echo "aDRuUnFGaWZTblJqRmZReFJIdVZweGp4cFA4Y0NS" | codext rank
133 | [+] 1.00002: base62
134 | [+] 0.99401: base64
135 | [+] 0.70806: rotate-1
136 | [+] 0.70806: rotate-2
137 | [+] 0.70806: rotate-3
138 | [+] 0.70806: rotate-4
139 | [+] 0.70806: rotate-5
140 | [+] 0.70806: rotate-6
141 | [+] 0.70806: rotate-7
142 | [+] 0.70806: rotate-left-1
143 |
144 | $ echo "aDRuUnFGaWZTblJqRmZReFJIdVZweGp4cFA4Y0NS" | codext decode base62
145 | %¤q ´!.[æ&[fÿhbð^
146 |
147 | $ echo "aDRuUnFGaWZTblJqRmZReFJIdVZweGp4cFA4Y0NS" | codext decode base64
148 | h4nRqFifSnRjFfQxRHuVpxjxpP8cCR
149 | ```
150 |
151 | Afterwards, we can still try to simplify:
152 |
153 | ```session
154 | $ echo "aDRuUnFGaWZTblJqRmZReFJIdVZweGp4cFA4Y0NS" | codext decode base64 | codext rank
155 | [+] 1.00185: base58
156 | [+] 0.99091: base62
157 | [+] 0.67001: rotate-1
158 | [+] 0.67001: rotate-2
159 | [+] 0.67001: rotate-3
160 | [+] 0.67001: rotate-4
161 | [+] 0.67001: rotate-5
162 | [+] 0.67001: rotate-6
163 | [+] 0.67001: rotate-7
164 | [+] 0.67001: rotate-left-1
165 | ```
166 |
167 | From here, let us assume that `base58` is effectively the right second-stage encoding. Guessing the two remaining encodings with no more information will now take a few seconds. As multiple outputs can be recognized as normal text, we will use the "`-s`" option not to stop on the first output successfully decoded as text. Moreover, if we have the intuition that the output shall be English text, we can use a more refined stop function like "`lang_en`" with the "`-f`" option.
168 |
169 | ```session
170 | $ echo "aDRuUnFGaWZTblJqRmZReFJIdVZweGp4cFA4Y0NS" | codext decode base64 | codext decode base58 | codext guess -s -f lang_en
171 | [...]
172 | [+] rotate-2, rot-1: My!super!secret!string
173 | [+] rotate-2, rot-23: Qc!wytiv!wigvix!wxvmrk
174 | [+] rotate-2, shift-1: My super secret string
175 | [+] rotate-2, shift-20: :f\r`b]R_\r`RP_Ra\r`a_V[T
176 | [...]
177 | [+] rotate-left-6, shift-1: My super secret string
178 | ^C^C^C
179 | ```
180 |
181 | We can then stop the research with Ctrl+C. The right output has been found !
182 |
183 |
--------------------------------------------------------------------------------
/docs/pages/css/extra.css:
--------------------------------------------------------------------------------
1 | /* Full width (only works for some themes, including 'material') */
2 | @media only screen and (min-width: 76.25em) {
3 | .md-main__inner {
4 | max-width: none;
5 | }
6 | .md-sidebar--primary {
7 | left: 0;
8 | }
9 | .md-sidebar--secondary {
10 | right: 0;
11 | margin-left: 0;
12 | -webkit-transform: none;
13 | transform: none;
14 | }
15 | }
16 |
17 | /* See https://github.com/mkdocs/mkdocs/wiki/MkDocs-Recipes */
18 | /* Add Support for Checkbox Lists */
19 | .task-list-item {
20 | list-style-type: none;
21 | }
22 |
23 | .task-list-item input {
24 | margin: 0 4px 0.25em -20px;
25 | vertical-align: middle;
26 | }
27 |
--------------------------------------------------------------------------------
/docs/pages/demos/using-bases.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhondta/python-codext/9811df6922b7abdb2252289c104ff09a508b3fbb/docs/pages/demos/using-bases.gif
--------------------------------------------------------------------------------
/docs/pages/demos/using-codext.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhondta/python-codext/9811df6922b7abdb2252289c104ff09a508b3fbb/docs/pages/demos/using-codext.gif
--------------------------------------------------------------------------------
/docs/pages/demos/using-debase.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhondta/python-codext/9811df6922b7abdb2252289c104ff09a508b3fbb/docs/pages/demos/using-debase.gif
--------------------------------------------------------------------------------
/docs/pages/enc/base.md:
--------------------------------------------------------------------------------
1 | `codext` defines a far broader set of Base-encodings than in the original library.
2 |
3 | -----
4 |
5 | ### Classical base 2^N encodings
6 |
7 | This namely adds the classical BaseXX encodings like 16 (hexadecimal) and 32 (RFC 3548), which are not available in the native codecs.
8 |
9 | Common base encodings with N a power of 2:
10 |
11 | **Codec** | **Conversions** | **Aliases** | **Comment**
12 | :---: | :---: | --- | ---
13 | `base2` | text <-> Base2 encoded text | `(base[-_]?2|bin)-inv(erted)?` | Dynamic charset parameter `[-_]...`, amongst letters and digits (e.g. `_AB`)
14 | `base4` | text <-> Base4 encoded text | `base[-_]?4-inv(erted)` | Dynamic charset parameter `[-_]...`, amongst letters and digits (e.g. `_6VC9`)
15 | `base8` | text <-> Base8 encoded text | `base[-_]?8-inv(erted)` | Charset: `abcdefgh` ; Dynamic charset parameter `[-_]...`, amongst letters and digits (e.g. `_A5c96T7x`)
16 | `base16` | text <-> Base16 encoded text | `base[-_]?16-inv(erted)` |
17 | `base32` | text <-> Base32 encoded text | `base[-_]?32-inv(erted)`, `base32-crockford`, `base32_geohash`, ... | Also supports Base32 Crockford, Geohash and Hex
18 | `zbase32` | text <-> ZBase32 encoded text | `z[-_]?base[-_]?32` | Human-oriented Base32
19 | `base64` | text <-> Base64 encoded text | `base[-_]?64-inv(erted)` |
20 |
21 | !!! note "Aliases"
22 |
23 | All the aliases are case insensitive for base encodings.
24 |
25 | ```python
26 | >>> codext.encode("test", "base2")
27 | '01110100011001010111001101110100'
28 | >>> codext.encode("test", "base2-inv")
29 | '10001011100110101000110010001011'
30 | ```
31 |
32 | ```python
33 | >>> codecs.encode("this is a test", "base16")
34 | '7468697320697320612074657374'
35 | >>> codecs.decode("7468697320697320612074657374", "base16")
36 | 'this is a test'
37 | >>> codecs.encode("this is a test", "base16-inv")
38 | '1E02031DCA031DCA0BCA1E0F1D1E'
39 | ```
40 |
41 | ```python
42 | >>> codext.encode("this is a test", "base32")
43 | 'ORUGS4ZANFZSAYJAORSXG5A='
44 | >>> codext.decode("ORUGS4ZANFZSAYJAORSXG5A=", "base32")
45 | 'this is a test'
46 | ```
47 |
48 | Note that for `base64`, it overwrites the native `base64_codec` to also support en/decoding from str.
49 |
50 | ```python
51 | >>> codecs.encode("this is a test", "base64")
52 | 'dGhpcyBpcyBhIHRlc3Q='
53 | >>> codecs.decode("dGhpcyBpcyBhIHRlc3Q=", "base64")
54 | 'this is a test'
55 | ```
56 |
57 | -----
58 |
59 | ### Generic base encodings
60 |
61 | **Codec** | **Conversions** | **Aliases** | **Comment**
62 | :---: | :---: | --- | ---
63 | `base3` | text <-> Base3 encoded text | `base[-_]?3(|[-_]inv(erted)?)` | Dynamic charset parameter `[-_]...`, amongst letters and digits (e.g. `_C2Z`)
64 | `base10` | text <-> Base10 encoded text | `base[-_]?10|int(?:eger)?|dec(?:imal)?` |
65 | `base11` | text <-> Base11 encoded text | `base[-_]?11(|[-_]inv(erted)?)` |
66 | `base36` | text <-> Base36 encoded text | `base[-_]?36(|[-_]inv(erted)?)` |
67 | `base45` | text <-> Base45 encoded text | `base[-_]?45(|[-_]inv(erted)?)` |
68 | `base58` | text <-> Base58 encoded text | `base[-_]?58(|[-_](bc|bitcoin|rp|ripple|fl|flickr|short[-]?url|url))` | Supports Bitcoin, Ripple and short URL
69 | `base62` | text <-> Base62 encoded text | `base[-_]?62(|[-_]inv(erted)?)` |
70 | `base63` | text <-> Base63 encoded text | `base[-_]?63(|[-_]inv(erted)?)` |
71 | `base91` | text <-> Base91 encoded text | `base[-_]?91(|[-_]inv(erted)?)` |
72 | `base91-alt` | text <-> Alternate Base91 encoded text | `base[-_]?91[-_]alt(?:ernate)?(|[-_]inv(erted)?)` | Another version of Base91
73 |
74 | ```python
75 | >>> codext.encode("test", "base3")
76 | '23112113223321323322'
77 | ```
78 |
79 | ```python
80 | >>> codecs.encode("test", "base36")
81 | 'WANEK4'
82 | >>> codecs.decode("4WMHTK6UZL044O91NKCEB8", "base36")
83 | 'this is a test'
84 | ```
85 |
86 | ```python
87 | >>> codext.encode("this is a test!", "base45")
88 | 'AWE+EDH44.OEOCC7WE QEX0'
89 | >>> codext.decode('AWE+EDH44.OEOCC7WE QEX0', "base45")
90 | 'this is a test!'
91 | ```
92 |
93 | ```python
94 | >>> codext.encode("this is a test", "base58")
95 | 'jo91waLQA1NNeBmZKUF'
96 | >>> codext.encode("this is a test", "base58-ripple")
97 | 'jo9rA2LQwr44eBmZK7E'
98 | >>> codext.encode("this is a test", "base58-url")
99 | 'JN91Wzkpa1nnDbLyjtf'
100 | ```
101 |
102 | ```python
103 | >>> codecs.encode("test", "base62")
104 | '289lyu'
105 | >>> codecs.encode("this is a test", "base62")
106 | 'CsoB4HQ5gmgMyCenF7E'
107 | ```
108 |
109 | ```python
110 | >>> codecs.encode("This is a test !", "base91")
111 | 'nX,<:WRT%yxth90oZB^C'
112 | >>> codext.encode("This is a test !", "base91-alt")
113 | '?a&[jv4S3Wg>,71@Jo#K'
114 | ```
115 |
116 | !!! note "Generic encodings"
117 |
118 | Base encodings are available for any N other than the ones explicitly specified using the "`-generic`" suffix. Their charsets consist of printable characters from the `string` module for N up to 100 and for characters composed from the 256 possible ordinals for a greater N.
119 |
120 | :::python
121 | >>> codext.encode("test", "base3-generic")
122 | '12001002112210212211'
123 | >>> codext.encode("test", "base17-generic")
124 | '4cf60456'
125 |
126 | -----
127 |
128 | ### Base85
129 |
130 | This encoding implements various different versions of Base85.
131 |
132 | **Codec** | **Conversions** | **Aliases** | **Comment**
133 | :---: | :---: | --- | ---
134 | `base85` | text <-> ascii85 | `(base[-_]?85(?:|[-_](?:adobe|x?btoa|ipv6|rfc1924|xml|z(?:eromq)?))|z85|ascii85)` |
135 |
136 | ```python
137 | >>> codext.encode("this is a test", "ascii85")
138 | "FD,B0+DGm>@3BZ'F*%"
139 | >>> codext.decode("FD,B0+DGm>@3BZ'F*%", "ascii85")
140 | 'this is a test'
141 | >>> with open("ascii85.txt", 'w', encoding="ascii85") as f:
142 | f.write("this is a test")
143 | 14
144 | >>> with open("ascii85.txt", encoding="ascii85") as f:
145 | f.read()
146 | 'this is a test'
147 | ```
148 |
149 | -----
150 |
151 | ### Other base encodings
152 |
153 | **Codec** | **Conversions** | **Aliases** | **Comment**
154 | :---: | :---: | --- | ---
155 | `base100` | text <-> Base100 encoded text | `base[-_]?100|emoji` | Python 3 only
156 | `base122` | text <-> Base122 encoded text | `base[-_]?122` | Python 3 only
157 | `base128` | text <-> Base128 encoded text | `base[-_]?128` | Relies on the ASCII charset
158 |
159 | ```python
160 | >>> codecs.encode("this is a test", "base100")
161 | '👫👟👠👪🐗👠👪🐗👘🐗👫👜👪👫'
162 | >>> codecs.decode("👫👟👠👪🐗👠👪🐗👘🐗👫👜👪👫", "base100")
163 | 'this is a test'
164 | ```
165 |
166 | ```python
167 | >>> codecs.encode("this is a test", "base122")
168 | ':\x1aʗ\x19\x01Rs\x10\x18$\x07#\x15ft'
169 | >>> codecs.decode(":\x1aʗ\x19\x01Rs\x10\x18$\x07#\x15ft", "base122")
170 | 'this is a test'
171 | ```
172 |
173 |
--------------------------------------------------------------------------------
/docs/pages/enc/binary.md:
--------------------------------------------------------------------------------
1 | `codext` also adds common binary encodings. For instance, the Manchester code, which encodes digits, is applied to the ordinals of the input text and the resulting binary stream is converted back to characters.
2 |
3 | -----
4 |
5 | ### Baudot
6 |
7 | It supports various formats such as CCITT-1 and CCITT-2, ITA1 and ITA2, and some others.
8 |
9 | **Codec** | **Conversions** | **Aliases** | **Comment**
10 | :---: | :---: | --- | ---
11 | `baudot` | text <-> Baudot code bits | `baudot-ccitt1`, `baudot_ccitt2_lsb`, ... | supports CCITT-1, CCITT-2, EU/FR, ITA1, ITA2, MTK-2 (Python3 only), UK, ...
12 | `baudot-spaced` | text <-> Baudot code groups of bits | `baudot-spaced-ita1_lsb`, `baudot_spaced_ita2_msb`, ... | groups of 5 bits are whitespace-separated
13 | `baudot-tape` | text <-> Baudot code tape | `baudot-tape-mtk2`, `baudot_tape_murray`, ... | outputs a string that looks like a perforated tape
14 |
15 | !!! note "LSB / MSB"
16 |
17 | "`_lsb`" or "`_msb`" can be specified in the codec name to set the bits order. If not specified, it defaults to MSB.
18 |
19 |
20 | ```python
21 | >>> codext.encode("12345", "baudot-fr")
22 | '010000000100010001000010100111'
23 | >>> codext.decode("010000000100010001000010100111", "baudot-fr")
24 | '12345'
25 | ```
26 |
27 | ```python
28 | >>> codext.encode("TEST", "baudot-spaced_uk")
29 | '10101 00010 10100 10101'
30 | >>> codext.decode("10101 00010 10100 10101", "baudot-spaced_uk")
31 | 'TEST'
32 | ```
33 |
34 | ```python
35 | >>> s = codext.encode("HELLO WORLD!", "baudot-tape_ita2")
36 | >>> print(s)
37 | ***.**
38 | * *.
39 | . *
40 | * .*
41 | * .*
42 | ** .
43 | *.
44 | * .**
45 | ** .
46 | * .*
47 | * .*
48 | * . *
49 | ** .**
50 | **. *
51 | >>> codext.decode(s, "baudot-tape_ita2")
52 | 'HELLO WORLD!'
53 | ```
54 |
55 | -----
56 |
57 | ### Binary Coded Decimal (BCD)
58 |
59 | It converts characters to their ordinals, left-pads with zeros, converts digits to 4-bit groups and then makes characters with the assembled groups. It can also use a 4-bit prefix for making new characters. This allows defining extended versions of BCD.
60 |
61 | **Codec** | **Conversions** | **Aliases** | **Comment**
62 | :---: | :---: | --- | ---
63 | `bcd` | text <-> BCD encoded text | `binary_coded_decimals` |
64 | `bcd-extended0` | text <-> BCD encoded text using prefix `0000` | `bcd_ext0`, `bcd-extended-zeros`, `binary_coded_decimals_extended_0` |
65 | `bcd-extended1` | text <-> BCD encoded text using prefix `1111` | `bcd_ext1`, `bcd-extended-ones`, `binary_coded_decimals_extended_1` |
66 |
67 | ```python
68 | >>> codext.encode("Test", "bcd")
69 | '\x08A\x01\x11Q\x16'
70 | >>> codext.decode("\x08A\x01\x11Q\x16", "binary_coded_decimal")
71 | 'Test'
72 | >>> codext.encode("Test", "bcd_ext_zero")
73 | '\x00\x08\x04\x01\x00\x01\x01\x01\x05\x01\x01\x06\x00'
74 | >>> codext.decode("\x00\x08\x04\x01\x00\x01\x01\x01\x05\x01\x01\x06\x00", "bcd-ext0")
75 | 'Test'
76 | >>> codext.encode("Test", "bcd_extended_ones")
77 | '\xf0\xf8\xf4\xf1\xf0\xf1\xf1\xf1\xf5\xf1\xf1\xf6\xf0'
78 | >>> codext.decode("\xf0\xf8\xf4\xf1\xf0\xf1\xf1\xf1\xf5\xf1\xf1\xf6\xf0", "bcd_ext1")
79 | 'Test'
80 | ```
81 |
82 | -----
83 |
84 | ### Excess-3
85 |
86 | Also called *Stibitz code*, it converts characters to ordinals, left-pads with zeros and then applies Excess-3 (Stibitz) code to get groups of 4 bits that are finally reassembled into bytes.
87 |
88 | **Codec** | **Conversions** | **Aliases** | **Comment**
89 | :---: | :---: | --- | ---
90 | `excess3` | text <-> XS3 encoded text | `excess-3`, `xs3`, `stibitz` |
91 |
92 | ```python
93 | >>> codext.encode("This is a test!", "excess-3")
94 | ';t7C\x84H6T8D\x83e<£eD\x944D\x84I6`'
95 | >>> codext.decode(";t7C\x84H6T8D\x83e<£eD\x944D\x84I6`", "stibitz")
96 | 'This is a test!'
97 | ```
98 |
99 | -----
100 |
101 | ### Gray
102 |
103 | Also called *reflected binary code*, it implements the Gray code applied to characters while converted to bytes.
104 |
105 | **Codec** | **Conversions** | **Aliases** | **Comment**
106 | :---: | :---: | --- | ---
107 | `gray` | text <-> gray encoded text | `reflected-bin`, `reflected_binary` |
108 |
109 | ```python
110 | >>> codext.encode("this is a test", "gray")
111 | 'N\\]J0]J0Q0NWJN'
112 | >>> codext.decode("N\\]J0]J0Q0NWJN", "gray")
113 | 'this is a test'
114 | >>> codext.encode("THIS IS A TEST", "gray")
115 | '~lmz0mz0a0~gz~'
116 | >>> codext.decode("~lmz0mz0a0~gz~", "gray")
117 | 'THIS IS A TEST'
118 | ```
119 |
120 | -----
121 |
122 | ### Manchester
123 |
124 | This codec XORes each group of 4 bits of the input text with a 1-byte clock signal, e.g. `0x55` giving in binary `01010101`.
125 |
126 | **Codec** | **Conversions** | **Aliases** | **Comment**
127 | :---: | :---: | --- | ---
128 | `manchester` | text <-> manchester encoded text | | clock signal is `0x55` (`01010101`)
129 | `manchester-inverted` | text <-> manchester encoded text | `ethernet`, `ieee802.4` | clock signal is `0xaa` (`10101010`)
130 |
131 | ```python
132 | >>> codext.encode("This is a test!", "manchester")
133 | 'fei\x95i\x96jZYUi\x96jZYUiVYUjeifjZjeYV'
134 | >>> codext.decode("fei\x95i\x96jZYUi\x96jZYUiVYUjeifjZjeYV", "manchester")
135 | 'This is a test!'
136 | >>> codext.encode("This is a test!", "manchester-inverted")
137 | '\x99\x9a\x96j\x96i\x95¥¦ª\x96i\x95¥¦ª\x96©¦ª\x95\x9a\x96\x99\x95¥\x95\x9a¦©'
138 | >>> codext.decode("\x99\x9a\x96j\x96i\x95¥¦ª\x96i\x95¥¦ª\x96©¦ª\x95\x9a\x96\x99\x95¥\x95\x9a¦©", "ethernet")
139 | 'This is a test!'
140 | ```
141 |
142 | -----
143 |
144 | ### Rotate N bits
145 |
146 | This codec rotates each byte of an input string by N bits.
147 |
148 | !!! note "Lossless"
149 |
150 | This codec does not use the "`<<`" and "`>>`" operators as it is lossy in some cases. Instead, it rotates per group of 8 bits.
151 |
152 | **Codec** | **Conversions** | **Aliases** | **Comment**
153 | :---: | :---: | --- | ---
154 | `rotate` | text <-> N-bits-rotated text | `rotate-N`, `rotate_bits-N`, `rotate-right-N`, `rotate_left_N` | N belongs to [1,7] ; when nothing specified, it rotates to the right
155 |
156 | ```python
157 | >>> codext.encode("test", "rotate-1")
158 | ':29:'
159 | >>> codext.encode("test", "rotatebits-1")
160 | ':29:'
161 | >>> codext.encode("test", "rotate_right-1")
162 | ':29:'
163 | >>> codext.encode("test", "rotate_left_1")
164 | 'èÊæè'
165 | ```
166 |
167 |
--------------------------------------------------------------------------------
/docs/pages/enc/common.md:
--------------------------------------------------------------------------------
1 | `codext` also provides some very common encodings, for the sake of simplicity (e.g. while chaining codecs with [the CLI tool](../cli.html)).
2 |
3 | -----
4 |
5 | ### A1Z26
6 |
7 | This simple codec converts letters to their order number in the alphabet using a separator between characters and keeping words separated by a whitespace. It is similar to the [`consonant-vowel-indices`](others.html#letter-indices) encoding.
8 |
9 | **Codec** | **Conversions** | **Aliases** | **Comment**
10 | :---: | :---: | --- | ---
11 | `a1z26` | text <-> alphabet order numbers | `a1z26`, `a1z26-/`, `a1z26-,`, ... | this codec does not preserve the case and is dynamic (separator of characters in each word can be customized among these: "`-_/|,;:*`")
12 |
13 | ```python
14 | >>> codext.encode("This is a test", "a1z26")
15 | '20-8-9-19 9-19 1 20-5-19-20'
16 | >>> codext.decode("20-8-9-19 9-19 1 20-5-19-20", "a1z26")
17 | 'this is a test'
18 | ```
19 |
20 | -----
21 |
22 | ### Octal
23 |
24 | This simple codec converts characters into their octal values.
25 |
26 | **Codec** | **Conversions** | **Aliases** | **Comment**
27 | :---: | :---: | --- | ---
28 | `octal` | text <-> octal digits | `octals` | groups of 3-chars octal values when encoded
29 | `octal-spaced` | text <-> spaced octal digits | `octals-spaced` | whitespace-separated suite of variable-length groups of octal digits when encoded
30 |
31 | ```python
32 | >>> codext.encode("this is a test", "octal")
33 | '164150151163040151163040141040164145163164'
34 | >>> codext.decode("164150151163040151163040141040164145163164", "octals")
35 | 'this is a test'
36 | ```
37 |
38 | ```python
39 | >>> codext.encode("this is a test", "octal-spaced")
40 | '164 150 151 163 40 151 163 40 141 40 164 145 163 164'
41 | >>> codext.decode("164 150 151 163 40 151 163 40 141 40 164 145 163 164", "octals-spaced")
42 | 'this is a test'
43 | ```
44 |
45 | -----
46 |
47 | ### Ordinal
48 |
49 | This simple codec converts characters into their ordinals.
50 |
51 | **Codec** | **Conversions** | **Aliases** | **Comment**
52 | :---: | :---: | --- | ---
53 | `ordinal` | text <-> ordinal digits | `ordinals` | groups of 3-chars ordinal values when encoded
54 | `ordinal-spaced` | text <-> spaced ordinal digits | `ordinals-spaced` | whitespace-separated suite of variable-length groups of ordinal digits when encoded
55 |
56 | ```python
57 | >>> codext.encode("this is a test", "ordinal")
58 | '116104105115032105115032097032116101115116'
59 | >>> codext.decode("116104105115032105115032097032116101115116", "ordinals")
60 | 'this is a test'
61 | ```
62 |
63 | ```python
64 | >>> codext.encode("this is a test", "ordinal-spaced")
65 | '116 104 105 115 32 105 115 32 97 32 116 101 115 116'
66 | >>> codext.decode("116 104 105 115 32 105 115 32 97 32 116 101 115 116", "ordinals-spaced")
67 | 'this is a test'
68 | ```
69 |
70 |
--------------------------------------------------------------------------------
/docs/pages/enc/compressions.md:
--------------------------------------------------------------------------------
1 | `codext` provides a few common compression codecs.
2 |
3 | -----
4 |
5 | ### GZip
6 |
7 | **Codec** | **Conversions** | **Aliases** | **Comment**
8 | :---: | :---: | --- | ---
9 | `gzip` | data <-> GZipped data | | decoding tries with and without the file signature
10 |
11 | ```python
12 | >>> codext.encode('test', "gzip")
13 | '\x1f\x8b\x08\x00\x0esÛ_\x02ÿ+I-.\x01\x00\x0c~\x7fØ\x04\x00\x00\x00'
14 | >>> codext.decode('\x1f\x8b\x08\x00\x0esÛ_\x02ÿ+I-.\x01\x00\x0c~\x7fØ\x04\x00\x00\x00', "gzip")
15 | 'test'
16 | ```
17 |
18 | -----
19 |
20 | ### Lempel-Ziv
21 |
22 | This implements the algorithm of Lempel and Ziv of 1977 and 1978.
23 |
24 | **Codec** | **Conversions** | **Aliases** | **Comment**
25 | :---: | :---: | --- | ---
26 | `lz77` | data <-> LZ77-compressed data | |
27 | `lz78` | data <-> LZ78-compressed data | |
28 |
29 | ```python
30 | >>> codecs.encode("A test string !", "lz77")
31 | ' \x88\x0e\x86S\x99ÐA\x0029\x1aMÆq\x00\x84'
32 | >>> codecs.decode(" \x88\x0e\x86S\x99ÐA\x0029\x1aMÆq\x00\x84", "lz77")
33 | 'A test string !'
34 | ```
35 |
36 | ```python
37 | >>> codext.encode("A test string !", "lz78")
38 | 'A\x00 \x00t\x00e\x00s\x03 \x05t\x00r\x00i\x00n\x00g\x02!'
39 | >>> codext.decode("A\x00 \x00t\x00e\x00s\x03 \x05t\x00r\x00i\x00n\x00g\x02!", "lz78")
40 | 'A test string !'
41 | ```
42 |
43 | -----
44 |
45 | ### PKZip
46 |
47 | This implements multiple compression types available in the native [`zipfile`](https://docs.python.org/3/library/zipfile.html) library.
48 |
49 | **Codec** | **Conversions** | **Aliases** | **Comment**
50 | :---: | :---: | --- | ---
51 | `pkzip_deflate` | data <-> Deflated data | `deflate`, `zip_deflate` | Python3 only
52 | `pkzip_bzip2` | data <-> Bzipped data | `bz2`, `bzip2`, `zip_bz2` | Python3 only
53 | `pkzip_lzma` | data <-> LZMA-compressed data | `lzma`, `zip_lzma` | Python3 only
54 |
55 | ```python
56 | >>> codecs.encode("a test string", "deflate")
57 | 'KT(I-.Q(.)ÊÌK\x07\x00'
58 | >>> codecs.decode("KT(I-.Q(.)ÊÌK\x07\x00", "zip_deflate")
59 | 'a test string'
60 | ```
61 |
62 | ```python
63 | >>> codecs.encode("a test string", "bzip2")
64 | 'BZh91AY&SY°\x92µÏ\x00\x00\x01\x11\x80@\x00"¡\x1c\x00 \x00"\x1a\x07¤ É\x88u\x95Á`Òñw$S\x85\t\x0b\t+\\ð'
65 | >>> codecs.decode("BZh91AY&SY°\x92µÏ\x00\x00\x01\x11\x80@\x00\"¡\x1c\x00 \x00\"\x1a\x07¤ É\x88u\x95Á`Òñw$S\x85\t\x0b\t+\\ð", "bz2")
66 | 'a test string'
67 | ```
68 |
69 | ```python
70 | >>> codecs.encode("a test string", "lzma")
71 | '\t\x04\x05\x00]\x00\x00\x80\x00\x000\x88\n\x86\x94\\Uf\x14Þ\x82*\x11ëê\x93fÿý\x84 \x00'
72 | >>> codecs.decode("\t\x04\x05\x00]\x00\x00\x80\x00\x000\x88\n\x86\x94\\Uf\x14Þ\x82*\x11ëê\x93fÿý\x84 \x00", "zip_lzma")
73 | 'a test string'
74 | ```
75 |
76 |
--------------------------------------------------------------------------------
/docs/pages/enc/crypto.md:
--------------------------------------------------------------------------------
1 | `codext` also implements several simple cryptographic ciphers. But how does it relate to encoding while a key is required ? `codext` focuses on ciphers that have a weak key. With dynamically named encodings, it is then possible to define a bunch of encodings, one for each value of the key. For instance, Barbie Typewriter has a key with only 4 possible values. The `barbie` codec can then be `barbie-1`, ..., `barbie-4`.
2 |
3 | !!! note "Available masks"
4 |
5 |     Some cipher codecs use character masks to generate their alphabets. Groups of characters are indicated using a leading "`?`".
6 |
7 | `a`: printable characters
8 | `b`: all 8-bits chars
9 | `d`: digits
10 | `h`: lowercase hexadecimal
11 | `H`: uppercase hexadecimal
12 | `l`: lowercase letters
13 | `p`: punctuation characters
14 | `s`: whitespace
15 | `u`: uppercase letters
16 |
17 | When combining masks, only one occurrence of each character is taken in the final alphabet.
18 |
19 | So, for instance, the following masks yield the following alphabets:
20 |
21 | - `?l?u?d?s`: "`abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 `"
22 | - `?s.,?!?u?d`: "` .,?!ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789`"
23 |
24 | -----
25 |
26 | ### Affine Cipher
27 |
28 | This codec implements the Affine monoalphabetic substitution cipher. It is parametrizable with a mask for generating the alphabet and the parameters `a` and `b`. By default, it uses mask "`?l?u?s`" and parameters `a=1` and `b=2` but it can be set as in the examples hereafter.
29 |
30 | **Codec** | **Conversions** | **Aliases** | **Comment**
31 | :---: | :---: | --- | ---
32 | `affine` | text <-> affine ciphertext | `affine`, `affine_cipher-?l?u?d?s-5,8`, `affine-?s.,?!?u?d-23,6`, ... | Mask-generated alphabet ; uses default mask "`?l?u?s`" with `a=1` and `b=2`
33 |
34 | ```python
35 | >>> codext.encode("this is a test", "affine")
36 | 'vjkubkubcbvguv'
37 | >>> codext.decode("vjkubkubcbvguv", "affine")
38 | 'this is a test'
39 | >>> codext.encode("this is a test", "affine-?l?u?d?s-5,8")
40 | 'ORWJdWJdidOCJO'
41 | >>> codext.decode("ORWJdWJdidOCJO", "affine-?l?u?d?s-5,8")
42 | 'this is a test'
43 | >>> codext.encode("THIS IS A TEST", "affine-?s.,?!?u?d-5,8")
44 | 'AW1 D1 D2DAH A'
45 | >>> codext.decode("AW1 D1 D2DAH A", "affine-?s.,?!?u?d-5,8")
46 | 'THIS IS A TEST'
47 | ```
48 |
49 | !!! warning "Parameters `a` and `b`"
50 |
51 | Not all values are suitable for `a` and `b`. If a generated encoding map has mapping collisions, an exception is raised telling that `a` and `b` are bad.
52 |
53 | -----
54 |
55 | ### Atbash Cipher
56 |
57 | It implements the monoalphabetic substitution cipher used for the Hebrew alphabet. By default, it considers the lowercase and uppercase letters, inverted per group, as the alphabet. It can also use a mask to extend it. Note that it does not generate any error for characters that are not part of the alphabet.
58 |
59 | **Codec** | **Conversions** | **Aliases** | **Comment**
60 | :---: | :---: | --- | ---
61 | `atbash` | text <-> Atbash ciphertext | `atbash`, `atbash_cipher-?l?d?s`, ... | Mask-generated alphabet ; uses default mask "`?u?l`"
62 |
63 | ```python
64 | >>> codext.encode("this is a test", "atbash")
65 | 'gsrh rh z gvhg'
66 | >>> codext.encode("this is a test", "atbash-[?l?u?p?s]")
67 | '.^]/a]/a a.{/.'
68 | >>> codext.decode(".^]/a]/a a.{/.", "atbash_cipher_[?l?u?p?s]")
69 | 'this is a test'
70 | ```
71 |
72 | -----
73 |
74 | ### Baconian Cipher
75 |
76 | It supports only letters.
77 |
78 | **Codec** | **Conversions** | **Aliases** | **Comment**
79 | :---: | :---: | --- | ---
80 | `bacon` | text <-> Bacon ciphertext | `bacon-cipher`, `baconian_cipher`, `bacon-01`, `bacon-10` | Dynamic tokens mapping ; we can define a mapping of encoding's tokens (original tokens: `ab`)
81 |
82 | ```python
83 | >>> codext.encode("this is a test", "bacon")
84 | 'baaba aabbb abaaa baaab abaaa baaab aaaaa baaba aabaa baaab baaba'
85 | >>> codext.encode("this is a test", "bacon_01")
86 | '10010 00111 01000 10001 01000 10001 00000 10010 00100 10001 10010'
87 | >>> codext.decode("-..-. ..--- .-... -...- .-... -...- ..... -..-. ..-.. -...- -..-.", "bacon_.-")
88 | 'THIS IS A TEST'
89 | ```
90 |
91 | -----
92 |
93 | ### Barbie Typewriter
94 |
95 | It implements the cipher for its 4 different keys.
96 |
97 | **Codec** | **Conversions** | **Aliases** | **Comment**
98 | :---: | :---: | --- | ---
99 | `barbie` | text <-> Barbie ciphertext | `barbie-1`, `barbie-2`, `barbie-3`, `barbie-4`
100 |
101 | ```python
102 | >>> codext.encode("this is a test", "barbie-1")
103 | 'hstf tf i hafh'
104 | >>> codext.encode("this is a test", "barbie_3")
105 | 'fpsu su h ftuf'
106 | >>> codext.decode("fpsu su h ftuf", "barbie-3")
107 | 'this is a test'
108 | ```
109 |
110 | -----
111 |
112 | ### Citrix CTX1
113 |
114 | This implements the Citrix CTX1 password encoding algorithm.
115 |
116 | **Codec** | **Conversions** | **Aliases** | **Comment**
117 | :---: | :---: | --- | ---
118 | `citrix` | text <-> Citrix CTX1 ciphertext | `citrix`, `citrix-1`, `citrix_ctx1` |
119 |
120 | ```python
121 | >>> codext.encode("this is a test", "citrix-ctx1")
122 | 'NBBMNAAGIDEPJJBMNIFNIMEMJKEL'
123 | >>> codext.decode("NBBMNAAGIDEPJJBMNIFNIMEMJKEL", "citrix-ctx1")
124 | 'this is a test'
125 | ```
126 |
127 | -----
128 |
129 | ### Rail Fence Cipher
130 |
131 | This implements the Rail Fence encoding algorithm, using 3 rails and offset 0 as the default parameters. The encoding fence is built from the top ; the `up` flag can be used to build the fence from the bottom. Note that trying parameters that do not fit the input length will trigger a `ValueError` mentioning the bad value.
132 |
133 | **Codec** | **Conversions** | **Aliases** | **Comment**
134 | :---: | :---: | --- | ---
135 | `rail` | text <-> rail fence ciphertext, X rails and Y offset | `rail-X-Y`, `rail_X_Y`, `rail-X-Y-up`, `zigzag`, ... |
136 |
137 | ```python
138 | >>> codext.encode("this is a test", "zigzag")
139 | 't ashsi etist'
140 | >>> codext.encode("this is a test", "rail-5-3")
141 | 'it sss etiath '
142 | >>> codext.decode("it sss etiath ", "zigzag_5-3")
143 | 'this is a test'
144 | ```
145 |
146 | -----
147 | ### ROT N
148 |
149 | This is a dynamic encoding, that is, it can be called with an integer to define the ROT offset. Encoding will apply a positive offset, decoding will apply a negative one.
150 |
151 | **Codec** | **Conversions** | **Aliases** | **Comment**
152 | :---: | :---: | --- | ---
153 | `rot` | text <-> rot(1) ciphertext | `rot1`, `rot-1`, `rot_25`, `caesar13` | Dynamic ROT parameter ; belongs to [1, 26[
154 | `rot47` | text <-> rot47 ciphertext | |
155 |
156 | ```python
157 | >>> codext.encode("this is a test", "rot-15")
158 | 'iwxh xh p ithi'
159 | >>> codext.encode("iwxh xh p ithi", "rot20")
160 | 'cqrb rb j cnbc'
161 | >>> codext.decode("cqrb rb j cnbc", "rot_9")
162 | 'this is a test'
163 | ```
164 |
165 | -----
166 |
167 | ### Shift
168 |
169 | This is a dynamic encoding, that is, it can be called with an integer to define the shift offset. Encoding will apply a positive offset, decoding will apply a negative one.
170 |
171 | **Codec** | **Conversions** | **Aliases** | **Comment**
172 | :---: | :---: | --- | ---
173 | `shift` | text <-> shift(1) ciphertext | `shift1`, `shift-158`, `shift_255` | Dynamic shift parameter ; belongs to [1, 256[
174 |
175 | ```python
176 | >>> codext.encode("this is a test", "shift-3")
177 | 'wklv#lv#d#whvw'
178 | >>> codext.decode("wklv#lv#d#whvw", "shift10")
179 | 'mabl\x19bl\x19Z\x19m^lm'
180 | >>> codext.encode("mabl\x19bl\x19Z\x19m^lm", "ordshift_7")
181 | 'this is a test'
182 | ```
183 |
184 | -----
185 |
186 | ### XOR with 1 byte
187 |
188 | This is a dynamic encoding, that is, it can be called with an integer to define the ordinal of the byte to XOR with the input text.
189 |
190 | **Codec** | **Conversions** | **Aliases** | **Comment**
191 | :---: | :---: | --- | ---
192 | `xor` | text <-> XOR(1) ciphertext | `XOR1`, `xor22`, `xor-158`, `xor_255` | Dynamic XOR parameter ; belongs to [1, 256[
193 |
194 | ```python
195 | >>> codext.encode("this is a test", "xor-10")
196 | '~bcy*cy*k*~oy~'
197 | >>> codext.encode("this is a test", "xor-30")
198 | 'jvwm>wm>\x7f>j{mj'
199 | >>> codext.decode("this is a test", "xor-30")
200 | 'jvwm>wm>\x7f>j{mj'
201 | >>> codext.encode("~bcy*cy*k*~oy~", "xor-10")
202 | 'this is a test'
203 | ```
204 |
205 |
--------------------------------------------------------------------------------
/docs/pages/enc/hashing.md:
--------------------------------------------------------------------------------
1 | `codext` provides hash functions through the `.encode(...)` API for convenience (e.g. while chaining codecs with [the CLI tool](../cli.html)).
2 |
3 | -----
4 |
5 | ### BLAKE
6 |
7 | These one-way transformation functions all rely on the native [`hashlib`](https://docs.python.org/3/library/hashlib.html) library.
8 |
9 | **Codec** | **Conversions** | **Aliases** | **Comment**
10 | :---: | :---: | --- | ---
11 | `blake2b` | data --> Blake2b(data, length) | | Python3 only, parametrized ; *length* belongs to [1,64]
12 | `blake2s` | data --> Blake2s(data, length) | | Python3 only, parametrized ; *length* belongs to [1,32]
13 |
14 | ```python
15 | >>> codext.encode("this is a test", "blake2b")
16 | '61a548f2de1c318ba91d5207007861010f69a43ec663fe487d8403282c934ea725dc0bb172256ac99625ad64cca6a2c4d61c650a35afab4787dc678e19071ef9'
17 | >>> codext.encode("this is a test", "blake2s")
18 | 'f20146c054f9dd6b6764b6c09357f7cd7551dfbcba545972a4c8166df8afde60'
19 | ```
20 |
21 | -----
22 |
23 | ### Checksums
24 |
25 | These one-way transformation functions are mostly computed with a generic CRC.
26 |
27 | **Codec** | **Conversions** | **Aliases** | **Comment**
28 | :---: | :---: | --- | ---
29 | `adler32` | data --> Adler32(data) | | relies on [`zlib`](https://docs.python.org/3/library/zlib.html)
30 | `crcN` | data --> CRCN(data) | many available variants ; see [this source](https://github.com/dhondta/python-codext/blob/master/codext/hashing/checksums.py) |
31 |
32 | ```python
33 | >>> codext.encode("This is a test string !", "crc10-gsm")
34 | '187'
35 | >>> codext.encode("This is a test string !", "crc14-gsm")
36 | '0ef2'
37 | >>> codext.encode("This is a test string !", "crc16-profibus")
38 | 'a865'
39 | >>> codext.encode("This is a test string !", "crc30")
40 | '2a179ad0'
41 | >>> codext.encode("This is a test string !", "crc32-autosar")
42 | 'acfc9276'
43 | >>> codext.encode("This is a test string !", "crc40-gsm")
44 | 'b6732ce009'
45 | >>> codext.encode("This is a test string !", "crc64")
46 | 'e89b72737a60f502'
47 | >>> codext.encode("This is a test string !", "crc82-darc")
48 | '37a49332f8907c01de3d8'
49 | ```
50 |
51 | -----
52 |
53 | ### Crypt
54 |
55 | This one-way transformation function relies on the native [`crypt`](https://docs.python.org/3/library/crypt.html) library.
56 |
57 | **Codec** | **Conversions** | **Aliases** | **Comment**
58 | :---: | :---: | --- | ---
59 | `crypt` | data --> Crypt(data, method) | `crypt-blowfish`, `crypt_md5`, ... | Python3 and Unix only, parametrized ; *method* is one of the `METHOD_[...]` values implemented in the `crypt` module for generating a salt.
60 |
61 | ```python
62 | >>> codext.encode("This is a test string !", "crypt")
63 | '$2b$12$xBIgGvCjYxIZ4ymKtstID.Wmf8eESVVMNU2DClPKVU37LQ5OdfUBy'
64 | >>> codext.encode("This is a test string !", "crypt_md5")
65 | '$1$qLvI5Kml$kXm7/Yvm87XcnzDdAgfsX1'
66 | >>> codext.encode("This is a test string !", "crypt-sha512")
67 | '$6$P9pjfscoLy9vpRrH$KHuRMbAltdkIQ/XL9HqrRRQTZUB2jFucH21RPbDXlsNV/ffek9MFJVZ0P2qZMTxL8m1MO0rS8UQgxj2x/Xs9A1'
68 | ```
69 |
70 | -----
71 |
72 | ### Message Digest
73 |
74 | **Codec** | **Conversions** | **Aliases** | **Comment**
75 | :---: | :---: | --- | ---
76 | `md2` | data --> MD2(data) | |
77 | `md4` | data --> MD4(data) | | relies on [`hashlib`](https://docs.python.org/3/library/hashlib.html)
78 | `md5` | data --> MD5(data) | | relies on [`hashlib`](https://docs.python.org/3/library/hashlib.html)
79 |
80 | ```python
81 | >>> codext.encode("This is a test string !", "md2")
82 | '5200e226ea210b854974c7781b3b20d6'
83 | >>> codext.encode("This is a test string !", "md4")
84 | 'ee4170b214eaac5be6a13d64a31b60b3'
85 | >>> codext.encode("This is a test string !", "md5")
86 | '5ba93d5b8e8efd9135f0030c978dd64e'
87 | ```
88 |
89 | -----
90 |
91 | ### Secure Hash Algorithm
92 |
93 | These one-way transformation functions all rely on the native [`hashlib`](https://docs.python.org/3/library/hashlib.html) library.
94 |
95 | **Codec** | **Conversions** | **Aliases** | **Comment**
96 | :---: | :---: | --- | ---
97 | `sha1` | data --> SHA1(data) | |
98 | `sha224` | data --> SHA224(data) | |
99 | `sha256` | data --> SHA256(data) | |
100 | `sha384` | data --> SHA384(data) | |
101 | `sha3_224` | data --> SHA3-224(data) | | Python3 only
102 | `sha3_256` | data --> SHA3-256(data) | | Python3 only
103 | `sha3_384` | data --> SHA3-384(data) | | Python3 only
104 | `sha3_512` | data --> SHA3-512(data) | | Python3 only
105 | `sha512` | data --> SHA512(data) | |
106 |
107 | ```python
108 | >>> codext.encode("This is a test string !", "shake_256_64")
109 | '01a14a746d7c1d28927fe6078fdb9dcc8fabc45da58b3d1af13175b6278a6e824241927c47b4c5ced2ff629833574c9d985410d97c5c3d54d0f15b548cf2713d'
110 | >>> codext.encode("This is a test string !", "sha224")
111 | '85fbf14cc6e3637c303999c18f9ac3209405f4d7a11cabca8c67d0da'
112 | >>> codext.encode("This is a test string !", "sha512")
113 | '125683c8e8d252c753b7d1fd9bd224c638bc4b9c0311bf4173b404f1b1097a805a74d575b2a2704305e1317eafe0a1c821c54d63155f5e727c8e67ffdd3c42ab'
114 | >>> codext.encode("This is a test string !", "sha3_384")
115 | 'f0947477521346fb9cad9d816b19a1ba0bbe2e9315faf486eeed160f5f0e8c3b78bc27d189e76e91b327ccec88938efd'
116 | ```
117 |
118 | -----
119 |
120 | ### SHAKE
121 |
122 | These one-way transformation functions rely on the native [`hashlib`](https://docs.python.org/3/library/hashlib.html) library.
123 |
124 | **Codec** | **Conversions** | **Aliases** | **Comment**
125 | :---: | :---: | --- | ---
126 | `shake_128` | data --> Shake128(data, length) | | Python3 only, parametrized ; *length* belongs to [1,[
127 | `shake_256` | data --> Shake256(data, length) | | Python3 only, parametrized ; *length* belongs to [1,[
128 |
129 | ```python
130 | >>> codext.encode("This is a test string !", "shake_128")
131 | 'c43c192074c7d2e3e4a2c21e8f9b1bc5129b00d4c3dfa6a6fc55eba7aed13d5afd110db5bffede68496477b40f405da696dfb8e7182ca05e83ee5d301ac2f0b1b516df2d3c694f8e5c26b0d23122869130e09f705a2d59296c4de68c8632d2836952c869e5e015e9f3b3f9d83a09877d00224bebece7ac2bd6ffd11325e63b84'
132 | >>> codext.encode("This is a test string !", "shake_128-16")
133 | 'c43c192074c7d2e3e4a2c21e8f9b1bc5'
134 | >>> codext.encode("This is a test string !", "shake_256")
135 | '01a14a746d7c1d28927fe6078fdb9dcc8fabc45da58b3d1af13175b6278a6e824241927c47b4c5ced2ff629833574c9d985410d97c5c3d54d0f15b548cf2713dc7c8d5145a74f6c5d613d769c03bd315350121f164f8b059fbd34548d5c1808e975858d5ea4b6edb889381a712d03954e04eacd8a077d016d8994610e9663058bef533bc71d38cb71974c7ef8abb9d2c7a0c4dfb7d007811375da4da526e37c101cead641b81faf51097b607aa3c410274074825a99d1f2a598acff414b8320be6104887c6f8df0e66aa16286da3b043cabeb90bd001e7512169c41ef8ad502666358bc7a2ea30d40a9e597dcc569cf5f8b3d383ed7c72690aca893be2ffb104'
136 | >>> codext.encode("This is a test string !", "shake_256_64")
137 | '01a14a746d7c1d28927fe6078fdb9dcc8fabc45da58b3d1af13175b6278a6e824241927c47b4c5ced2ff629833574c9d985410d97c5c3d54d0f15b548cf2713d'
138 | ```
139 |
140 |
--------------------------------------------------------------------------------
/docs/pages/enc/languages.md:
--------------------------------------------------------------------------------
1 | `codext` also adds some common languages for encoding.
2 |
3 | -----
4 |
5 | ### Braille
6 |
7 | It supports letters, digits and some special characters.
8 |
9 | **Codec** | **Conversions** | **Aliases** | **Comment**
10 | :---: | :---: | --- | ---
11 | `braille` | text <-> braille symbols | | Python 3 only
12 |
13 | ```python
14 | >>> codext.encode("this is a test", "braille")
15 | '⠞⠓⠊⠎⠀⠊⠎⠀⠁⠀⠞⠑⠎⠞'
16 | >>> codext.encode("THIS IS A TEST", "braille")
17 | '⠞⠓⠊⠎⠀⠊⠎⠀⠁⠀⠞⠑⠎⠞'
18 | >>> codext.decode("⠞⠓⠊⠎⠀⠊⠎⠀⠁⠀⠞⠑⠎⠞", "braille")
19 | 'this is a test'
20 | ```
21 |
22 | -----
23 |
24 | ### Galactic
25 |
26 | This implements the [Minecraft's enchanting table](https://www.thegamer.com/minecraft-enchantment-table-language-guide/) using resembling Unicode characters.
27 |
28 | **Codec** | **Conversions** | **Aliases** | **Comment**
29 | :---: | :---: | --- | ---
30 | `galactic` | text <-> Minecraft enchantment symbols | `galactic-alphabet`, `minecraft_enchantment`, `minecraft-enchanting-language` | Python 3 only
31 |
32 | ```python
33 | >>> codext.encode("this is a test", "galactic")
34 | 'ℸ₸╎߆ ╎߆ ᒋ ℸᒷ߆ℸ'
35 | >>> codext.decode("ℸ₸╎߆ ╎߆ ᒋ ℸᒷ߆ℸ", "galactic")
36 | 'this is a test'
37 | ```
38 |
39 | -----
40 |
41 | ### Ipsum
42 |
43 | This implements a codec that uses lorem ipsum words. It selects random words per letter and keeps the following punctuations: "`.,:;+=-*/\\`".
44 |
45 | **Codec** | **Conversions** | **Aliases** | **Comment**
46 | :---: | :---: | --- | ---
47 | `ipsum` | text <-> latin words | `loremipsum`, `lorem-ipsum` | words from the classical lorem ipsum
48 |
49 | ```python
50 | >>> codext.encode("This is a test.", "ipsum")
51 | 'Torquent hystericus id sit interdum sit aliquam tempor erat scelerisque taciti.'
52 | >>> codext.decode("Torquent hystericus id sit interdum sit aliquam tempor erat scelerisque taciti.", "lorem-ipsum")
53 | 'This is a test.'
54 | ```
55 |
56 | -----
57 |
58 | ### Leetspeak
59 |
60 | This implements a very basic ruleset of elite speaking.
61 |
62 | **Codec** | **Conversions** | **Aliases** | **Comment**
63 | :---: | :---: | --- | ---
64 | `leetspeak` | text <-> leetspeak encoded text | `leet`, `1337`, `leetspeak` | based on minimalistic elite speaking rules
65 |
66 | ```python
67 | >>> codext.encode("this is a test", "leetspeak")
68 | '7h15 15 4 7357'
69 | >>> codext.decode("7h15 15 4 7357", "leetspeak")
70 | 'ThIS IS A TEST'
71 | ```
72 |
73 | -----
74 |
75 | ### Morse
76 |
77 | It supports of course letters and digits, but also a few special characters: `.,;:?!/\\@&=-_'" $()`.
78 |
79 | **Codec** | **Conversions** | **Aliases** | **Comment**
80 | :---: | :---: | --- | ---
81 | `morse` | text <-> morse encoded text | none | uses whitespace as a separator, dynamic tokens mapping ; we can define a mapping of encoding's tokens (original tokens: `/-.`)
82 |
83 | ```python
84 | >>> codext.encode("this is a test", "morse")
85 | '- .... .. ... / .. ... / .- / - . ... -'
86 | >>> codext.encode("this is a test", "morse/-.")
87 | '- .... .. ... / .. ... / .- / - . ... -'
88 | >>> codext.encode("this is a test", "morse_ABC")
89 | 'B CCCC CC CCC A CC CCC A CB A B C CCC B'
90 | >>> codext.decode("- .... .. ... / .. ... / .- / - . ... -", "morse")
91 | 'this is a test'
92 | >>> with codext.open("morse.txt", 'w', encoding="morse") as f:
93 | f.write("this is a test")
94 | 14
95 | >>> with codext.open("morse.txt", encoding="morse") as f:
96 | f.read()
97 | 'this is a test'
98 | ```
99 |
100 | -----
101 |
102 | ### Navajo
103 |
104 | It implements the letters from the [Navajo Code Talkers' Dictionary](https://www.history.navy.mil/research/library/online-reading-room/title-list-alphabetically/n/navajo-code-talker-dictionary.html). It conserves digits and newlines.
105 |
106 | **Codec** | **Conversions** | **Aliases** | **Comment**
107 | :---: | :---: | --- | ---
108 | `navajo` | text <-> Navajo | |
109 |
110 | ```python
111 | >>> import codext
112 | >>> codext.encode("this is a test 123", "navajo")
113 | 'a-woh cha tkin klesh - a-chi klesh - be-la-sana - a-woh dzeh klesh a-woh - 1 2 3'
114 | >>> codext.decode("a-woh cha tkin klesh - a-chi klesh - be-la-sana - a-woh dzeh klesh a-woh - 1 2 3", "navajo")
115 | 'this is a test 123'
116 | ```
117 |
118 | -----
119 |
120 | ### Radio Alphabet
121 |
122 | This is also known as the [NATO phonetic alphabet](https://en.wikipedia.org/wiki/NATO_phonetic_alphabet).
123 |
124 | **Codec** | **Conversions** | **Aliases** | **Comment**
125 | :---: | :---: | --- | ---
126 | `radio` | text <-> radio alphabet words | `military_alphabet`, `nato-phonetic-alphabet`, `radio-alphabet` |
127 |
128 | ```python
129 | >>> codext.encode("foobar", "nato_phonetic_alphabet")
130 | 'Foxtrot Oscar Oscar Bravo Alpha Romeo'
131 | >>> codext.decode("Foxtrot Oscar Oscar Bravo Alpha Romeo", "radio-alphabet")
132 | 'FOOBAR'
133 | ```
134 |
135 | -----
136 |
137 | ### Southpark
138 |
139 | This encodes text according to Kenny's language in Southpark.
140 |
141 | **Codec** | **Conversions** | **Aliases** | **Comment**
142 | :---: | :---: | --- | ---
143 | `southpark` | text <-> Kenny's language | `kenny` | Dynamic tokens mapping ; we can define a mapping of encoding's tokens (original tokens: `fFMmpP`)
144 | `southpark-icase` | text <-> Kenny's language | `kenny_icase` | Dynamic tokens mapping ; we can define a mapping of encoding's tokens (original tokens: `FMP`)
145 |
146 | ```python
147 | >>> codext.encode("This is a Test", "southpark")
148 | 'FmpmfpmfffmmfffmfffmmfffmmmfffFmpmppfmmfmp'
149 | >>> codext.decode('FmpmfpmfffmmfffmfffmmfffmmmfffFmpmppfmmfmp', "kenny")
150 | 'This is a Test'
151 | >>> codext.encode("This is a test", "kenny_123456")
152 | '245415411144111411144211444111145455144145'
153 | >>> codext.decode("245415411144111411144211444111145455144145", "kenny-123456")
154 | 'This is a test'
155 | >>> codext.encode("this is a test", "kenny_icase")
156 | 'FMPMFPMFFFMMFFFMFFFMMFFFMMMFFFFMPMPPFMMFMP'
157 | >>> codext.decode("FMPMFPMFFFMMFFFMFFFMMFFFMMMFFFFMPMPPFMMFMP", "southpark-icase")
158 | 'this is a test'
159 | >>> codext.encode("this is a test", "southpark-icase_123")
160 | '123213211122111211122111222111123233122123'
161 | >>> codext.decode('123213211122111211122111222111123233122123', "kenny_icase-123")
162 | 'this is a test'
163 | ```
164 |
165 | -----
166 |
167 | ### Tap
168 |
169 | This codec implements the [tap/knock code](https://en.wikipedia.org/wiki/Tap_code) commonly used by prisoners. It uses 25 letters, "*k*" is encoded to the same token as "*c*".
170 |
171 | **Codec** | **Conversions** | **Aliases** | **Comment**
172 | :---: | :---: | --- | ---
173 | `tap` | text <-> tap/knock encoded text | `knock`, `tap-code` | uses a large Unicode whitespace as a token separator ; Python 3 only
174 |
175 | ```python
176 | >>> codext.encode("this is a test", "tap")
177 | '.... ....⠀.. ...⠀.. ....⠀.... ...⠀ ⠀.. ....⠀.... ...⠀ ⠀. .⠀ ⠀.... ....⠀. .....⠀.... ...⠀.... ....'
178 | >>> codext.decode(".... ....⠀.. ...⠀.. ....⠀.... ...⠀ ⠀.. ....⠀.... ...⠀ ⠀. .⠀ ⠀.... ....⠀. .....⠀.... ...⠀.... ....", "knock")
179 | 'this is a test'
180 | ```
181 |
182 | -----
183 |
184 | ### Tom-Tom
185 |
186 | This codec is similar to morse. It converts text into slashes and backslashes.
187 |
188 | **Codec** | **Conversions** | **Aliases** | **Comment**
189 | :---: | :---: | --- | ---
190 | `tomtom` | text <-> tom-tom encoded text | `tom-tom` | uses "`|`" as a separator
191 |
192 | ```python
193 | >>> codext.encode("this is a test", "tom-tom")
194 | '\\\\/\\ /\\\\ /\\\\\\ \\/\\ | /\\\\\\ \\/\\ | / | \\\\/\\ /\\ \\/\\ \\\\/\\'
195 | >>> codext.decode("\\\\/\\ /\\\\ /\\\\\\ \\/\\ | /\\\\\\ \\/\\ | / | \\\\/\\ /\\ \\/\\ \\\\/\\", "tomtom")
196 | 'THIS IS A TEST'
197 | ```
198 |
--------------------------------------------------------------------------------
/docs/pages/enc/others.md:
--------------------------------------------------------------------------------
1 | ## Others
2 |
3 | All kinds of other codecs are categorized in "*Others*".
4 |
5 | -----
6 |
7 | ### DNA
8 |
9 | This implements the 8 methods of ATGC nucleotides following the rule of complementary pairing, according to the literature about coding and computing of DNA sequences.
10 |
11 | **Codec** | **Conversions** | **Aliases** | **Comment**
12 | :---: | :---: | --- | ---
13 | `dna` (rule 1) | text <-> DNA-1 | `dna1`, `dna-1`, `dna_1` |
14 | `dna` (rule X) | text <-> DNA-X | ... |
15 | `dna` (rule 8) | text <-> DNA-8 | `dna8`, `dna-8`, `dna_8` |
16 |
17 | ```python
18 | >>> for i in range(8):
19 | print(codext.encode("this is a test", "dna-%d" % (i + 1)))
20 | GTGAGCCAGCCGGTATACAAGCCGGTATACAAGCAGACAAGTGAGCGGGTATGTGA
21 | CTCACGGACGGCCTATAGAACGGCCTATAGAACGACAGAACTCACGCCCTATCTCA
22 | ACAGATTGATTAACGCGTGGATTAACGCGTGGATGAGTGGACAGATAAACGCACAG
23 | AGACATTCATTAAGCGCTCCATTAAGCGCTCCATCACTCCAGACATAAAGCGAGAC
24 | TCTGTAAGTAATTCGCGAGGTAATTCGCGAGGTAGTGAGGTCTGTATTTCGCTCTG
25 | TGTCTAACTAATTGCGCACCTAATTGCGCACCTACTCACCTGTCTATTTGCGTGTC
26 | GAGTGCCTGCCGGATATCTTGCCGGATATCTTGCTGTCTTGAGTGCGGGATAGAGT
27 | CACTCGGTCGGCCATATGTTCGGCCATATGTTCGTCTGTTCACTCGCCCATACACT
28 | >>> codext.decode("GTGAGCCAGCCGGTATACAAGCCGGTATACAAGCAGACAAGTGAGCGGGTATGTGA", "dna-1")
29 | 'this is a test'
30 | ```
31 |
32 | -----
33 |
34 | ### Letter indices
35 |
36 | This encodes consonants and/or vowels with their respective indices. This codec is case insensitive, strips white spaces and only applies to letters.
37 |
38 | **Codec** | **Conversions** | **Aliases** | **Comment**
39 | :---: | :---: | --- | ---
40 | `consonant-indices` | text <-> text with consonant indices | `consonants_indices`, `consonants_index` | while decoding, searches from the longest match, possibly not producing the original input
41 | `vowel-indices` | text <-> text with vowel indices | `vowels_indices`, `vowels_index` |
42 | `consonant-vowel-indices` | text <-> text with consonant and vowel indices | `consonants-vowels_index` | prefixes consonants with `C` and vowels with `V`
43 |
44 | ```python
45 | >>> codext.encode("This is a test", "consonant-index")
46 | '166I15I15A16E1516'
47 | >>> codext.decode("166I15I15A16E1516", "consonant-index")
48 | 'THISISATEST'
49 | ```
50 |
51 | ```python
52 | >>> codext.encode("This is a test", "vowel-index")
53 | 'TH3S3S1T2ST'
54 | >>> codext.decode("TH3S3S1T2ST", "vowel-index")
55 | 'THISISATEST'
56 | ```
57 |
58 | ```python
59 | >>> codext.encode("This is a test", "consonant-vowel-index")
60 | 'C16C6V3C15V3C15V1C16V2C15C16'
61 | >>> codext.decode("C16C6V3C15V3C15V1C16V2C15C16", "consonant-vowel-index")
62 | 'THISISATEST'
63 | ```
64 |
65 | -----
66 |
67 | ### Markdown
68 |
69 | This is only for "encoding" (converting) Markdown to HTML.
70 |
71 | **Codec** | **Conversions** | **Aliases** | **Comment**
72 | :---: | :---: | --- | ---
73 | `markdown` | Markdown --> HTML | `markdown`, `Markdown`, `md` | unidirectional !
74 |
75 | ```python
76 | >>> codext.encode("# Test\nparagraph", "markdown")
77 | '
Test
\n\nparagraph
\n'
78 | ```
79 |
80 |
--------------------------------------------------------------------------------
/docs/pages/enc/stegano.md:
--------------------------------------------------------------------------------
1 | `codext` defines a few steganography-related encodings. While encoding is not really steganography (that is, concealing data within data), the following codecs are worth creating this category as they relate to converting data into something that could mislead the unaware reader.
2 |
3 | -----
4 |
5 | ### Hexagrams (I Ching)
6 |
7 | This uses Base64 and then encodes output characters to [I Ching Hexagrams](https://en.wikipedia.org/wiki/Hexagram_%28I_Ching%29) such as implemented [here](https://github.com/qntm/hexagram-encode).
8 |
9 | **Codec** | **Conversions** | **Aliases** | **Comment**
10 | :---: | :---: | --- | ---
11 | `hexagram` | text <-> hexagrams-encoded Base64 | `hexagrams`, `i-ching-hexagrams`, `iching` | Python3 only
12 |
13 | ```python
14 | >>> codext.encode("this is a test", "hexagram")
15 | '䷰䷭䷚䷔䷞䷺䷗䷔䷞䷺䷗䷚䷏䷊䷂䷕䷞䷈䷇☯'
16 | >>> codext.decode("䷰䷭䷚䷔䷞䷺䷗䷔䷞䷺䷗䷚䷏䷊䷂䷕䷞䷈䷇☯", "iching")
17 | 'this is a test'
18 | ```
19 |
20 | -----
21 |
22 | ### Klopf Code
23 |
24 | This is a Polybius code with the trivial alphabetical distribution ("A" -> (1,1), "B" -> (2,1), ...). This can be tested [here](https://gc.de/gc/klopfcode/).
25 |
26 | **Codec** | **Conversions** | **Aliases** | **Comment**
27 | :---: | :---: | --- | ---
28 | `klopf` | text <-> klopf encoded text | `klopfcode` |
29 |
30 | ```python
31 | >>> codext.encode("this is a test", "klopf")
32 | '44324234 4234 11 44513444'
33 | >>> codext.decode("44324234 4234 11 44513444", "klopf")
34 | 'THIS IS A TEST'
35 | ```
36 |
37 | -----
38 |
39 | ### Resistor Color Codes
40 |
41 | This uses the [electronic color code](https://en.wikipedia.org/wiki/Electronic_color_code#Resistor_color-coding) to encode digits, displaying colors in the terminal with ANSI color codes.
42 |
43 | **Codec** | **Conversions** | **Aliases** | **Comment**
44 | :---: | :---: | --- | ---
45 | `resistor` | text <-> resistor colors | `condensator`, `resistors-color`, `resistor_color_code` | visually, it only works in a terminal supporting ANSI color codes
46 |
47 | ```python
48 | >>> codext.encode("1234", "resistor")
49 | '\x1b[48;5;130m \x1b[0;00m\x1b[48;5;1m \x1b[0;00m\x1b[48;5;214m \x1b[0;00m\x1b[48;5;11m \x1b[0;00m'
50 | >>> codext.decode("\x1b[48;5;130m \x1b[0;00m\x1b[48;5;1m \x1b[0;00m\x1b[48;5;214m \x1b[0;00m\x1b[48;5;11m \x1b[0;00m", "condensators_color")
51 | '1234'
52 | ```
53 |
54 | -----
55 |
56 | ### Rick Cipher
57 |
58 | This converts letters to words from Rick Astley's famous song "*Never gonna give you up*".
59 |
60 | **Codec** | **Conversions** | **Aliases** | **Comment**
61 | :---: | :---: | --- | ---
62 | `rick` | text <-> words from Rick's song | `rick-astley`, `rick_cipher`, `rick-astley-cipher` | case-insensitive while encoding
63 |
64 | ```python
65 | >>> codext.encode("Test String", "rick")
66 | 'TELL UP gonna TELL + gonna TELL NEVer You AROUND Gonna'
67 | >>> codext.decode("TELL UP gonna TELL + gonna TELL NEVer You AROUND Gonna", "rick")
68 | 'TEST STRING'
69 | ```
70 |
71 | -----
72 |
73 | ### SMS (T9)
74 |
75 | This codec implements the SMS encoding, also called T9, that is the conversion from characters to their corresponding phone keystrokes.
76 |
77 | **Codec** | **Conversions** | **Aliases** | **Comment**
78 | :---: | :---: | --- | ---
79 | `sms` | text <-> phone keystrokes | `nokia`, `nokia_3310`, `t9` | uses "`-`" as a separator for encoding, "`-`" or "`_`" or whitespace for decoding
80 |
81 | ```python
82 | >>> codext.encode("this is a test", "sms")
83 | '8-44-444-7777-0-444-7777-0-2-0-8-33-7777-8'
84 | >>> codext.decode("8_44_444_7777_0_444_7777_0_2_0_8_33_7777_8", "nokia")
85 | 'this is a test'
86 | >>> codext.decode("8_44_444_7777_0-444-7777_0-2_0_8_33-7777-8", "t9")
87 | 'this is a test'
88 | ```
89 |
90 | -----
91 |
92 | ### Whitespaces
93 |
94 | This simple encoding replaces zeros and ones of the binary version of the input text with spaces and tabs. It is supported either with its original mapping or with the inverted mapping.
95 |
96 | !!! warning "Encoding, not programming !"
97 |
98 | This should not be confused with the [whitespace esoteric language](https://en.wikipedia.org/wiki/Whitespace_(programming_language)).
99 |
100 | **Codec** | **Conversions** | **Aliases** | **Comment**
101 | :---: | :---: | --- | ---
102 | `whitespace` | text <-> whitespaces and tabs | `whitespaces?-inv(erted)?` | The default encoding uses tabs for zeros and spaces for ones
103 | `whitespace_after_before` | text <-> whitespaces[letter]whitespaces | | This codec encodes characters as new characters with whitespaces before and after according to an equation described in the codec name (e.g. "`whitespace+2*after-3*before`")
104 |
105 | ```python
106 | >>> codext.encode("test", "whitespace")
107 | '\t \t \t\t\t \t\t \t \t \t\t \t \t \t\t'
108 | >>> codext.encode("test", "whitespaces")
109 | '\t \t \t\t\t \t\t \t \t \t\t \t \t \t\t'
110 | >>> codext.encode("test", "whitespaces-inv")
111 | ' \t\t\t \t \t\t \t \t \t\t\t \t\t \t\t\t \t '
112 | >>> codext.decode(" \t\t\t \t \t\t \t \t \t\t\t \t\t \t\t\t \t ", "whitespaces_inverted")
113 | 'test'
114 | ```
115 |
116 | ```python
117 | >>> codext.encode("test", "whitespace+after-before")
118 | ' m \n l \n u \n m '
119 | >>> codext.decode(" m \n l \n u \n m ", "whitespace+after-before")
120 | 'test'
121 | ```
122 |
--------------------------------------------------------------------------------
/docs/pages/enc/web.md:
--------------------------------------------------------------------------------
1 | `codext` implements some common Web-related encodings.
2 |
3 | -----
4 |
5 | ### HTML Entities
6 |
7 | This implements the full list of characters available at [this reference](https://dev.w3.org/html5/html-author/charref).
8 |
9 | **Codec** | **Conversions** | **Aliases** | **Comment**
10 | :---: | :---: | --- | ---
11 | `html` | text <-> HTML entities | `html-entity`, `html_entities` | implements entities according to [this reference](https://dev.w3.org/html5/html-author/charref)
12 |
13 | ```python
14 | >>> codext.encode("Тħĩş Їś ą Ţêšŧ", "html")
15 | 'Тħĩş Їś ą Ţêšŧ'
16 | >>> codext.decode("Тħĩş Їś ą Ţêšŧ", "html-entities")
17 | 'Тħĩş Їś ą Ţêšŧ'
18 | ```
19 |
20 | -----
21 |
22 | ### URL
23 |
24 | This handles URL encoding, regardless of the case when decoding and with no error.
25 |
26 | **Codec** | **Conversions** | **Aliases** | **Comment**
27 | :---: | :---: | --- | ---
28 | `url` | text <-> URL encoded text | `url`, `urlencode` |
29 |
30 | ```python
31 | >>> codecs.encode("?=this/is-a_test/../", "url")
32 | '%3F%3Dthis%2Fis-a_test%2F%2E%2E%2F'
33 | >>> codext.decode("%3F%3Dthis%2Fis-a_test%2F%2E%2E%2F", "urlencode")
34 | '?=this/is-a_test/../'
35 | >>> codext.decode("%3f%3dthis%2fis-a_test%2f%2e%2e%2f", "urlencode")
36 | '?=this/is-a_test/../'
37 | ```
38 |
39 |
--------------------------------------------------------------------------------
/docs/pages/guessing.md:
--------------------------------------------------------------------------------
1 | For decoding multiple layers of codecs, `codext` features a guess mode relying on an Artificial Intelligence algorithm, the Breadth-First tree Search (BFS). For many cases, the default parameters are sufficient for guess-decoding things. But it may require parameters tuning.
2 |
3 | -----
4 |
5 | ### Parameters
6 |
7 | BFS stops when a given condition, in the form of a function applied to the decoded string at the current depth, is met. It returns two results: the decoded string and a tuple with the related encoding names in order of application.
8 |
9 | The following parameters are tunable:
10 |
11 | - `stop_func`: can be a function or a regular expression to be matched (automatically converted to a function that uses the `re` module) ; by default, checks if all input characters are printable.
12 | - `min_depth`: the minimum depth for the tree search (allows to avoid a bit of overhead while checking the current decoded output at a depth with the stop function when we are sure it should not be the right result) ; by default 0.
13 | - `max_depth`: the maximum depth for the tree search ; by default 5.
14 | - `codec_categories`: a string indicating a codec [category](#list-codecs) or a list of [category](#list-codecs) strings ; by default, `None`, meaning the whole [categories](#list-codecs) (very slow).
15 | - `found`: a list or tuple of currently found encodings that can be used to save time if the first decoding steps are known ; by default, an empty tuple.
16 |
17 | A simple example for a 1-stage base64-encoded string:
18 |
19 | ```python
20 | >>> codext.guess("VGhpcyBpcyBhIHRlc3Q=")
21 | {('base64',): 'This is a test'}
22 | ```
23 |
24 | An example of a 2-stages base64- then base62-encoded string:
25 |
26 | ```python
27 | >>> codext.guess("CJG3Ix8bVcSRMLOqwDUg28aDsT7")
28 | {('base62',): 'VGhpcyBpcyBhIHRlc3Q='}
29 | ```
30 |
31 | In the second example, we can see that the given encoded string is not decoded as expected. This is the case because the (default) stop condition is too broad and stops if all the characters of the output are printable. If we have a prior knowledge on what we should expect, we can input a simple string or a regex:
32 |
33 | !!! note "Default stop function"
34 |
35 | :::python
36 | >>> codext.stopfunc.default.__name__
37 | '...'
38 |
39 | The output depends on whether you have a language detection backend library installed ; see section [*Natural Language Detection*](#natural-language-detection). If no such library is installed, the default function is "`text`".
40 |
41 | ```python
42 | >>> codext.guess("CJG3Ix8bVcSRMLOqwDUg28aDsT7", "test")
43 | {('base62', 'base64'): 'This is a test'}
44 | ```
45 |
46 | In this example, the string "*test*" is converted to a function that uses this string as regular expression. Instead of a string, we can also pass a function. For this purpose, standard [stop functions](#available-stop-functions) are predefined. So, we can for instance use `stopfunc.lang_en` to stop when we find something that is English. Note that working this way gives lots of false positives if the text is very short like in the example case. That's why the `codec_categories` argument is used to only consider baseX codecs. This is also demonstrated in the next examples.
47 |
48 | ```python
49 | >>> codext.stopfunc._reload_lang("langdetect")
50 | >>> codext.guess("CJG3Ix8bVcSRMLOqwDUg28aDsT7", codext.stopfunc.lang_en, codec_categories="base")
51 | ('This is a test', ('base62', 'base64'))
52 | ```
53 |
54 | If we know the first encoding, we can set this in the `found` parameter to save time:
55 |
56 | ```python
57 | >>> codext.guess("CJG3Ix8bVcSRMLOqwDUg28aDsT7", "test", found=["base62"])
58 | ('This is a test', ('base62', 'base64'))
59 | ```
60 |
61 | If we are sure that only `base` (which is a valid [category](#list-codecs)) encodings are used, we can restrict the tree search using the `codec_categories` parameter to save time:
62 |
63 | ```python
64 | >>> codext.guess("CJG3Ix8bVcSRMLOqwDUg28aDsT7", "test", codec_categories="base")
65 | ('This is a test', ('base62', 'base64'))
66 | ```
67 |
68 | Another example of 2-stages encoded string:
69 |
70 | ```python
71 | >>> codext.guess("LSAuLi4uIC4uIC4uLiAvIC4uIC4uLiAvIC4tIC8gLSAuIC4uLiAt", "test")
72 | ('this is a test', ('base64', 'morse'))
73 | >>> codext.guess("LSAuLi4uIC4uIC4uLiAvIC4uIC4uLiAvIC4tIC8gLSAuIC4uLiAt", "test", codec_categories=["base", "language"])
74 | ('this is a test', ('base64', 'morse'))
75 | ```
76 |
77 | When multiple results are expected, `stop` and `show` arguments can be used respectively to avoid stopping while finding a result and to display the intermediate result.
78 |
79 | !!! warning "Computation time"
80 |
81 | Note that, in the very last examples, the first call takes much longer than the second one but requires no knowledge about the possible [categories](#list-codecs) of encodings.
82 |
83 | -----
84 |
85 | ### Available Stop Functions
86 |
87 | A few stop functions are predefined in the `stopfunc` submodule.
88 |
89 | ```python
90 | >>> import codext
91 | >>> dir(codext.stopfunc)
92 | ['LANG_BACKEND', 'LANG_BACKENDS', ..., '_reload_lang', 'default', 'flag', ..., 'printables', 'regex', 'text']
93 | ```
94 |
95 | Currently, the following stop functions are provided:
96 |
97 | - `flag`: searches for the pattern "`[Ff][Ll1][Aa4@][Gg9]`" (either UTF-8 or UTF-16)
98 | - `lang_**`: checks if the given lang is detected (note that it first checks if all characters are text ; see `text` hereafter)
99 | - `printables`: checks that every output character is in the set of printables
100 | - `regex(pattern)`: takes one argument, the regular expression, for checking a string against the given pattern
101 | - `text`: checks for printables and an entropy less than 4.6 (empirically determined)
102 |
103 | A stop function can be used as the second argument of the `guess` function or as a keyword-argument, as shown in the following examples:
104 |
105 | ```python
106 | >>> codext.guess("...", codext.stopfunc.text)
107 | [...]
108 | >>> codext.guess("...", [...], stop_func=codext.stopfunc.text)
109 | [...]
110 | ```
111 |
112 | When a string is given, it is automatically converted to a `regex` stop function.
113 |
114 | ```python
115 | >>> s = codext.encode("pattern testing", "leetspeak")
116 | >>> s
117 | 'p4773rn 73571n9'
118 | >>> stop_func = codext.stopfunc.regex("p[a4@][t7]{2}[e3]rn")
119 | >>> stop_func(s)
120 | True
121 | >>> codext.guess(s, stop_func)
122 | [...]
123 | ```
124 |
125 | Additionally, a simple stop function is predefined for CTF players, matching various declinations of the word *flag*. Alternatively, a pattern can always be used when flags have a particular format.
126 |
127 | ```python
128 | >>> codext.stopfunc.flag("test string")
129 | False
130 | >>> codext.stopfunc.flag("test f1@9")
131 | True
132 | >>> codext.stopfunc.regex(r"^CTF\{.*?\}$")("CTF{098f6bcd4621d373cade4e832627b4f6}")
133 | True
134 | ```
135 |
136 | The particular type of stop function `lang_**` is explained in the [next section](#natural-language-detection).
137 |
138 | -----
139 |
140 | ### Natural Language Detection
141 |
142 | As in many cases, we are trying to decode inputs to readable text, it is necessary to narrow the scope while searching for valid decoded outputs. As matching printables and even text (as defined here before as printables with an entropy of less than 4.6) is too broad for many cases, it may be very useful to apply natural language detection. In `codext`, this is done by relying on Natural Language Processing (NLP) backend libraries, loaded only if they were separately installed.
143 |
144 | Currently, the following backends are supported, in order of precedence (this order was empirically determined by testing):
145 |
146 | - [`langid`](https://github.com/saffsd/langid.py): *Standalone Language Identification (LangID) tool.*
147 | - [`langdetect`](https://github.com/Mimino666/langdetect): *Port of Nakatani Shuyo's language-detection library (version from 03/03/2014) to Python.*
148 | - [`pycld2`](https://github.com/aboSamoor/pycld2): *Python bindings for the Compact Language Detect 2 (CLD2).*
149 | - [`cld3`](https://github.com/bsolomon1124/pycld3): *Python bindings to the Compact Language Detector v3 (CLD3).*
150 | - [`textblob`](https://github.com/sloria/TextBlob): *Python (2 and 3) library for processing textual data.*
151 |
152 | The way NLP is used is to check that these libraries exist and to take the first one by default. This sets up the `stopfunc.default` for the guess mode. This behavior aims to keep language detection as optional and to avoid multiple specific requirements having the same purpose.
153 |
154 | While loaded, the default backend can be switched to another one by using the `_reload_lang` function:
155 |
156 | ```python
157 | >>> codext.stopfunc._reload_lang("pycld2") # this loads pycld2 and attaches lang_** functions to the stopfunc submodule
158 | >>> codext.stopfunc._reload_lang() # this unloads any loaded backend
159 | ```
160 |
161 | Each time a backend is loaded, it gets `lang_**` stop functions attached to the `stopfunc` submodule for each supported language.
162 |
163 | -----
164 |
165 | ### Ranking Heuristic
166 |
167 | !!! warning "Work in progress"
168 |
169 | This part is still in progress and shall be improved with better features and/or using machine learning.
170 |
171 |
--------------------------------------------------------------------------------
/docs/pages/img/banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhondta/python-codext/9811df6922b7abdb2252289c104ff09a508b3fbb/docs/pages/img/banner.png
--------------------------------------------------------------------------------
/docs/pages/img/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhondta/python-codext/9811df6922b7abdb2252289c104ff09a508b3fbb/docs/pages/img/icon.png
--------------------------------------------------------------------------------
/docs/pages/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhondta/python-codext/9811df6922b7abdb2252289c104ff09a508b3fbb/docs/pages/img/logo.png
--------------------------------------------------------------------------------
/docs/pages/index.md:
--------------------------------------------------------------------------------
1 | Codext, contraction of "*codecs*" and "*extension*", is a library that gathers many additional encodings for use with [`codecs`](https://docs.python.org/3/library/codecs.html). When imported, it registers new encodings to an extended codecs registry for making the encodings available from the `codecs.(decode|encode|open)` API. It also features [CLI tools](./cli.html) and a [guess mode](./features.html#guess-decode-an-arbitrary-input) for decoding multiple layers of codecs.
2 |
3 | ### Setup
4 |
5 | This library is available on [PyPi](https://pypi.python.org/pypi/codext/) and can be simply installed using Pip:
6 |
7 | ```sh
8 | pip install codext
9 | ```
10 |
--------------------------------------------------------------------------------
/docs/pages/manipulations.md:
--------------------------------------------------------------------------------
1 | `codext` also defines multiple dummy string manipulation/transformation codecs, essentially for use with the CLI tool and for the sake of simplicity.
2 |
3 | -----
4 |
5 | ### Case-related operations
6 |
7 | These transformation functions are simple string transformations, including `str`'s methods.
8 |
9 | **Codec** | **Conversions** | **Aliases** | **Comment**
10 | :---: | :---: | --- | ---
11 | `camelcase` | text --> camel-case text | `camel` | no decoding
12 | `capitalize` | text <-> capitalized text | | decoding "uncapitalizes" the text
13 | `lowercase` | text <-> lowercase text | `lower` | decoding is `uppercase`
14 | `pascalcase` | text --> pascal-case text | `pascal` | no decoding
15 | `screamingsnakecase` | text --> screaming-snake-case text | `screaming-snake`, `screaming_snake_case` | no decoding
16 | `slugify` | text --> slug | `slug`, `kebab`, `kebabcase` | no decoding
17 | `snakecase` | text --> snake-case text | `snake` | no decoding
18 | `swapcase` | text <-> case-swapped text | `swap`, `invert`, `invertcase` |
19 | `title` | text <-> titled text | | decoding "untitles" the text
20 | `uppercase` | text <-> uppercase text | `upper` | decoding is `lowercase`
21 |
22 | Of course, these transformations have no interest while using them in Python as the `str` methods can be called. It can be useful while using `codext` from the terminal (see [*CLI tool*](cli.html)).
23 |
24 | Some simple examples:
25 |
26 | ```sh
27 | $ echo -en "test string" | codext encode swap-case
28 | TEST STRING
29 |
30 | $ echo -en "test string" | codext encode camel_case
31 | testString
32 |
33 | $ echo -en "test string" | codext encode kebab_case
34 | test-string
35 | ```
36 |
37 | -----
38 |
39 | ### Dummy string operations
40 |
41 | These transformation functions are simple string transformations.
42 |
43 | **Codec** | **Conversions** | **Aliases** | **Comment**
44 | :---: | :---: | --- | ---
45 | `replace` | text <-> text with multi-chars replaced | | parametrized with a _string_ and its _replacement_
46 | `reverse` | text <-> reversed text | |
47 | `reverse-words` | text <-> reversed words | | same as `reverse` but not on the whole text, only on the words (text split by whitespace)
48 | `strip-spaces` | text <-> all whitespaces stripped | |
49 | `substitute` | text <-> text with token substituted | |
50 | `tokenize` | text <-> text split in tokens of length N | | parametrized with _N_
51 |
52 | As in the previous section, these transformations have no interest while using them in Python but well while using `codext` from the terminal (see [*CLI tool*](cli.html)).
53 |
54 | A simple example:
55 |
56 | ```sh
57 | $ echo -en "test string" | codext encode reverse-words | codext encode reverse replace-\ _
58 | string_test
59 | ```
60 |
61 | Another example:
62 |
63 | ```sh
64 | $ echo -en "3132333435" | codext encode tokenize-2
65 | 31 32 33 34 35
66 | ```
67 |
68 | Or using encodings chaining:
69 |
70 | ```sh
71 | $ echo -en "test string" | codext encode reverse-words reverse substitute-string/phrase
72 | phrase test
73 | ```
74 |
75 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | jinja2<3.1.0
2 | mkdocs>=1.3.0
3 | mkdocs-bootswatch
4 | mkdocs-material
5 | mkdocs-rtd-dropdown
6 | pymdown-extensions
7 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=61.0", "setuptools-scm"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [tool.setuptools.dynamic]
6 | version = {attr = "codext.__info__.__version__"}
7 |
8 | [tool.setuptools.packages.find]
9 | where = ["src"]
10 |
11 | [project]
12 | name = "codext"
13 | authors = [
14 | {name="Alexandre D'Hondt", email="alexandre.dhondt@gmail.com"},
15 | ]
16 | description = "Native codecs extension"
17 | license = {file = "LICENSE"}
18 | keywords = ["python", "development", "programming", "codecs", "encodings"]
19 | requires-python = ">=3.8,<4"
20 | classifiers = [
21 | "Development Status :: 5 - Production/Stable",
22 | "Environment :: Console",
23 | "Intended Audience :: Developers",
24 | "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
25 | "Programming Language :: Python :: 3",
26 | "Topic :: Software Development :: Libraries :: Python Modules",
27 | ]
28 | dependencies = [
29 | "crypt-r; python_version >= '3.13'",
30 | "markdown2>=2.4.0",
31 | ]
32 | dynamic = ["version"]
33 |
34 | [project.readme]
35 | file = "README.md"
36 | content-type = "text/markdown"
37 |
38 | [project.urls]
39 | documentation = "https://python-codext.readthedocs.io/en/latest/?badge=latest"
40 | homepage = "https://github.com/dhondta/python-codext"
41 | issues = "https://github.com/dhondta/python-codext/issues"
42 | repository = "https://github.com/dhondta/python-codext"
43 |
44 | [project.scripts]
45 | base1 = "codext.base.baseN:main1"
46 | base2 = "codext.base.baseN:main2"
47 | base3 = "codext.base.baseN:main3"
48 | base4 = "codext.base.baseN:main4"
49 | base8 = "codext.base.baseN:main8"
50 | base10 = "codext.base.baseN:main10"
51 | base16 = "codext.base.baseN:main16"
52 | base26 = "codext.base.baseN:main26"
53 | base32 = "codext.base.baseN:main32"
54 | base32-hex = "codext.base.baseN:main32hex"
55 | base32-geohash = "codext.base.baseN:main32geo"
56 | base32-crockford = "codext.base.baseN:main32crk"
57 | base32-z = "codext.base.baseN:mainz32"
58 | base36 = "codext.base.baseN:main36"
59 | base45 = "codext.base.base45:main"
60 | base58-bitcoin = "codext.base.baseN:main58bc"
61 | base58-ripple = "codext.base.baseN:main58rp"
62 | base58-flickr = "codext.base.baseN:main58fl"
63 | base62 = "codext.base.baseN:main62"
64 | base63 = "codext.base.baseN:main63"
65 | base64 = "codext.base.baseN:main64"
66 | base64-url = "codext.base.baseN:main64url"
67 | base67 = "codext.base.baseN:main67"
68 | base85 = "codext.base.base85:main85"
69 | base85-adobe = "codext.base.base85:main85adobe"
70 | base85-xbtoa = "codext.base.base85:main85xbtoa"
71 | base85-ipv6 = "codext.base.base85:main85rfc1924"
72 | base85-xml = "codext.base.base85:main85xml"
73 | base85-zeromq = "codext.base.base85:main85zeromq"
74 | base91 = "codext.base.base91:main91"
75 | base100 = "codext.base.base100:main100"
76 | base122 = "codext.base.base122:main122"
77 | codext = "codext.__init__:main"
78 | unbase = "codext.base.__init__:main"
79 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | python_paths = src
3 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | markdown2>=2.4.0
2 |
--------------------------------------------------------------------------------
/src/codext/VERSION.txt:
--------------------------------------------------------------------------------
1 | 1.15.5
2 |
--------------------------------------------------------------------------------
/src/codext/__info__.py:
--------------------------------------------------------------------------------
# -*- coding: UTF-8 -*-
"""Codext package information.

Holds the package metadata (author, copyright, license, source URL) and reads the version number from VERSION.txt
so that it is defined in a single place.
"""
import os
from datetime import datetime

__author__ = "Alexandre D'Hondt"
# the copyright year range is computed at import time so it never goes stale
__copyright__ = "© 2019-{} A. D'Hondt".format(datetime.now().year)
__email__ = "alexandre.dhondt@gmail.com"
__license__ = "GPLv3 (https://www.gnu.org/licenses/gpl-3.0.fr.html)"
__source__ = "https://github.com/dhondta/python-codext"

# the version is kept in a separate text file next to this module ; pyproject.toml reads it back dynamically through
#  'version = {attr = "codext.__info__.__version__"}'
with open(os.path.join(os.path.dirname(__file__), "VERSION.txt")) as f:
    __version__ = f.read().strip()
16 |
17 |
--------------------------------------------------------------------------------
/src/codext/base/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | from argparse import ArgumentParser, RawTextHelpFormatter
3 | from types import MethodType
4 |
5 | from .base45 import *
6 | from .base85 import *
7 | from .base91 import *
8 | from .base100 import *
9 | from .base122 import *
10 | from .baseN import *
11 | from ..__common__ import *
12 | from ..__info__ import __version__
13 |
14 |
def main():
    """ CLI entry point for the `unbase` tool.

    Guess-decodes a multi-layer base-encoded input (file or standard input) by running the breadth-first codec search
     restricted to the "base" category, then prints the decoded output (and optionally the decoding chain).

    :return: exit code (always 0 ; failure to decode is reported on stdout, not via the exit code)
    """
    descr = """Usage: unbase [OPTION]... [FILE]
Decode multi-layer base encoded FILE, or standard input, to standard output.

With no FILE, or when FILE is -, read standard input.

Optional arguments:
  -E, --extended also consider generic base codecs while guess-decoding
  -f, --stop-function set the result checking function (default: text)
                       format: printables|text|flag|lang_[bigram]
  -M, --max-depth maximum codec search depth (default: 10)
  -m, --min-depth minimum codec search depth (default: 0)
  -p, --pattern pattern to be matched while searching
  -s, --show show the decoding chain

  --help display this help and exit
  --verbose show guessing information and steps
  --version output version information and exit

Report unbase bugs to https://github.com/dhondta/python-codext/issues
Full documentation at: https://python-codext.readthedocs.io/en/latest/
"""
    parser = ArgumentParser(description=descr, formatter_class=RawTextHelpFormatter, add_help=False)
    # display the hand-crafted description above instead of argparse's auto-generated help
    parser.format_help = MethodType(lambda s: s.description, parser)
    group = parser.add_mutually_exclusive_group()
    parser.add_argument("file", nargs="?")
    parser.add_argument("-E", "--extended", action="store_true")
    group.add_argument("-f", "--stop-function", default="text")
    parser.add_argument("-M", "--max-depth", type=int, default=10)
    parser.add_argument("-m", "--min-depth", type=int, default=0)
    group.add_argument("-p", "--pattern")
    parser.add_argument("-s", "--show", action="store_true")
    parser.add_argument("--help", action="help")
    parser.add_argument("--version", action="version")
    parser.add_argument("--verbose", action="store_true")
    parser.version = "CodExt " + __version__
    args = parser.parse_args()
    # unless -E is given, the generic base codecs are excluded from the search space
    c, e = _input(args.file), [["base%d-generic" % i for i in range(2, 256)], []][args.extended]
    c = c.rstrip("\r\n") if isinstance(c, str) else c.rstrip(b"\r\n")
    # -p and -f are mutually exclusive ; when a pattern is given, it is used as the stop condition (strings are
    #  converted to regex-based stop functions by stopfunc._validate)
    stop_function = args.stop_function if args.pattern is None else args.pattern
    # fix: forward the parsed -m/--min-depth value instead of a hard-coded 0
    r = codecs.guess(c, stopfunc._validate(stop_function), args.min_depth, args.max_depth, "base", tuple(e),
                     stop=False, show=args.verbose, debug=args.verbose)
    if len(r) == 0:
        print("Could not decode :-(")
        return 0
    # keep the candidate with the longest encoding chain, i.e. the deepest successful decoding
    ans = max(r.items(), key=lambda x: len(x[0]))
    if args.show:
        print(" - ".join(ans[0]))
    print(ensure_str(ans[1]))
    return 0
64 |
65 |
--------------------------------------------------------------------------------
/src/codext/base/_base2n.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """BaseN functions with N a power of 2.
3 |
4 | """
5 | from math import ceil, log
6 |
7 | from ..__common__ import *
8 | from ..__common__ import _set_exc
9 | from ._base import base, _get_charset
10 |
11 |
# helper accepting either an int (a byte from a Python 3 bytes object) or a 1-char string and returning bin(ord)
_bin = lambda x: bin(x if isinstance(x, int) else ord(x))


# base en/decoding functions for N a power of 2
# declare (via the common helper) the exception classes raised by the functions below
_set_exc("Base2NDecodeError")
_set_exc("Base2NEncodeError")
18 |
19 |
def base2n(charset, pattern=None, name=None, **kwargs):
    """ Base-N codec factory for N a power of 2.

    :param charset: charset selection function
    :param pattern: matching pattern for the codec name (first capturing group is used as the parameter for selecting
                     the charset)
    :param name:   forced encoding name (useful e.g. for zbase32)
    :param kwargs: extra arguments passed through to the generic base codec factory (e.g. padding_char, guess)
    """
    # delegate to the generic factory, binding the power-of-2 en/decoding functions defined below
    base(charset, pattern, True, base2n_encode, base2n_decode, name, **kwargs)
29 |
30 |
def base2n_encode(string, charset, errors="strict"):
    """ 8-bits characters to base-N encoding for N a power of 2.

    :param string: string to be encoded
    :param charset: base-N characters set
    :param errors: errors handling marker (unused here ; kept for API consistency)
    :return: base-N encoded string, right-padded with '=' up to the quantum size
    """
    bs, r, n = "", "", len(charset)
    # number of bits per output character, and quantum: the smallest multiple of nb_out that is also a multiple of 8
    nb_out = int(log(n, 2))
    q = nb_out
    while q % 8 != 0:
        q += nb_out
    # iterate over the input bytes, gathering bits and emitting one charset character every nb_out bits
    for c in b(string):
        bs += "{:0>8}".format(_bin(c)[2:])
        while len(bs) >= nb_out:
            r += charset[int(bs[:nb_out], 2)]
            bs = bs[nb_out:]
    # handle the remaining bits (always fewer than nb_out at this point), zero-padded on the right
    if len(bs) > 0:
        r += charset[int(("{:0<%d}" % nb_out).format(bs), 2)]
    # pad the result with '=' so that the total number of encoded bits is a multiple of the quantum ;
    #  integer arithmetic is used throughout to avoid float precision issues on large inputs
    l = len(r) * nb_out
    while l % q != 0:
        l += nb_out
    return r + (l // nb_out - len(r)) * "="
59 |
60 |
def base2n_decode(string, charset, errors="strict"):
    """ Base-N to 8-bits characters decoding for N a power of 2.

    :param string: string to be decoded
    :param charset: base-N characters set
    :param errors: errors handling marker ("strict", "replace" or "ignore")
    """
    bs, r, n = "", "", len(charset)
    # particular case: for hex, ensure the right case in the charset ; note that this way, if mixed cases are used, it
    # will trigger an error (this is the expected behavior)
    if n == 16:
        if any(c in string for c in "abcdef"):
            charset = charset.lower()
        elif any(c in string for c in "ABCDEF"):
            charset = charset.upper()
        # whitespace is not significant for hex input
        string = re.sub(r"\s", "", string)
    # find the number of bits for the given character set and the number of padding characters
    nb_in = int(log(n, 2))
    n_pad = len(string) - len(string.rstrip("="))
    # iterate over the characters, mapping them to the character set and converting the resulting bits to 8-bits chars
    for i, c in enumerate(string):
        if c == "=":
            # padding characters contribute zero bits
            bs += "0" * nb_in
        else:
            try:
                bs += ("{:0>%d}" % nb_in).format(_bin(charset.index(c))[2:])
            except ValueError:
                if errors == "strict":
                    e = Base2NDecodeError("'base%d' codec can't decode character '%s' in position %d" % (n, c, i))
                    e.__cause__ = e  # block exceptions chaining
                    raise e
                elif errors == "replace":
                    bs += "0" * nb_in
                elif errors == "ignore":
                    continue
                else:
                    raise ValueError("Unsupported error handling {}".format(errors))
        # NOTE: bytes are flushed only when STRICTLY more than 8 bits are buffered ; the final 8-bit group is
        #  deliberately kept for the post-loop handling below
        if len(bs) > 8:
            r += chr(int(bs[:8], 2))
            bs = bs[8:]
    # if the number of bits is not multiple of 8 bits, it could mean a bad padding
    if len(bs) != 8:
        if errors == "strict":
            raise Base2NDecodeError("Incorrect padding")
        elif errors in ["replace", "ignore"]:
            pass
        else:
            raise ValueError("Unsupported error handling {}".format(errors))
    # NOTE(review): if bs is empty here (conceivable with errors="ignore" on fully-invalid input), int("", 2) raises
    #  ValueError — confirm whether this path is reachable from the codec wrapper
    r += chr(int(bs, 2))
    np = int(ceil(n_pad * nb_in / 8.0))
    return r[:-np] if np > 0 else r
112 |
113 |
--------------------------------------------------------------------------------
/src/codext/base/base100.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Base100 Codec - base100 content encoding.
3 |
4 | Note: only works in Python3 ; strongly inspired from https://github.com/MasterGroosha/pybase100
5 |
6 | This codec:
7 | - en/decodes strings from str to str
8 | - en/decodes strings from bytes to bytes
9 | - decodes file content to str (read)
10 | - encodes file content from str to bytes (write)
11 | """
12 | from ._base import main
13 | from ..__common__ import *
14 |
15 | # no __examples__ ; handled manually in tests/test_base.py
16 |
class Base100DecodeError(ValueError):
    # make the exception display like a builtin in tracebacks
    __module__ = "builtins"
19 |
20 |
def base100_encode(input, errors="strict"):
    """ Encode each input byte as a 4-byte sequence starting with \\xf0\\x9f (an emoji in UTF-8). """
    data = b(input)
    out = bytearray()
    for byte in data:
        shifted = byte + 55
        # fixed 2-byte prefix, then the shifted value split into a 6-bit pair
        out.extend((240, 159, shifted // 64 + 143, shifted % 64 + 128))
    return bytes(out), len(data)
28 |
29 |
def base100_decode(input, errors="strict"):
    """ Decode base100 input, rebuilding one byte from every 4-byte emoji sequence. """
    data = b(_stripl(input, True, True))
    if errors == "ignore":
        data = data.replace(b"\n", b"")
    if len(data) % 4:
        raise Base100DecodeError("Bad input (length should be multiple of 4)")
    out = bytearray(len(data) // 4)
    high = 0
    for pos, byte in enumerate(data):
        rem = pos % 4
        if rem == 2:
            # third byte of the group carries the high 2 bits of the original value
            high = ((byte - 143) * 64) % 256
        elif rem == 3:
            # fourth byte carries the low 6 bits ; recombine and undo the +55 shift
            out[pos // 4] = (byte - 128 + high - 55) & 0xff
    return bytes(out), len(data)
43 |
44 |
# register the codec (also matching the "emoji" alias) and bind the CLI entry point
add("base100", base100_encode, base100_decode, r"^(?:base[-_]?100|emoji)$", expansion_factor=1.)
main100 = main(100, "")
47 |
48 |
--------------------------------------------------------------------------------
/src/codext/base/base122.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Base122 Codec - base122 content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ._base import main
11 | from ..__common__ import *
12 |
13 |
__examples__ = {
    'enc(base122|base-122)': {
        'this is a test': ":\x1aʗ\x19\x01Rs\x10\x18$\x07#\x15ft",
        b'This is another longer test string with d1g1t5 and sp3c141 characters !\n': \
            b"*\x1a\xca\x97\x19\x01Rs\x10\x18-f{QPe9\x08\xcb\x86{9Ne9\x08\x0eF+Mh 9]\x0e\xd3\x8b"
            b"9N ;Z.FA\x01H13L.C)\x01Bn2\x08\x0e7\x01MF1\x1a\x0c$\x06\x1b!Br0XnF+If \x10B@"
    },
    'enc-dec(base_122)': ["@random"],
}


# 7-bit values that are illegal in the output: NUL, \n, \r, double quote, ampersand, backslash
_BAD = [0, 10, 13, 34, 38, 92]
# helper normalizing a byte (int or 1-char string) to its integer value
_i = lambda c: c if isinstance(c, int) else ord(c)
27 |
28 |
29 | # inspired from: https://github.com/kevinAlbs/Base122/blob/master/base122.js
def base122_encode(input, errors="strict"):
    """ Encode input by packing its bits 7 at a time, escaping illegal characters on two bytes. """
    idx, bit, r, l = 0, 0, [], len(input)

    def _get_7bits(idx, bit):
        # return the updated (index, bit offset) and the next 7 bits of input, or False when input is exhausted
        if idx >= l:
            return idx, bit, False
        B1 = _i(input[idx])
        # first part: the remaining bits of the current byte
        p1 = (((254 >> bit) & B1) << bit) >> 1
        bit += 7
        if bit < 8:
            return idx, bit, p1
        bit -= 8
        idx += 1
        if idx >= l:
            return idx, bit, p1
        B2 = _i(input[idx])
        # second part: the leading bits of the next byte
        p2 = (((65280 >> bit) & B2) & 255) >> (8 - bit)
        return idx, bit, (p1 | p2)

    while True:
        if idx >= l:
            break
        # get seven bits of input data
        idx, bit, B = _get_7bits(idx, bit)
        # check for illegal chars
        try:
            bad_idx = _BAD.index(B)
        except ValueError:
            r.append(B)
            continue
        # illegal character: emit a two-byte sequence embedding its index in _BAD and the next 7 bits
        idx, bit, nB = _get_7bits(idx, bit)
        if nB is False:
            # input exhausted: index 7 marks a shortened sequence carrying the illegal value itself
            nB, bad_idx = B, 7
        B1, B2 = 194, 128
        B1 |= (7 & bad_idx) << 2
        B1 |= int((nB & 64) > 0)
        B2 |= nB & 63
        r.extend([B1, B2])
    return "".join(map(chr, r)).encode("latin-1"), len(input)
69 |
70 |
71 | # inspired from: https://github.com/kevinAlbs/Base122/blob/master/base122.js
def base122_decode(input, errors="strict"):
    """ Decode base122 by unpacking 7 bits per character (two 7-bit chunks for escape sequences). """
    currB, bob, r, input = 0, 0, [], list(map(ord, input))

    def _get_7bits(currB, bob, B, decoded):
        # shift in 7 bits ; flush a full byte into 'decoded' once at least 8 bits are available
        B <<= 1
        currB |= (B % 0x100000000) >> bob
        bob += 7
        if bob >= 8:
            decoded += [currB]
            bob -= 8
        return (B << (7 - bob)) & 255, bob

    for i in range(len(input)):
        if input[i] >= 128:
            # escape sequence
            # NOTE(review): (input[i] >> 8) & 7 is non-zero only for code points above 255 — this assumes the input
            #  str carries UTF-8-decoded two-byte sequences as single code points ; confirm against the encoder
            try:
                currB, bob = _get_7bits(currB, bob, _BAD[(input[i] >> 8) & 7], r)
            except IndexError:
                # index 7 is the shortened-sequence marker and has no entry in _BAD
                pass
            currB, bob = _get_7bits(currB, bob, input[i] & 127, r)
        else:
            currB, bob = _get_7bits(currB, bob, input[i], r)
    return "".join(map(chr, r)).rstrip("\0"), len(input)
94 |
95 |
# register the codec (expansion ~8/7 as 7 input bits map to 8 output bits) and bind the CLI entry point
add("base122", base122_encode, base122_decode, r"^base[-_]?122$", expansion_factor=1.085)
main122 = main(122, "", wrap=False)
98 |
99 |
--------------------------------------------------------------------------------
/src/codext/base/base45.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Base45 Codec - base45 content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ._base import _get_charset, digits, lower, main, upper
11 | from ..__common__ import *
12 |
13 |
__examples__ = {
    'enc(base45|base-45|base_45)': {'this is a test!': "AWE+EDH44.OEOCC7WE QEX0"},
    'enc(base45-inv|base_45_inv)': {'this is a test!': "K6O+ONREE.YOYMMH6O 0O7A"},
    'dec(base45)': {'BAD STRING\00': None, 'AWE+EDH44.OEOCC7WE QEX000': None},
}
__guess__ = ["base45", "base45-inv"]


# charsets: standard order (digits then letters) and an "inverted" variant (letters then digits)
B45 = {
    '': digits + upper + " $%*+-./:",
    '[-_]inv(?:erted)?$': upper + digits + " $%*+-./:",
}


# character/integer helpers ; ints in [256, 65535] are rendered as two characters (high byte then low byte)
__chr = lambda c: chr(c >> 8) + chr(c & 0xff) if isinstance(c, int) and 256 <= c <= 65535 else \
                  chr(c) if isinstance(c, int) else c
__ord = lambda c: ord(c) if not isinstance(c, int) else c
31 |
32 |
def base45_encode(mode):
    """ Encoder factory for the given base45 charset mode. """
    b45 = _get_charset(B45, mode)
    def encode(text, errors="strict"):
        raw, out = b(text), ""
        # every 2 input bytes become 3 output characters ; a trailing lone byte becomes 2 characters
        for i in range(0, len(text), 2):
            pair = raw[i:i+2]
            if len(pair) == 1:
                v = __ord(pair[0])
                out += b45[v % 45] + b45[v // 45]
                break
            v = 256 * __ord(pair[0]) + __ord(pair[1])
            hi, lo = divmod(v, 45 * 45)
            out += b45[lo % 45] + b45[lo // 45] + b45[hi]
        return out, len(text)
    return encode
50 |
51 |
def base45_decode(mode):
    """ Decoder factory for the given base45 charset mode. """
    b45 = {c: i for i, c in enumerate(_get_charset(B45, mode))}
    def decode(text, errors="strict"):
        t, s = b(text), ""
        ehandler = handle_error("base45", errors, decode=True)
        # groups of 3 characters decode to 2 bytes ; a trailing group of 2 characters decodes to 1 byte
        for i in range(0, len(text), 3):
            try:
                n = b45[__chr(t[i])]
            except KeyError:
                # NOTE(review): if the handler does not raise (e.g. errors="ignore"), 'n' may stay unbound here and
                #  the statements below would raise — confirm intended behavior
                ehandler(__chr(t[i]), i, s)
            try:
                j = i + 1
                n += 45 * b45[__chr(t[j])]
            except KeyError:
                ehandler(__chr(t[j]), j, s)
            except IndexError:
                ehandler(__chr(t[i]), i, s)
            try:
                k = i + 2
                n += 45 ** 2 * b45[__chr(t[k])]
            except KeyError:
                ehandler(__chr(t[k]), k, s)
            except IndexError:
                # only 2 characters remain: they encode a single byte
                s += __chr(n)
                continue
            s += __chr(n // 256) + __chr(n % 256)
        return s, len(text)
    return decode
80 |
81 |
# register the codec (3 output characters for every 2 input bytes => expansion factor 1.5)
add("base45", base45_encode, base45_decode, r"^base[-_]?45(|[-_]inv(?:erted)?)$", expansion_factor=1.5)
# NOTE: this rebinds the 'main' name imported from ._base with the CLI entry point for base45
main = main(45, "")
84 |
85 |
--------------------------------------------------------------------------------
/src/codext/base/base85.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Base85 Codec - base85 content encoding.
3 |
4 | This is a simple wrapper for adding base64.b85**code to the codecs.
5 |
6 | This codec:
7 | - en/decodes strings from str to str
8 | - en/decodes strings from bytes to bytes
9 | - decodes file content to str (read)
10 | - encodes file content from str to bytes (write)
11 | """
12 | import base64
13 |
14 | from ._base import _get_charset, digits, lower, main, upper
15 | from ..__common__ import *
16 |
17 |
__examples__ = {
    'enc-dec(base85|z85|base85-ipv6)': ["@random{512,1024,2048}"],
    'enc-dec(base85-btoa|base85-xbtoa)': ["@random{512,1024,2048}"],
    'enc(base85|ascii85)': {'this is a test': "FD,B0+DGm>@3BZ'F*%"},
    'enc(base85-adobe)': {'this is a test': "<~FD,B0+DGm>@3BZ'F*%~>",
                          'this is a test\0\0\0\0\0\0': "<~FD,B0+DGm>@3BZ'F*%B^z~>"},
    'enc(z85|base85-z)': {'this is a test': "BzbxfazC)tvixV6B94"},
    'enc(base85-ipv6|base85_rfc1924)': {'this is a test': "bZBXFAZc?TVIXv6b94"},
    # NOTE(review): the key 'enc(base85_btoa)' appears three times in this literal ; in a dict literal only the last
    #  assignment survives, so the first two entries are silently discarded — confirm whether these should be merged
    #  into a single test-vector dictionary
    'enc(base85_btoa)': {'this is a test': "FD,B0+DGm>@3BZ'F*%B^"},
    'enc(base85_btoa)': {'this\0\0\0\0test': "FD,B0+DGm>@3BZ'F*%B^"},
    'enc(base85_btoa)': {'this is a test\0\0\0\0': "FD,B0+DGm>y@3BZ'F*%B^z"},
    'enc(base85-xbtoa)': {'this is a test': "xbtoa Begin\nFD,B0+DGm>@3BZ'F*%B^\nxbtoa End N 14 e E 4b" \
                          " S 523 R 1b132e"},
    'dec(base85-xbtoa)': {'xbtoa Begin\nFD,B0+DGm>@3BZ\'F*%B^\nxbtoa End': None,
                          'xbtoa Begin\nFD,B0+DGm>@3BZ\'F*%B^\nxbtoa End N 14 e E 4b S 523 R 000bad':
                          None},
    'enc(base85-xml)': {'this is a test': "bZBXFAZc@TVIXv6b94"},
    # NOTE(review): 'enc(base85|ascii85)' duplicates the key defined above — this last entry overwrites the former
    'enc(base85|ascii85)': {'this\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0test': "FD,B0zzz!!!\"@ATMq"},
}
__guess__ = ["ascii85", "z85", "base85-ipv6", "base85-xml", "base85-adobe", "base85-xbtoa"]
38 |
39 |
# charsets for the base85 variants, keyed by a regex matched against the codec name
B85 = {
    r'(base[-_]?85([-_]ascii)?|ascii85)$': "!\"#$%&'()*+,-./" + digits + ":;<=>?@" + upper + "[\\]^_`" + lower[:21],
    r'(z85|base[-_]?85[-_]z(eromq)?)$': digits + lower + upper + ".-:+=^!/*?&<>()[]{}@%$#",
    r'base[-_]?85[-_](rfc1924|ipv6)$': digits + upper + lower + "!#$%&()*+-;<=>?@^_`{|}~",
    r'base[-_]?85[-_]xml$': digits + upper + lower[:-1] + "!#$()*+,-./:;=?@^`{|}~z_",
}
# the Adobe and (x)btoa variants reuse the ASCII85 charset
B85[r'(base[-_]?85[-_]adobe)$'] = B85[r'(base[-_]?85[-_]x?btoa)$'] = B85[r'(base[-_]?85([-_]ascii)?|ascii85)$']
# precomputed powers of 85, used when rebuilding 32-bit integers from 5-character groups
POW85 = [85 ** i for i in range(5)]
48 |
49 |
def __format(text, mode, decode=False, **kwargs):
    """ Apply or strip the mode-specific wrapping of a base85 string.

    :param text:   base85-encoded text (or wrapped text when decoding)
    :param mode:   codec mode (e.g. containing "adobe" or "xbtoa")
    :param decode: True to strip the wrapping, False to add it
    :param kwargs: for xbtoa encoding: 'length' and the 'c_xor', 'c_sum', 'c_rot' check values
    """
    if "adobe" in mode:
        # the Adobe variant frames the payload between "<~" and "~>"
        if decode:
            if text.startswith("<~") and text.endswith("~>"):
                text = text[2:-2]
        else:
            text = "<~" + text + "~>"
    elif "xbtoa" in mode:
        if decode:
            # only strip the banner when both banner lines and the trailing check values are present
            if re.match(r"^xbtoa\s+[bB]egin\n", text) and \
               re.search(r"\nxbtoa\s+[eE]nd N \d+{h} E{h} S{h} R{h}\s*$".format(h=" [0-9a-fA-F]+"), text):
                text = "".join(text.split("\n")[1:-1]).replace(" ", "")
        else:
            # wrap the payload at 78 characters and append the banner with the length and check values
            l, t = kwargs['length'], "\n".join(text[i:i+78] for i in range(0, len(text), 78))
            text = "xbtoa Begin\n%s\nxbtoa End N %d %x E %x S %x R %x" % \
                   (t, l, l, kwargs['c_xor'], kwargs['c_sum'], kwargs['c_rot'])
    return text
68 |
69 |
def __xbtoa_values(text):
    """ Extract the length and check values from the trailing "xbtoa End" banner line.

    :param text: full xbtoa-wrapped text
    :return:     tuple of strings (decimal length, hex length, E, S and R check values)
    :raises Base85DecodeError: when the banner line is absent or malformed
    """
    hr = "[0-9a-fA-F]+"
    # re.search returns None when the banner is missing, making .groups() fail with AttributeError ;
    #  a non-string input would make re.search fail with TypeError
    try:
        return re.search(r"\nxbtoa\s+[eE]nd N (\d+) ({h}) E ({h}) S ({h}) R ({h})\s*$".format(h=hr), text).groups()
    except (AttributeError, TypeError):
        raise Base85DecodeError("Bad or missing xbtoa parameters")
76 |
77 |
def base85_encode(mode):
    """ Encoder factory for the given base85 variant. """
    b85 = _get_charset(B85, mode)
    def encode(input, errors="strict"):
        r, l, kw = "", len(input), {}
        if l == 0:
            return input, 0
        if "xbtoa" in mode:
            # xbtoa appends the input length and running check values (XOR, SUM, ROT) to the output banner
            kw['length'] = l
            kw['c_xor'], kw['c_sum'], kw['c_rot'] = 0, 0, 0
        n_pad = (4 - l % 4) % 4
        for i in range(0, l, 4):
            block = input[i:i+4]
            # ASCII85-like charsets (ending with "stu") compress an all-zero group to a single "z"
            if block == "\0\0\0\0" and b85[-3:] == "stu":
                r += "z"
            # btoa variants additionally compress an all-space group to a single "y"
            if block == "\x20\x20\x20\x20" and "btoa" in mode:
                r += "y"
            if "xbtoa" in mode:
                # update the running check values byte per byte (ROT is a 32-bit rotate-left + add)
                for c in block:
                    k = ord(c)
                    kw['c_xor'] ^= k
                    kw['c_sum'] += k + 1
                    kw['c_rot'] <<= 1
                    if kw['c_rot'] & 0x80000000:
                        kw['c_rot'] += 1
                    kw['c_rot'] += k
            if block == "\0\0\0\0" and b85[-3:] == "stu" or block == "\x20\x20\x20\x20" and "btoa" in mode:
                continue
            if len(block) < 4:
                block += n_pad * "\0"
            # convert the 4-byte group to its 5-character base85 representation (most significant digit first)
            n, bl = s2i(block), ""
            for _ in range(5):
                n, k = divmod(n, 85)
                bl = b85[k] + bl
            r += bl
        if "btoa" not in mode and n_pad:
            # non-btoa variants drop the characters corresponding to the zero-padding bytes
            r = r[:-n_pad]
        if b85[-3:] == "stu" and r[-5:] == "!!!!!":
            r = r[:-5] + "z"
        return __format(r, mode, **kw), l
    return encode
118 |
119 |
def base85_decode(mode):
    """ Decoder factory for the given base85 variant. """
    b85 = _get_charset(B85, mode)
    def decode(input, errors="strict"):
        r, l, i, n_pad = "", len(input), 0, 0
        if l == 0:
            return input, 0
        if "xbtoa" in mode:
            # extract the declared length and check values before stripping the banner
            v = __xbtoa_values(input)
            n_last = int(v[0]) % 4
            c_xor, c_sum, c_rot = 0, 0, 0
        input = __format(input, mode, True)
        ehandler = handle_error("base85", errors, decode=True)
        # a trailing "z" stands for a full "!!!!!" group with ASCII85-like charsets
        if b85[-3:] == "stu" and input[-1] == "z":
            input = input[:-1] + "!!!!!"
        l = len(input)
        while i < l:
            n, incr = 0, 5
            if input[i] == "z" and b85[-3:] == "stu":
                bl, incr = "\0\0\0\0", 1
            elif input[i] == "y" and "btoa" in mode:
                bl, incr = "\x20\x20\x20\x20", 1
            else:
                block = input[i:i+5]
                if len(block) < 5:
                    n_pad = 5 - len(block) % 5
                    block += n_pad * "\0"
                # rebuild the 32-bit integer from the 5 base85 digits ("\0" padding maps to 255)
                for k, c in enumerate(block[::-1]):
                    try:
                        n += (b85.index(c) if c != "\0" else 255) * POW85[k]
                    except ValueError:
                        r += ehandler(c, i + k, r)
                bl = codecs.decode("{:0>8}".format(hex(n & 0xffffffff)[2:]), "hex")
            if "xbtoa" in mode:
                if i + 5 == l and n_last > 0:
                    # truncate the last group to the declared input length
                    bl = bl[:n_last]
                # update the running check values (same scheme as in the encoder)
                for c in bl:
                    k = ord(c)
                    c_xor ^= k
                    c_sum += k + 1
                    c_rot <<= 1
                    if c_rot & 0x80000000:
                        c_rot += 1
                    c_rot += k
            r += bl
            i += incr
        if n_pad > 0:
            r = r[:-n_pad]
        if "xbtoa" in mode:
            # validate the computed check values against the ones declared in the banner
            chkv = ["%d" % len(r), "%x" % len(r), "%x" % c_xor, "%x" % c_sum, "%x" % c_rot]
            if any(v1 != v2 for v1, v2 in zip(v, chkv)) and errors == "strict":
                raise Base85ValueError("A check value does not match (%s != %s)" % (str(list(v)).replace("'", ""),
                                                                                   str(chkv).replace("'", "")))
        return r, l
    return decode
174 |
175 |
# register the codec ; the xbtoa variant keeps its banner overhead in the expansion factor
add("base85", base85_encode, base85_decode, expansion_factor=lambda f, ename: f if "xbtoa" in ename else 1.25,
    pattern=r"^(base[-_]?85(?:|[-_](?:adobe|x?btoa|ipv6|rfc1924|xml|z(?:eromq)?))|z85|ascii85)$",
    extra_exceptions=["Base85ValueError"])
# CLI entry points for the supported variants
main85 = main(85, None)
main85adobe = main(85, None, "adobe")
main85xbtoa = main(85, None, "xbtoa", wrap=False)
main85rfc1924 = main(85, "RFC 1924", "ipv6")
main85xml = main(85, "", "xml")
main85zeromq = main(85, "", "zeromq")
185 |
186 |
--------------------------------------------------------------------------------
/src/codext/base/base91.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Base91 Codec - base91 content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ._base import _get_charset, digits, lower, main, upper
11 | from ..__common__ import *
12 |
13 | # no __examples__ ; handled manually in tests/test_base.py
__guess__ = ["base91", "base91-inv", "base91-alt", "base91-alt-inv"]


# charsets for the standard, inverted, alternate and alternate-inverted base91 variants
B91 = {
    r'': upper + lower + digits + "!#$%&()*+,./:;<=>?@[]^_`{|}~\"",
    r'[-_]inv(erted)?$': digits + upper + lower + "!#$%&()*+,./:;<=>?@[]^_`{|}~\"",
    r'[-_]alt(ernate)?$': "!#$%&'()*+,-./" + digits + ":;<=>?@" + upper + "[\\]^_" + lower + "{|}",
    r'[-_]alt(ernate)?[-_]inv(erted)?$': "!#$%&'()*+,-./" + upper + ":;<=>?@" + lower + "[\\]^_" + digits + "{|}",
}


# helpers normalizing between 1-char strings and byte values
__chr = lambda c: chr(c) if isinstance(c, int) else c
__ord = lambda c: ord(c) if not isinstance(c, int) else c
27 |
28 |
def base91_encode(mode):
    """ Encoder factory for the given base91 charset mode. """
    b91 = _get_charset(B91, mode)
    def encode(text, errors="strict"):
        t, s, bits = b(text), "", ""
        if re.search(r'[-_]alt(ernate)?$', mode):
            # alternate scheme: a big-endian bit stream consumed 13 bits at a time, each chunk emitting 2 characters
            while len(bits) < 13 and t:
                bits += "{:08b}".format(__ord(t[0]))
                t = t[1:]
            while len(bits) > 13 or t:
                n = int(bits[:13], 2)
                s += b91[n // 91] + b91[n % 91]
                bits = bits[13:]
                while len(bits) < 13 and t:
                    bits += "{:08b}".format(__ord(t[0]))
                    t = t[1:]
            if len(bits) > 0:
                if len(bits) < 7:
                    # a short remainder fits in a single output character
                    # NOTE(review): this pads to 6 bits (6 - len(bits)) although the condition checks < 7 — confirm
                    #  against the alternate base91 scheme
                    bits += "0" * (6 - len(bits))
                    s += b91[int(bits, 2)]
                else:
                    bits += "0" * (13 - len(bits))
                    n = int(bits, 2)
                    s += b91[n // 91] + b91[n % 91]
        else:
            # standard basE91: bits are accumulated little-endian and flushed 13 or 14 at a time
            for c in t:
                bits = bin(__ord(c))[2:].zfill(8) + bits
                if len(bits) > 13:
                    n = int(bits[-13:], 2)
                    # 13-bit chunks are used when their value exceeds 88, 14-bit chunks otherwise
                    if n > 88:
                        bits = bits[:-13]
                    else:
                        n = int(bits[-14:], 2)
                        bits = bits[:-14]
                    s += b91[n % 91] + b91[n // 91]
            if len(bits) > 0:
                n = int(bits, 2)
                s += b91[n % 91]
                if len(bits) > 7 or n > 90:
                    s += b91[n // 91]
        # NOTE(review): in the alternate branch 't' has been fully consumed above, so len(t) is 0 here — confirm the
        #  consumed-length return value is not used downstream in that case
        return s, len(t)
    return encode
70 |
71 |
def base91_decode(mode):
    """ Decoder factory for the given base91 charset mode. """
    b91 = {c: i for i, c in enumerate(_get_charset(B91, mode))}
    def decode(text, errors="strict"):
        t, s, bits, alt = b(_stripl(text, True, True)), "", "", re.search(r'[-_]alt(ernate)?$', mode) is not None
        ehandler = handle_error("base91", errors, decode=True)
        # characters are consumed by pairs ; digit order is swapped between the standard and alternate schemes
        for i in range(0, len(t), 2):
            try:
                n = b91[__chr(t[i])] * [1, 91][alt]
            except KeyError:
                # NOTE(review): if the handler does not raise (e.g. errors="ignore"), 'n' keeps its previous value
                #  (or is unbound on the first pair) — confirm intended behavior
                ehandler(__chr(t[i]), i, s)
            try:
                j = i + 1
                n += b91[__chr(t[j])] * [91, 1][alt]
            except IndexError:
                pass
            except KeyError:
                ehandler(__chr(t[j]), j, s)
            if alt:
                bits += "{:013b}".format(n)
                while 8 <= len(bits):
                    s += chr(int(bits[0:8], 2))
                    bits = bits[8:]
            else:
                # mirror of the encoder: 13-bit chunks when the low 13 bits exceed 88, 14-bit chunks otherwise
                bits = bin(n)[2:].zfill([14, 13][n & 8191 > 88]) + bits
                while len(bits) > 8:
                    s += chr(int(bits[-8:], 2))
                    bits = bits[:-8]
        if alt and len(t) % 2 == 1:
            # a trailing lone character carries 6 bits in the alternate scheme
            bits += "{:06b}".format(b91[__chr(t[-1])])
            while 8 <= len(bits):
                s += chr(int(bits[:8], 2))
                bits = bits[8:]
        elif not alt and len(bits) > 0 and not set(bits) == {"0"}:
            s += chr(int(bits, 2))
        return s.rstrip("\0"), len(t)
    return decode
108 |
109 |
# register the codec for all four variants ; entropy and expansion values are characteristic of base91 output
add("base91", base91_encode, base91_decode, r"^base[-_]?91((?:|[-_]alt(?:ernate)?)(?:|[-_]inv(?:erted)?)?)$",
    entropy=6.5, expansion_factor=1.231)
main91 = main(91, "")
113 |
114 |
--------------------------------------------------------------------------------
/src/codext/base/baseN.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """BaseN Codecs - base content encodings.
3 |
4 | These codecs:
5 | - en/decode strings from str to str
6 | - en/decode strings from bytes to bytes
7 | - decode file content to str (read)
8 | - encode file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 | from ._base import base, base_generic, digits, lower, main, upper
12 | from ._base2n import base2n
13 |
14 |
# base1 ("unary") charset and codec registration
B1 = {chr(i): chr(i) for i in range(2**8)}
B1[''] = "A"
base(B1, r"^(?:base[-_]?1(|[-_].)|unary)$", guess=[])
main1 = main(1)


# base2 (binary), with an inverted variant and custom 2-character charsets (no repeated character allowed)
B2 = {r'': "01", r'[-_]inv(erted)?$': "10"}
base2n(B2, r"^(?:base[-_]?2|bin(?:ary)?)(|[-_]inv(?:erted)?|[-_](?!.*(.).*\2)[a-zA-Z0-9]{2})$", expansion_factor=8.)
main2 = main(2)


B3 = {r'': "123", r'[-_]inv(erted)?$': "321"}
base(B3, r"^base[-_]?3(|[-_]inv(?:erted)?|[-_](?!.*(.).*\2)[a-zA-Z0-9]{3})$", expansion_factor=5.)
main3 = main(3)


B4 = {r'': "1234", r'[-_]inv(erted)?$': "4321"}
base2n(B4, r"^base[-_]?4(|[-_]inv(?:erted)?|[-_](?!.*(.).*\2)[a-zA-Z0-9]{4})$", expansion_factor=4.)
main4 = main(4)


B8 = {r'': "abcdefgh", r'[-_]inv(erted)?$': "hgfedcba"}
base2n(B8, r"^base[-_]?8(|[-_]inv(?:erted)?|[-_](?!.*(.).*\2)[a-zA-Z0-9]{8})$")
main8 = main(8)


B10 = {r'': "0123456789"}
base(B10, r"^(?:base[-_]?10|int(?:eger)?|dec(?:imal)?)$")
main10 = main(10)


B11 = {r'': "0123456789a", r'[-_]inv(erted)?$': "a0123456789"}
base(B11, r"^base[-_]?11(|[-_]inv(?:erted)?)$")
main11 = main(11)


B16 = {'': digits + "ABCDEF", '[-_]inv(erted)?$': "ABCDEF" + digits}
base2n(B16, r"^(?:base[-_]?16|hex)(|[-_]inv(?:erted)?)$", expansion_factor=2.)
main16 = main(16, "RFC 4648")
54 |
55 |
B26 = {'': upper}
base(B26, r"^base[-_]?26$")
main26 = main(26, inv=False)


# base32 charsets: default, z-base-32, inverted, extended hex, Crockford and geohash variants
B32 = {
    r'': upper + "234567",
    r'[-_]?z(?:base32)?$': "ybndrfg8ejkmcpqxot1uwisza345h769",
    r'[-_]inv(erted)?$': "234567" + upper,
    r'(?:[-_](ext(ended)?)?)?[-_]hex$': digits + upper[:22],
    r'[-_]?crockford': digits + "ABCDEFGHJKMNPQRSTVWXYZ",
    r'[-_]?geohash': digits + "bcdefghjkmnpqrstuvwxyz",
}
base2n(B32, r"^(?:base[-_]?32(|[-_]inv(?:erted)?|(?:[-_]ext(?:ended)?)?[-_]hex|[-_](?:z|geohash|crockford))|"
            r"(zbase32|geohash|crockford))$", padding_char="=",
       guess=["base32", "base32-inv", "base32-hex", "base32-geohash", "base32-crockford"])
main32 = main(32, "RFC 4648")
main32hex = main(32, "RFC 4648", "hex", False)
main32geo = main(32, "", "geohash", False)
main32crk = main(32, "", "crockford", False)
mainz32 = main(32, "", "z", False)


B36 = {'': digits + upper, '[-_]inv(erted)?$': upper + digits}
base(B36, r"^base[-_]?36(|[-_]inv(?:erted)?)$")
main36 = main(36, "")


# base58 charsets: Bitcoin, Ripple and Flickr/short-URL variants
B58 = {
    r'(|[-_]?(bc|bitcoin))$': "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz",
    r'[-_]?(rp|ripple)$': "rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz",
    r'[-_]?(fl|flickr|short[-]?url|url)$': "123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ",
}
base(B58, r"^base[-_]?58(|[-_](bc|bitcoin|rp|ripple|fl|flickr|short[-]?url|url))$",
     guess=["base58-bitcoin", "base58-ripple", "base58-flickr"])
main58bc = main(58, "", "bitcoin")
main58rp = main(58, "", "ripple")
main58fl = main(58, "", "flickr")
94 |
95 |
B62 = {'': digits + upper + lower, '[-_]inv(erted)?$': upper + lower + digits}
base(B62, r"^base[-_]?62(|[-_]inv(?:erted)?)$")
main62 = main(62, "")


B63 = {'': digits + upper + lower + "_", 'inv': upper + lower + digits + "_"}
base(B63, r"^base[-_]?63(|[-_]inv(?:erted)?)$")
main63 = main(63)


# base64 charsets: standard, inverted and URL/file-safe variants
B64 = {
    r'': upper + lower + digits + "+/",
    r'[-_]inv(erted)?$': digits + upper + lower + "+/",
    r'[-_]?(file|url)(safe)?$': upper + lower + digits + "-_",
}
base2n(B64, r"^base[-_]?64(|[-_]inv(?:erted)?|[-_]?(?:file|url)(?:safe)?)$", padding_char="=",
       guess=["base64", "base64-inv", "base64-url"])
main64 = main(64, "RFC 4648")
main64url = main(64, "RFC 4648 / Base64URL", "url", False)


B67 = {
    r'': upper + lower + digits + "-_.!~",
    r'[-_]inv(erted)?$': lower + upper + digits + "-_.!~",
}
base(B67, r"^base[-_]?67(|[-_]inv(?:erted)?)$")
main67 = main(67)


# base128 uses the whole 7-bit ASCII range as its charset
B128 = {r'': "".join(chr(i) for i in range(128))}
base(B128, r"^base[-_]?128$", padding_char="=")
main128 = main(128, None, False, wrap=False)


# generic base encodings, to be added after all others as they have the precedence
base_generic()
132 |
133 |
--------------------------------------------------------------------------------
/src/codext/binary/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | from .baudot import *
3 | from .bcd import *
4 | from .excess3 import *
5 | from .gray import *
6 | from .manchester import *
7 | from .rotate import *
8 |
9 |
--------------------------------------------------------------------------------
/src/codext/binary/bcd.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """BCD Codec - Binary Coded Decimal content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
# test vectors for the plain BCD codec
__examples1__ = {
    'enc(bcd|binary-coded-decimal|binary_coded_decimal)': {
        'This is a test!': "\x08A\x04\x10Q\x15\x03!\x05\x11P2\tp2\x11a\x01\x11Q\x16\x030",
    },
    'dec(binary-coded-decimal)': {
        '\xaf': None,
        '\xff': None,
        '\x08A\x04\x10Q\x15\x03!\x05\x11P2\tp2\x11a\x01\x11Q\x16\x030': "This is a test!",
    },
}
# test vectors for the zero-extended variant (each nibble preceded by "0000")
__examples2__ = {
    'enc(bcd-ext0|bcd_extended_zeros)': {
        'This is a test': "\x00\x08\x04\x01\x00\x04\x01\x00\x05\x01\x01\x05\x00\x03\x02\x01\x00\x05\x01\x01\x05\x00"
                          "\x03\x02\x00\t\x07\x00\x03\x02\x01\x01\x06\x01\x00\x01\x01\x01\x05\x01\x01\x06\x00",
    },
}
# test vectors for the ones-extended variant (each nibble preceded by "1111")
__examples3__ = {
    'enc(bcd-ext1|bcd_extended_ones)': {
        'This is a test': "\xf0\xf8\xf4\xf1\xf0\xf4\xf1\xf0\xf5\xf1\xf1\xf5\xf0\xf3\xf2\xf1\xf0\xf5\xf1\xf1\xf5\xf0"
                          "\xf3\xf2\xf0\xf9\xf7\xf0\xf3\xf2\xf1\xf1\xf6\xf1\xf0\xf1\xf1\xf1\xf5\xf1\xf1\xf6\xf0",
    },
}


# 4-bit binary pattern for each decimal digit
CODE = {str(i): bin(i)[2:].zfill(4) for i in range(10)}
38 |
39 |
def bcd_encode(prefix=""):
    """ Factory returning a BCD encoder ; each emitted byte starts with the given fixed bit prefix. """
    def encode(text, errors="strict"):
        out, buf = "", prefix
        # every character becomes its zero-padded 3-digit ordinal, each digit a 4-bit nibble
        for digit in "".join(str(ord(ch)).zfill(3) for ch in text):
            buf += CODE[digit]
            if len(buf) == 8:
                out += chr(int(buf, 2))
                buf = prefix
        if buf:
            # flush any pending bits, right-padded with zeros
            out += chr(int(buf + "0000", 2))
        return out, len(b(text))
    return encode
53 |
54 |
def bcd_decode(prefix=""):
    """ Factory returning a BCD decoder ; each input byte starts with the given fixed bit prefix. """
    def decode(text, errors="strict"):
        lookup = {bits: digit for digit, bits in CODE.items()}
        out, acc = "", ""
        for pos, ch in enumerate(text):
            octet = bin(ord(ch))[2:].zfill(8)
            # skip the prefix bits, then translate each 4-bit group back to a decimal digit
            for start in range(len(prefix), 8, 4):
                nibble = octet[start:start+4]
                try:
                    acc += lookup[nibble]
                except KeyError:
                    acc += handle_error("bcd", errors, decode=True)(nibble, pos)
                if len(acc) == 3:
                    # 3 digits collected: rebuild the original character
                    out += chr(int(acc))
                    acc = ""
        return out, len(b(text))
    return decode
72 |
73 |
# register the plain and extended variants ; entropy and printables-rate values parameterize codec guessing
add("bcd", bcd_encode(), bcd_decode(), pattern=r"^(?:bcd|binary[-_]coded[-_]decimals?)$", examples=__examples1__,
    entropy=lambda e: .45739*e+2.63519, printables_rate=.2)
add("bcd-extended0", bcd_encode("0000"), bcd_decode("0000"), examples=__examples2__, entropy=lambda e: .13584*e+2.07486,
    pattern=r"^(?:bcd|binary[-_]coded[-_]decimals?)[-_]ext(?:ended)?(?:[-_]?0|[-_]zeros?)$")
add("bcd-extended1", bcd_encode("1111"), bcd_decode("1111"), examples=__examples3__, entropy=lambda e: .13584*e+2.07486,
    pattern=r"^(?:bcd|binary[-_]coded[-_]decimals?)[-_]ext(?:ended)?(?:[-_]?1|[-_]ones?)$")
80 |
81 |
--------------------------------------------------------------------------------
/src/codext/binary/excess3.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Excess-3 Codec - Excess-3 code (aka Stibitz code) content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
__examples__ = {
    'enc(excess3|xs-3|stibitz)': {
        'This is a test!': ";t7C\x84H6T8D\x83e<\xa3eD\x944D\x84I6`",
        'This is another test ': ";t7C\x84H6T8D\x83e<\xa4CDDICt4DseD\x944D\x84I6P",
    },
    'dec(excess-3|xs3)': {
        '\x00': None,
        '\xff': None,
        ';t7C\x84H6T8D\x83e<\xa3eD\x944D\x84I6`': "This is a test!",
        ';t7C\x84H6T8D\x83e<\xa4CDDICt4DseD\x944D\x84I6P': "This is another test ",
    },
}


# Excess-3 (Stibitz) code: each decimal digit maps to the 4-bit binary value of digit + 3
CODE = {
    '0': "0011", '1': "0100", '2': "0101", '3': "0110", '4': "0111",
    '5': "1000", '6': "1001", '7': "1010", '8': "1011", '9': "1100",
}
31 |
32 |
def excess3_encode(text, errors="strict"):
    """Encode each character's 3-digit ordinal as Excess-3 nibbles, packing two nibbles per output byte."""
    out, buf = "", ""
    for ch in text:
        for digit in "%03d" % ord(ch):
            buf += CODE[digit]
            if len(buf) == 8:
                out += chr(int(buf, 2))
                buf = ""
    # an odd number of nibbles is completed with a 0000 pad (stripped again on decoding)
    if buf:
        out += chr(int(buf + "0000", 2))
    return out, len(b(text))
44 |
45 |
def excess3_decode(text, errors="strict"):
    """Decode Excess-3 nibbles back to characters; every three decoded digits form one ordinal."""
    code = {v: k for k, v in CODE.items()}  # reversed map: 4-bit string -> decimal digit
    r, d = "", ""
    for i, c in enumerate(text):
        bin_c = bin(ord(c))[2:].zfill(8)
        for k in range(0, 8, 4):  # high nibble first, then low nibble
            hb = bin_c[k:k+4]
            try:
                d += code[hb]
            except KeyError:  # (normal case) occurs when 0000 was used for padding
                # only the trailing low nibble of the last byte may legitimately be the 0000 pad
                if i != len(text) - 1 or k != 4 or hb != "0000":
                    d += handle_error("excess3", errors, decode=True)(hb, i)
        if len(d) == 3:  # three digits collected -> one character ordinal
            r += chr(int(d))
            d = ""
    return r, len(b(text))


add("excess3", excess3_encode, excess3_decode, pattern=r"^(?:excess\-?3|xs\-?3|stibitz)$", printables_rate=.45)
65 |
66 |
--------------------------------------------------------------------------------
/src/codext/binary/gray.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Gray Codec - gray code content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
# encoding test vectors for the Gray (reflected binary) codec
__examples__ = {
    'enc(gray|reflected-bin|reflected_binary)': {
        'this is a test': "N\\]J0]J0Q0NWJN",
        'THIS IS A TEST': "~lmz0mz0a0~gz~",
    },
}
19 |
20 |
21 | ENCMAP = {chr(i): chr(i ^ (i >> 1)) for i in range(256)}
22 |
23 |
24 | add_map("gray", ENCMAP, pattern=r"^(?:gray|reflected[-_]bin(?:ary)?)$", entropy=lambda e: e)
25 |
26 |
--------------------------------------------------------------------------------
/src/codext/binary/manchester.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Manchester Codec - Manchester content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
# reference vectors for both clock conventions (0x55 for plain, 0xaa for the inverted variant)
__examples1__ = {'enc(manchester)': {'This is a test!': "fei\x95i\x96jZYUi\x96jZYUiVYUjeifjZjeYV"}}
__examples2__ = {
    'enc(manchester-inverted|ethernet|ieee802.4)': {
        'This is a test!': "\x99\x9a\x96j\x96i\x95\xa5\xa6\xaa\x96i\x95\xa5\xa6\xaa\x96\xa9\xa6\xaa\x95\x9a\x96\x99"
                           "\x95\xa5\x95\x9a\xa6\xa9",
    },
}
20 |
21 |
def manchester_encode(clock):
    """Return an encoder doubling every bit and XOR'ing each nibble-derived byte with *clock*."""
    def encode(text, errors="strict"):
        out = ""
        for ch in text:
            octet = bin(ord(ch))[2:].zfill(8)
            for nibble in (octet[:4], octet[4:]):
                doubled = "".join(bit + bit for bit in nibble)
                out += chr(int(doubled, 2) ^ clock)
        return out, len(b(text))
    return encode
31 |
32 |
def manchester_decode(clock):
    """Return a decoder XOR'ing each byte with *clock* and keeping every other bit of the result."""
    def decode(text, errors="strict"):
        out, acc = "", ""
        for ch in text:
            acc += bin(ord(ch) ^ clock)[2:].zfill(8)[::2]
            if len(acc) == 8:
                out += chr(int(acc, 2))
                acc = ""
        return out, len(b(text))
    return decode
44 |
45 |
# the inverted variant (clock 0xaa) is the one the "ethernet"/"ieee802.4" aliases refer to
add("manchester", manchester_encode(0x55), manchester_decode(0x55), examples=__examples1__, printables_rate=.25,
    entropy=lambda e: .17616*e+2.56229)
add("manchester-inverted", manchester_encode(0xaa), manchester_decode(0xaa), examples=__examples2__,
    pattern=r"^(?:manchester-inverted|ethernet|ieee802\.4)$", entropy=lambda e: .17616*e+2.56229)
50 |
51 |
--------------------------------------------------------------------------------
/src/codext/binary/rotate.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Rotate-Bits Codec - rotate-N-bits content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
# rotations of 0 or 8 bits are identities and must be rejected (None); guesses cover 1-7 both ways
__examples__ = {
    'enc(rotate-0|rotate-8|rotate-left-8)': None,
    'enc(rotate1|rotate-right-1|rotate_1)': {'This is a test': "*4\xb4\xb9\x10\xb4\xb9\x10\xb0\x10:\xb2\xb9:"},
    'enc(rotate-left-1|rotate_left_1)': {'This is a test': "¨ÐÒæ@Òæ@Â@èÊæè"},
}
__guess__ = ["rotate-%d" % i for i in range(1, 8)] + ["rotate-left-%d" % i for i in range(1, 8)]
19 |
20 |
21 | def _getn(i):
22 | m = 1
23 | if str(i).startswith("left"):
24 | i = i[4:].lstrip("-_")
25 | m = -1
26 | return m * int(i)
27 |
28 |
def _rotaten(text, n=1):
    """Rotate the bits of every byte of *text* by n positions (positive n rotates right)."""
    out = ""
    for ch in ensure_str(text):
        bits = bin(ord(ch))[2:].zfill(8)
        out += chr(int(bits[-n:] + bits[:-n], 2))
    return out
35 |
36 |
def rotate_encode(i):
    """Return an encoder rotating each byte's bits by the amount parsed from spec *i*."""
    def encode(text, errors="strict"):
        n = _getn(i)
        return _rotaten(text, n), len(text)
    return encode
41 |
42 |
def rotate_decode(i):
    """Return a decoder undoing the rotation of spec *i* by rotating the opposite way."""
    def decode(text, errors="strict"):
        n = _getn(i)
        return _rotaten(text, -n), len(text)
    return decode
47 |
48 |
# NOTE(review): transitive=True presumably means chained rotations compose into a single one —
# confirm against codext.__common__.add
add("rotate", rotate_encode, rotate_decode, r"rotate(?:[-_]?bits)?[-_]?((?:(?:left|right)[-_]?)?[1-7])$",
    transitive=True)
51 |
52 |
--------------------------------------------------------------------------------
/src/codext/common/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | from .a1z26 import *
3 | from .cases import *
4 | from .dummy import *
5 | from .octal import *
6 | from .ordinal import *
7 |
8 |
--------------------------------------------------------------------------------
/src/codext/common/a1z26.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """A1Z26 Codec - A1Z26 content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from string import ascii_lowercase as lower
11 |
12 | from ..__common__ import *
13 |
14 |
# supported token separators; "-" (the first one) is the default
SEP = "-_/|,;:*"

__examples__ = {
    'enc(a1z26-BAD)': None,
    'dec(a1z26)': {'1-12-123': None},
    'enc(a1z26)': {'test123': None, 'this is a test': "20-8-9-19 9-19 1 20-5-19-20"},
    'enc(a1z26-/)': {'this is a test': "20/8/9/19 9/19 1 20/5/19/20"},
}
__guess__ = ["a1z26", "a1z26_"] + ["a1z26-" + s for s in SEP[2:]]
24 |
25 |
def a1z26_encode(sep):
    """Return an encoder mapping each letter to its 1-based alphabet position, separator-joined per word."""
    sep = sep[-1] if sep else "-"
    def encode(text, errors="strict"):
        encoded_words = []
        for word in text.split():
            tokens = []
            for pos, ch in enumerate(word):
                try:
                    tokens.append(str(lower.index(ch.lower()) + 1))
                except ValueError:  # not a letter
                    tokens.append(handle_error("a1z26", errors)(ch, pos))
            encoded_words.append(sep.join(tokens).strip(sep))
        return " ".join(encoded_words), len(text)
    return encode
40 |
41 |
def a1z26_decode(sep):
    """Return a decoder mapping 1-26 position tokens back to letters.

    Any token that is not an integer in [1, 26] is routed through handle_error. The previous
    version indexed lower[int(i)-1] directly and only caught IndexError, so the token "0"
    silently decoded to 'z' (index -1) and non-numeric tokens crashed with a ValueError.
    """
    sep = sep[-1] if len(sep) > 0 else "-"
    def decode(text, errors="strict"):
        k, words = 0, []
        for word in text.split():
            w = ""
            for i in word.split(sep):
                k += 1
                try:
                    n = int(i)
                    if not 1 <= n <= 26:
                        raise ValueError(i)  # outside the a-z range
                    w += lower[n - 1]
                except ValueError:
                    w += handle_error("a1z26", errors, decode=True)(str(i), k)
            words.append(w)
        return " ".join(words), len(text)
    return decode
57 |
58 |
59 | add("a1z26", a1z26_encode, a1z26_decode, pattern=r"^a1z26(|[-_]|[-_][/|,;:\*])$", printables_rate=1.)
60 |
61 |
--------------------------------------------------------------------------------
/src/codext/common/cases.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Case Codecs - simple string case manipulations.
3 |
4 | These are case-related codecs for manipulating strings, for use with other codecs in encoding/decoding chains.
5 |
6 | These codecs:
7 | - en/decodes strings from str to str
8 | - en/decodes strings from bytes to bytes
9 | - decodes file content to str (read)
10 | - encodes file content from str to bytes (write)
11 | """
12 | import re
13 |
14 | from ..__common__ import add
15 |
16 |
# PascalCase joins capitalized alphanumeric runs; camelCase is the same with the first letter lowered.
# Both now report the INPUT length as the consumed length, like every other codec in this module
# (the previous camelcase returned uncapitalize()'s tuple, whose length field was that of the
# PascalCase string, not of the input).
pascal = lambda i, e="strict": ("".join(x.capitalize() for x in re.findall(r"[0-9a-z]+", i.lower())), len(i))
add("camelcase", lambda i, e="strict": (uncapitalize(pascal(i, e)[0])[0], len(i)), None, r"^camel(?:[-_]?case)?$")
add("pascalcase", pascal, None, r"^pascal(?:[-_]?case)?$")
20 |
def capitalize(i, e="strict"):
    """Upper-case the first character and lower-case the rest (str.capitalize semantics)."""
    return i.capitalize(), len(i)

def uncapitalize(i, e="strict"):
    """Lower-case the first character only, leaving the rest untouched."""
    return (i[0].lower() + i[1:] if len(i) > 0 else ""), len(i)
23 | add("capitalize", capitalize, uncapitalize, penalty=.2)
24 |
25 | lowercase, uppercase = lambda i, e="strict": (i.lower(), len(i)), lambda i, e="strict": (i.upper(), len(i))
26 | add("uppercase", uppercase, lowercase, r"^upper(?:case)?$", penalty=.2)
27 | add("lowercase", lowercase, uppercase, r"^lower(?:case)?$", penalty=.2)
28 |
29 | slugify = lambda i, e="strict", d="-": (re.sub(r"[^0-9a-z]+", d, i.lower()).strip(d), len(i))
add("slugify", lambda i, e="strict": slugify(i, e), None, r"^(?:slug(?:ify)?|(?:dash|kebab)(?:[-_]?case)?)$")
add("snakecase", lambda i, e="strict": slugify(i, e, "_"), None, r"^snake(?:[-_]?case)?$")
# fix: slugify returns an (output, length) tuple, so the previous `.upper()` on that tuple raised
# AttributeError; upper-case the string element and keep the (output, consumed-length) contract
add("screamingsnakecase", lambda i, e="strict": (slugify(i, e, "_")[0].upper(), len(i)), None,
    r"^screaming[-_]snake(?:[-_]?case)?$")
33 |
34 | swapcase = lambda i, e="strict": (i.swapcase(), len(i))
35 | add("swapcase", swapcase, swapcase, r"^(?:(?:flip|swap)(?:[-_]?case)?|invert(?:case)?)$", penalty=.2)
36 |
def title(i, e="strict"):
    """Title-case the string (str.title semantics)."""
    return i.title(), len(i)

def untitle(i, e="strict"):
    """Lower-case the first letter of every whitespace-separated word (runs of whitespace collapse)."""
    return " ".join(w[0].lower() + w[1:] if len(w) > 0 else "" for w in i.split()), len(i)
39 | add("title", title, untitle, penalty=.2)
40 |
41 |
--------------------------------------------------------------------------------
/src/codext/common/dummy.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Dummy Codecs - simple string manipulations.
3 |
4 | These are dummy codecs for manipulating strings, for use with other codecs in encoding/decoding chains.
5 |
6 | These codecs:
7 | - en/decodes strings from str to str
8 | - en/decodes strings from bytes to bytes
9 | - decodes file content to str (read)
10 | - encodes file content from str to bytes (write)
11 | """
12 | import re
13 |
14 | from ..__common__ import *
15 |
16 |
def replace(pair, *args):
    """Return a codec replacing every occurrence of pair[0] with pair[1] (pair comes from the codec name)."""
    def code(text, errors="strict"):
        return text.replace(pair[0], pair[1]), len(text)
    return code
# the capture group matches exactly two DISTINCT characters: (old, new)
add("replace", replace, replace, r"^replace[-_]?((?!.*(.).*\2)..)$", guess=None)
# important note: ^
# using "{2}" here instead will break the codec
# this is due to the fact the codext.__common__.generate_string_from_regex DOES NOT handle ASSERT_NOT (?!) and will
# fail to generate a valid instance in lookup(...) when an encoding name is to be generated to get the CodecInfo
26 |
27 |
def substitute(token, replacement):
    """Return a codec replacing every occurrence of *token* (any length) with *replacement*."""
    def code(text, errors="strict"):
        return text.replace(token, replacement), len(text)
    return code
32 | add("substitute", substitute, substitute, r"^substitute[-_]?(.*?)/(.*?)$", guess=None)
33 |
34 |
35 | reverse = lambda i, e="strict": (i[::-1], len(i))
add("reverse", reverse, reverse)

# reverse each line (or each word within a line when wd=True) while keeping the \n / \r\n
# separators themselves in place; re.split with a capturing group keeps the separators in the list
_revl = lambda i, wd=False: "".join((" ".join(w[::-1] for w in l.split()) if wd else l[::-1]) \
                                    if not re.match(r"(\r?\n)", l) else l for l in re.split(r"(\r?\n)", i))
line_reverse = lambda i, e="strict": (_revl(i), len(i))
add("reverse-lines", line_reverse, line_reverse, r"^reverse[-_]lines$")
word_reverse = lambda i, e="strict": (_revl(i, True), len(i))
add("reverse-words", word_reverse, word_reverse, r"^reverse[-_]words$")
44 |
45 | strip_spaces = lambda i, e="strict": (i.replace(" ", ""), len(i))
46 | add("strip-spaces", strip_spaces, strip_spaces, guess=None)
47 |
def tokenize(n):
    """Return a codec splitting the input into space-separated chunks of the size encoded in codec name *n*."""
    size = int(n[8:].lstrip("-_"))
    def code(text, errors="strict"):
        total = len(text)
        if size > total:
            # a chunk longer than the whole input cannot be produced; reject the codec name
            raise LookupError("unknown encoding: %s" % n)
        return " ".join(text[j:j+size] for j in range(0, total, size)), total
    return code
56 | add("tokenize", tokenize, tokenize, r"^(tokenize[-_]?[1-9][0-9]*)$", guess=None)
57 |
58 |
--------------------------------------------------------------------------------
/src/codext/common/octal.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Octal Codec - octal content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
# reference vectors: variable-width spaced octal vs fixed 3-digit compact octal
__examples1__ = {
    'enc(octal-spaced|octals_spaced)': {'this is a test': "164 150 151 163 40 151 163 40 141 40 164 145 163 164"},
}
__examples2__ = {
    'enc(octal|octals)': {'this is a test': "164150151163040151163040141040164145163164"},
}
19 |
20 |
# octal representation without Python's "0o" prefix (e.g. 8 -> "10", 0 -> "0")
oct2 = lambda i: format(i, "o")

ENCMAP1 = {chr(i): oct2(i) for i in range(256)}           # variable width, for the spaced variant
ENCMAP2 = {chr(i): oct2(i).zfill(3) for i in range(256)}  # fixed 3 digits, for the compact variant
25 |
26 |
# the spaced variant needs a separator since its tokens are variable-width
add_map("octal-spaced", ENCMAP1, sep=" ", pattern=r"^octals?[-_]spaced$", examples=__examples1__,
        entropy=lambda e: .07258*e+2.3739, printables_rate=1.)
add_map("octal", ENCMAP2, pattern=r"^octals?$", examples=__examples2__, entropy=lambda e: .08803*e+2.19498,
        printables_rate=1.)
31 |
32 |
--------------------------------------------------------------------------------
/src/codext/common/ordinal.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Ordinal Codec - ordinal content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
# reference vectors: variable-width spaced ordinals vs fixed 3-digit compact ordinals
__examples1__ = {
    'enc(ordinal-spaced|ordinals_spaced)': {'this is a test': "116 104 105 115 32 105 115 32 97 32 116 101 115 116"},
}
__examples2__ = {
    'enc(ordinal|ordinals)': {'this is a test': "116104105115032105115032097032116101115116"},
}
19 |
20 |
# decimal ordinal per byte: variable width (spaced variant) and zero-padded 3 digits (compact variant)
ENCMAP1 = {chr(c): "%d" % c for c in range(256)}
ENCMAP2 = {chr(c): "%03d" % c for c in range(256)}
23 |
24 |
# the spaced variant needs a separator since its tokens are variable-width
add_map("ordinal-spaced", ENCMAP1, sep=" ", pattern=r"^ordinals?[-_]spaced$", examples=__examples1__, entropy=3.,
        printables_rate=1.)
add_map("ordinal", ENCMAP2, pattern=r"^ordinals?$", examples=__examples2__, entropy=3., printables_rate=1.)
28 |
29 |
--------------------------------------------------------------------------------
/src/codext/compressions/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | from .gzipp import *
3 | from .lz77 import *
4 | from .lz78 import *
5 | from .pkzip import *
6 |
7 |
# give every registered compression codec a flat high entropy score (compressed data looks random)
# and an identity expansion factor
for e in list_encodings("compression"):
    ci = lookup(e, False)
    ci.parameters['scoring']['entropy'] = 7.9
    ci.parameters['scoring']['expansion_factor'] = lambda f: f
12 |
13 |
--------------------------------------------------------------------------------
/src/codext/compressions/gzipp.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Gzip Codec - gzip content compression.
3 |
4 | NB: Not an encoding properly speaking.
5 |
6 | This codec:
7 | - en/decodes strings from str to str
8 | - en/decodes strings from bytes to bytes
9 | - decodes file content to str (read)
10 | - encodes file content from str to bytes (write)
11 | """
12 | import zlib
13 | from gzip import GzipFile
14 |
15 | from ..__common__ import *
16 |
17 |
18 | __examples__ = {'enc-dec(gzip)': ["test", "This is a test", "@random{512,1024,2048}"]}
19 |
20 |
def gzip_compress(text, errors="strict"):
    """Gzip-compress *text* and return (compressed bytes, number of input units consumed).

    NOTE(review): GzipFile stamps the current time into the header's MTIME field, so the
    encoded output is not byte-for-byte deterministic across calls — confirm this is acceptable.
    """
    out = BytesIO()
    with GzipFile(fileobj=out, mode="wb") as f:
        f.write(b(text))
    return out.getvalue(), len(text)
26 |
27 |
def gzip_decompress(data, errors="strict"):
    """Decompress gzip data, first through the GzipFile interface, then through a one-shot zlib call.

    The previous version always ran the zlib fallback, discarding a successful GzipFile result;
    when the fallback then failed on data the file interface had already decompressed, valid
    input was reported as an error. A successful first attempt now returns immediately.
    """
    # first try decompressing through the file-object interface
    try:
        with GzipFile(fileobj=BytesIO(b(data)), mode="rb") as f:
            r = f.read()
        return r, len(r)
    except Exception:
        pass
    # then try a one-shot decompression expecting a gzip header (wbits = 16 + MAX_WBITS)
    try:
        r = zlib.decompress(b(data), 16 + zlib.MAX_WBITS)
    except Exception:
        return handle_error("gzip", errors, decode=True)(data[0], 0) if len(data) > 0 else "", len(data)
    return r, len(r)
41 |
42 |
43 | add("gzip", gzip_compress, gzip_decompress)
44 |
45 |
--------------------------------------------------------------------------------
/src/codext/compressions/lz77.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """LZ77 Codec - Lempel-Ziv 1977 compression algorithm.
3 |
4 | NB: Not an encoding properly speaking.
5 |
6 | This codec:
7 | - en/decodes strings from str to str
8 | - en/decodes strings from bytes to bytes
9 | - decodes file content to str (read)
10 | - encodes file content from str to bytes (write)
11 |
12 | Inspired from: https://github.com/manassra/LZ77-Compressor
13 | """
14 | from ..__common__ import *
15 |
16 |
17 | __examples__ = {'enc-dec(lz77)': ["test", "This is a test", "@random{512,1024,2048}"]}
18 |
19 |
20 | _B2b = lambda B: bin(B if isinstance(B, int) else ord(B))[2:].zfill(8)
21 | _b2B = lambda bt: "".join(chr(int(bt[i:i+8], 2)) for i in range(0, len(bt), 8))
22 | WINDOW_SIZE = 20
23 |
24 |
def _find_longest_match(data, pos):
    """ Finds the longest match to a substring starting at the current position (pos) in the lookahead buffer from
         the history window.

    Returns (best_match_distance, best_match_length), or implicitly None when no match exists;
    the caller relies on the resulting TypeError when unpacking None to emit a literal instead.
    """
    # the lookahead is capped at 15 characters so the match length fits in 4 bits
    eob, bmd, bml = min(pos + 15, len(data) + 1), -1, -1
    for j in range(pos + 2, eob):
        start = max(0, pos - WINDOW_SIZE)
        substr = data[pos:j]
        l = len(substr)
        for i in range(start, pos):
            # the candidate window slice may repeat n times plus a remainder, allowing
            # overlapping (self-referencing) matches
            n, r = l // (pos - i), l % (pos - i)
            if data[i:pos] * n + data[i:i+r] == substr and l > bml:
                bmd, bml = pos - i, l
    if bmd > 0 and bml > 0:
        return bmd, bml


def lz77_compress(input, errors="strict"):
    """ Compresses the given data by applying LZ77 compression algorithm.

    Output bit stream: flag 1 + 12-bit distance + 4-bit length for a match,
    flag 0 + 8 literal bits otherwise; padded with zeros to a whole number of bytes.
    """
    i, l, bits = 0, len(input), ""
    while i < l:
        try:
            bmd, bml = _find_longest_match(input, i)
            bits += "1" + _B2b(bmd >> 4) + _B2b(((bmd & 0xf) << 4) | bml)
            i += bml
        except TypeError:  # _find_longest_match returned None -> no match, emit a literal
            bits += "0" + _B2b(input[i])
            i += 1
    bits += "0" * ((8 - (len(bits) % 8)) % 8)  # pad to a byte boundary
    return _b2B(bits), l


def lz77_decompress(input, errors="strict"):
    """ Decompresses the given data. """
    out, d = "", "".join(_B2b(c) for c in input)
    while len(d) >= 9:  # at least a flag bit plus one literal byte left
        flag, d = d[0], d[1:]
        if flag == "0":
            out += _b2B(d[:8])
            d = d[8:]
        else:
            B1, B2 = int(d[:8], 2), int(d[8:16], 2)
            d = d[16:]
            dist = (B1 << 4) | (B2 >> 4)  # 12-bit distance, 4-bit length (B2 & 0xf)
            for i in range(B2 & 0xf):
                # one character at a time so overlapping back-references copy correctly
                out += out[-dist]
    return out, len(out)


add("lz77", lz77_compress, lz77_decompress)
74 |
75 |
--------------------------------------------------------------------------------
/src/codext/compressions/lz78.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """LZ78 Codec - Lempel-Ziv 1978 compression algorithm.
3 |
4 | NB: Not an encoding properly speaking.
5 |
6 | This codec:
7 | - en/decodes strings from str to str
8 | - en/decodes strings from bytes to bytes
9 | - decodes file content to str (read)
10 | - encodes file content from str to bytes (write)
11 |
12 | Inspired from: https://github.com/mileswatson/lempel-ziv-compression
13 | """
14 | from ..__common__ import *
15 |
16 |
# round-trip vectors, including random payloads of several sizes
__examples__ = {'enc-dec(lz78)': ["test", "This is a test", "@random{512,1024,2048}"]}


def lz78_compress(input, errors="strict"):
    """ Compresses the given data by applying LZ78 compression algorithm.

    Phrase indexes are emitted as little-endian multi-byte tuples whose width grows with the
    dictionary size; ctr holds the next index in that multi-byte form.
    """
    data = tuple(c if isinstance(c, int) else ord(c) for c in input)
    if len(data) == 0:
        return "", 0
    out = (data[0], )
    d = {tuple(): (0, ), (data[0], ): (1, )}  # phrase -> encoded index
    a, b, ctr = 1, 1, [2]  # a..b: current phrase span; ctr: next index, little-endian bytes
    while b < len(data):
        if not data[a:b+1] in d:
            w = d[data[a:b]]
            # emit the known-prefix index zero-padded to the current counter width, then the new literal
            out += w + tuple(0 for i in range(len(ctr) - len(w) - int(sum(ctr) == 1))) + (data[b], )
            d[data[a:b+1]] = tuple(ctr)
            # increment the multi-byte counter with carry, growing it by one byte on overflow
            for i in range(len(ctr)):
                ctr[i] += 1
                if ctr[i] != 256:
                    break
                else:
                    ctr[i] = 0
                    if i == len(ctr) - 1:
                        ctr.append(1)
            a = b + 1
        b += 1
    if data[a:b] in d and a != b:
        # flush the trailing phrase (no new literal follows it)
        w = tuple(d[data[a:b]])
        out += w + tuple(0 for i in range(len(ctr) - len(w)))
    return "".join(chr(i) for i in out), len(out)


def lz78_decompress(input, errors="strict"):
    """ Decompresses the given data. """
    data = tuple(c if isinstance(c, int) else ord(c) for c in input)
    if len(data) == 0:
        return "", 0
    out = (data[0], )
    l = [tuple(), out]  # phrase table indexed by the decoded counter value
    # a: read position, b: current index width (bytes), c: threshold at which the width grows,
    # i: last decoded phrase index, char: whether the next byte is the literal following a phrase
    a, b, c, i, char = 1, 1, 256, 0, False
    try:
        while a < len(data):
            if char:
                out += (data[a], )
                l.append(l[i] + (data[a], ))
                char = False
                a += 1
                if len(l) == c + 1:  # table crossed the width threshold -> indexes get one byte wider
                    b += 1
                    c *= 256
            else:
                # read a b-byte little-endian phrase index
                i, m = 0, 1
                for j in range(b):
                    i += data[a + j] * m
                    m *= 256
                out += l[i]
                a += b
                char = True
    except:
        return handle_error("lz78", errors, decode=True)(chr(data[a]), a), len(input)
    return "".join(chr(i) for i in out), len(out)


add("lz78", lz78_compress, lz78_decompress)
81 |
82 |
--------------------------------------------------------------------------------
/src/codext/compressions/pkzip.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Pkzip Codec - pkzip content compression.
3 |
4 | NB: Not an encoding properly speaking.
5 |
6 | This codec:
7 | - en/decodes strings from str to str
8 | - en/decodes strings from bytes to bytes
9 | - decodes file content to str (read)
10 | - encodes file content from str to bytes (write)
11 | """
12 | import zipfile
13 |
14 | from ..__common__ import *
15 |
16 |
# shared round-trip vectors for the three pkzip compression variants
_str = ["test", "This is a test", "@random{512,1024,2048}"]
__examples1__ = {'enc-dec(pkzip-deflate|deflate)': _str}
__examples2__ = {'enc-dec(pkzip_bz2|bzip2)': _str}
__examples3__ = {'enc-dec(pkzip-lzma|lzma)': _str}


# byte patterns accepted as valid encodings of empty output, keyed by compression type id
# (8=deflate, 12=bzip2, 14=lzma); see pkzip_decode, which otherwise treats empty output as an error
NULL = {
    8: b"\x03\x00",
    12: b"BZh9\x17rE8P\x90\x00\x00\x00\x00",
    14: b"\t\x04\x05\x00]\x00\x00\x80\x00\x00\x83\xff\xfb\xff\xff\xc0\x00\x00\x00",
}
28 |
29 |
def pkzip_encode(compression_type):
    """Return an encoder compressing with zipfile's (private) compressor for the given type id."""
    def _encode(text, errors="strict"):
        compressor = zipfile._get_compressor(compression_type)
        compressed = compressor.compress(b(text))
        return compressed + compressor.flush(), len(text)
    return _encode
35 |
36 |
def pkzip_decode(compression_type, name):
    """Return a decoder for the given type id; empty output from non-trivial input is treated as an error."""
    def _decode(data, errors="strict"):
        decompressor = zipfile._get_decompressor(compression_type)
        result = decompressor.decompress(b(data))
        if len(result) == 0 and b(data) != NULL[compression_type]:
            err = handle_error(name, errors, decode=True)
            return (err(data[0], 0) if len(data) > 0 else ""), len(data)
        return result, len(result)
    return _decode
45 |
46 |
# type ids match zipfile's constants: ZIP_DEFLATED (8), ZIP_BZIP2 (12), ZIP_LZMA (14)
add("pkzip_deflate", pkzip_encode(8), pkzip_decode(8, "deflate"), r"(?:(?:pk)?zip[-_])?deflate",
    examples=__examples1__, guess=["deflate"])

add("pkzip_bzip2", pkzip_encode(12), pkzip_decode(12, "bzip2"), r"(?:(?:pk)?zip[-_])?bz(?:ip)?2",
    examples=__examples2__, guess=["bz2"])

add("pkzip_lzma", pkzip_encode(14), pkzip_decode(14, "lzma"), r"(?:(?:pk)?zip[-_])?lzma",
    examples=__examples3__, guess=["lzma"])
55 |
56 |
--------------------------------------------------------------------------------
/src/codext/crypto/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | from .affine import *
3 | from .atbash import *
4 | from .bacon import *
5 | from .barbie import *
6 | from .citrix import *
7 | from .railfence import *
8 | from .rot import *
9 | from .scytale import *
10 | from .shift import *
11 | from .xor import *
12 |
13 |
--------------------------------------------------------------------------------
/src/codext/crypto/affine.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Affine Cipher Codec - affine content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 |
10 | Reference: https://crypto.interactive-maths.com/affine-cipher.html
11 | """
12 | from ..__common__ import *
13 |
14 |
15 | __guess__ = []
16 |
17 |
def encmap_factory(mask=None):
    """Build the affine-cipher substitution map from a "<mask>-<a>,<b>" parameter string.

    Splits the key off at the LAST dash so the alphabet mask itself may contain dashes
    (the previous mask.split("-") raised ValueError on such masks).
    """
    mask = mask or "?l?u?s-1,2"
    mask, key = mask.rsplit("-", 1)
    a, b = map(int, key.split(","))
    alphabet = get_alphabet_from_mask(mask)
    encmap = {c: alphabet[(a * alphabet.index(c) + b) % len(alphabet)] for c in alphabet}
    # 'a' must be coprime with the alphabet length, otherwise the mapping is not a bijection
    if len(set(encmap.keys())) != len(set(encmap.values())):
        raise LookupError("Bad parameter for encoding 'affine': {}, {}".format(a, b))
    # keep spaces as-is when they are not already part of the alphabet
    if ' ' not in encmap:
        encmap[' '] = " "
    return encmap
29 |
30 |
31 | add_map("affine", encmap_factory, pattern=r"^affine(?:[-_]cipher)?(?:[-_](.+?\-\d+\,\d+))?$")
32 |
33 |
--------------------------------------------------------------------------------
/src/codext/crypto/atbash.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Atbash Cipher Codec - atbash-based content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 |
10 | Reference: https://crypto.interactive-maths.com/atbash-cipher.html
11 | """
12 | from ..__common__ import *
13 |
14 |
15 | __guess__ = ["atbash"]
16 |
17 |
def encmap_factory(mask=None):
    """Build the atbash map: each alphabet (or group of the mask) is mirrored onto its own reverse."""
    mask = mask or "?u?l"
    # a [...] enclosure means the whole mask is handled as a single alphabet
    if mask[0] == "[" and mask[-1] == "]":
        alphabet = get_alphabet_from_mask(mask[1:-1])
        return dict(zip(alphabet, alphabet[::-1]))
    # otherwise every "?x" group or literal run is mirrored on its own
    encmap = {}
    for group in re.findall(r"(\?.|[^?]+)", mask):
        alphabet = get_alphabet_from_mask(group)
        encmap.update(zip(alphabet, alphabet[::-1]))
    return encmap
31 |
32 |
33 | add_map("atbash", encmap_factory, no_error=True, pattern=r"atbash(?:[-_]cipher)?(?:[-_](.+))?$")
34 |
35 |
--------------------------------------------------------------------------------
/src/codext/crypto/bacon.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Bacon's Cipher Codec - bacon content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 |
10 | Reference: https://en.wikipedia.org/wiki/Bacon%27s_cipher
11 | """
12 | from ..__common__ import *
13 |
14 |
__examples__ = {
    'enc(bacon|bacon_cipher|baconian-cipher|bacon-ab|bacon_AB)': {
        'this is a test': "baabaaabbbabaaabaaab abaaabaaab aaaaa baabaaabaabaaabbaaba",
    },
    'enc(bacon-01|bacon_01)': {
        'this is a test': "10010001110100010001 0100010001 00000 10010001001000110010",
    },
}
# NB: defined as a set here, while the other codecs define __guess__ as a list
__guess__ = {"bacon", "bacon-ba", "bacon-01", "bacon-10"}


# 24-symbol Baconian alphabet: I/J and U/V deliberately share the same 5-symbol code
ENCMAP = {
    'A': "aaaaa", 'B': "aaaab", 'C': "aaaba", 'D': "aaabb", 'E': "aabaa", 'F': "aabab", 'G': "aabba", 'H': "aabbb",
    'I': "abaaa", 'J': "abaaa", 'K': "abaab", 'L': "ababa", 'M': "ababb", 'N': "abbaa", 'O': "abbab", 'P': "abbba",
    'Q': "abbbb", 'R': "baaaa", 'S': "baaab", 'T': "baaba", 'U': "baabb", 'V': "baabb", 'W': "babaa", 'X': "babab",
    'Y': "babba", 'Z': "babbb", ' ': " ",
}


# each letter becomes 5 symbols, hence expansion_factor=5; the optional 2-char group remaps "ab"
add_map("bacon", ENCMAP, ignore_case="both", pattern=r"bacon(?:(?:ian)?[-_]cipher)?([\-_].{2})?$", expansion_factor=5.,
        printables_rate=1.)
36 |
37 |
--------------------------------------------------------------------------------
/src/codext/crypto/barbie.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Barbie typewriter Codec - barbie content encoding.
3 |
4 | While Barbie typewriter is more a cipher, its very limited key size of 2 bits makes it easy to turn into four variants
5 | of the same encoding.
6 |
7 | This codec:
8 | - en/decodes strings from str to str
9 | - en/decodes strings from bytes to bytes
10 | - decodes file content to str (read)
11 | - encodes file content from str to bytes (write)
12 |
13 | Reference: http://www.cryptomuseum.com/crypto/mehano/barbie/
14 | """
15 | from ..__common__ import *
16 |
17 |
# reference vectors for the four key settings ('\r' is not encodable and must be rejected)
__examples__ = {
    'enc(barbie1)': {'\r': None},
    'enc(barbie1|barbie_1|barbie-1)': {'this is a test': "hstf tf i hafh"},
    'enc(barbie2|barbie_2|barbie-2)': {'this is a test': "sfhp hp t sips"},
    'enc(barbie3|barbie_3|barbie-3)': {'this is a test': "fpsu su h ftuf"},
    'enc(barbie4|barbie_4|barbie-4)': {'this is a test': "pufq fq s phqp"},
}
__guess__ = ["barbie-%d" % i for i in range(1, 5)]
26 |
27 |
28 | STD = [
29 | "abcdefghijklmnopqrstuvABCDEFGHIJKLMNOPQRSTUVWXYZ0123456 \n\t",
30 | "icolapxstvybjeruknfhqg>FAUTCYOLVJDZINQKSEHG<.1PB5234067 \n\t",
31 | "torbiudfhgzcvanqyepskxRC>GHAPNDQIUXSPNRKLG1XYCUDV ¢ £ § €",
37 | "; d z w 8 9 - ¨ _ & m @ : \" * ( # W M § ^ , ¢ / ? ! ) % X \' R + € £ =",
38 | "¢ l w ; 9 - ¨ § ) \" j ? , m # * @ . Z £ ! W + ^ / & ( : 1 _ S % = € \'",
39 | "+ b ; ¢ - ¨ § £ ( m v / W j @ # ? M B € & . % ! ^ \" * , 2 ) E : \' = _",
40 | "% c ¢ + ¨ § £ € * j g ^ . v ? @ / Z F = \" N : & ! m # W 3 ( T , _ \' )",
41 | ]
# build one substitution map per key setting (1-4): single characters come from STD,
# the space-separated special characters from SPEC (row 0 of each is the plain alphabet)
ENCMAP = []
for i in range(4):
    encmap = {}
    for j, c in enumerate(STD[0]):
        encmap[c] = STD[i+1][j]
    spec = SPEC[i+1].split()
    for j, c in enumerate(SPEC[0].split()):
        encmap[c] = spec[j]
    ENCMAP.append(encmap)


# the captured digit (1-4) selects which of the four maps is used
add_map("barbie", ENCMAP, pattern=r"^barbie[-_]?([1-4])$", printables_rate=lambda pr: .857 * pr)
54 |
55 |
--------------------------------------------------------------------------------
/src/codext/crypto/citrix.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Citrix Codec - citrix password encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 |
10 | Reference: https://crypto.interactive-maths.com/atbash-cipher.html
11 | """
12 | from ..__common__ import *
13 |
14 |
# only the CTX1 variant is supported; "ctx0" must be rejected
__examples__ = {
    'enc(citrix-ctx0)': None,
    'enc(citrix|citrix-1|citrix_ctx1)': {'this is a test': "NBBMNAAGIDEPJJBMNIFNIMEMJKEL"},
}
__guess__ = ["citrix-ctx1"]
20 |
21 |
22 | _dec = lambda g: ((ord(g[0]) - 0x41) & 0xf) ^ ((((ord(g[1]) - 0x41) & 0xf) << 4) & 0xf0)
23 | _enc = lambda o: chr(((o >> 4) & 0xf) + 0x41) + chr((o & 0xf) + 0x41)
24 |
25 |
def citrix_encode(t):
    """Return the Citrix CTX1 encoder: XOR each char with 0xa5 and the previous output byte, then letter-encode."""
    def encode(text, errors="strict"):
        out, previous = "", 0
        for ch in text:
            previous = ord(ch) ^ 0xa5 ^ previous
            out += _enc(previous)
        return out, len(text)
    return encode
35 |
36 |
def citrix_decode(t):
    """Return a decoder closure reversing Citrix CTX1 encoding."""
    def decode(text, errors="strict"):
        length = len(text)
        # walk the ciphertext backwards two letters at a time; each plaintext
        # byte is the XOR of its cipher byte, the preceding cipher byte (0 for
        # the very first pair) and the 0xa5 mask
        rev, out = text[::-1], ""
        for i in range(0, length, 2):
            prev = _dec(rev[i+2:i+4]) if i + 2 < length else 0
            out += chr(prev ^ _dec(rev[i:i+2]) ^ 0xa5)
        return out[::-1], length
    return decode
48 |
49 |
# variant group matches '', '1', '-ctx1', '_ctx1', ... ; the output doubles the
# input length (two letters per byte), hence expansion_factor=2.
add("citrix", citrix_encode, citrix_decode, r"citrix(|[-_]?(?:ctx)?1)$", entropy=4., printables_rate=1.,
    expansion_factor=2.)
52 |
53 |
--------------------------------------------------------------------------------
/src/codext/crypto/railfence.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Rail Fence Cipher Codec - rail fence content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
13 | __examples__ = {
14 | 'enc(rail_123|rail-2-123)': {'this is a test': None},
15 | 'enc(railfence|zigzag)': {'this is a test': "t ashsi etist"},
16 | 'enc(rail-5|zigzag_5)': {'this is a test': "tah istsiet s"},
17 | 'enc(rail_5-3|rail_5_3)': {'this is a test': "it sss etiath "},
18 | 'enc(rail-5-3-up|rail_5_3-up)': {'this is a test': "h tiats e ssit"},
19 | 'enc(rail-7-4|rail_7_4)': {'this is a test': "a stiet shsti"},
20 | 'dec(zigzag)': {'': ""},
21 | }
22 | __guess__ = ["railfence-%d" % i for i in range(1, 11)] + ["railfence-%d-up" % i for i in range(1, 11)]
23 |
24 |
def __build(text, rails, offset, up):
    """Lay *text* out over *rails* rows in zig-zag order and return the grid."""
    size = len(text)
    # starting row and initial direction depend on the offset and orientation
    if up:
        row, step = rails - offset - 1, -1
    else:
        row, step = offset, 1
    grid = [[None] * size for _ in range(rails)]
    # zig-zag across the rails, one character per column
    for col in range(size):
        grid[row][col] = text[col]
        # bounce whenever a border row has just been filled
        if row >= rails - 1:
            step = -1
        elif row <= 0:
            step = 1
        row += step
    return grid
44 |
45 |
def __check(length, rails, offset):
    """Validate the rails count and offset against the text length."""
    for name, value, bound in (("rails", rails, length), ("offset", offset, rails)):
        if value > bound:
            raise ParameterError("Bad parameter for encoding 'railfence': %s=%d (should be <= %d)" % (name, value, bound))
51 |
52 |
def railfence_encode(rails, offset, up):
    """Build a Rail Fence encoder reading the zig-zag grid rail by rail."""
    rails, offset, up = int(rails or 3), int(offset or 0), up is not None and up != ""
    def encode(text, errors="strict"):
        length = len(text)
        __check(length, rails, offset)
        grid = __build(text, rails, offset, up)
        # concatenate the rails top to bottom, skipping the empty grid cells
        out = "".join(cell for row in grid for cell in row if cell is not None)
        return out, length
    return encode
65 |
66 |
def railfence_decode(rails, offset, up):
    """Build a Rail Fence decoder refilling the grid, then reading column-wise."""
    rails, offset, up = int(rails or 3), int(offset or 0), up is not None and up != ""
    def decode(text, errors="strict"):
        # Python2's codecs machinery may hand over a trailing empty block (see
        # codecs.py line 492 vs Python3's line 501); decoding "" must therefore
        # short-circuit before __check rejects rails > length
        if text == "":
            return "", 0
        length = len(text)
        __check(length, rails, offset)
        # mark every used cell with '.', then pour the ciphertext back rail by rail
        grid = __build("." * length, rails, offset, up)
        pos = 0
        for row in grid:
            for col in range(length):
                if row[col] == ".":
                    row[col] = text[pos]
                    pos += 1
        # finally read the plaintext column by column
        out = "".join(grid[r][c] for c in range(length) for r in range(rails) if grid[r][c] is not None)
        return out, length
    return decode
92 |
93 |
94 | add("railfence", railfence_encode, railfence_decode,
95 | r"^(?:rail(?:[-_]?fence)?|zigzag)(?:[-_]([1-9]|[1-9]\d+)(?:[-_]([0-9]|[1-9]\d+))?(?:[-_](up))?)?$")
96 |
97 |
--------------------------------------------------------------------------------
/src/codext/crypto/rot.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """ROT Codec - rot-with-N-offset content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from string import ascii_lowercase as LC, ascii_uppercase as UC, digits as DIG
11 |
12 | from ..__common__ import *
13 |
14 |
15 | __examples1__ = {
16 | 'enc(rot0|rot--10|rot100)': None,
17 | 'enc(rot1|rot-1|caesar_1)': {'this is a test': "uijt jt b uftu"},
18 | 'enc(rot3|caesar-3)': {'this is a test': "wklv lv d whvw"},
19 | 'enc(rot47)': {'this is a test': "E9:D :D 2 E6DE"},
20 | }
21 | __examples2__ = {
22 | 'enc(prot0|prot--10|prot100)': None,
23 | 'enc(prot1|prog-caesar_1)': {'this is a test': "ujlw oz j eqfh"},
24 | 'enc(prot3|pcaesar-3)': {'this is a test': "wlny qb l gshj"},
25 | }
26 | __examples3__ = {
27 | 'enc(arot0|arot--10|arot100)': None,
28 | 'enc(arot1|alt-caesar_1)': {'this is a test': "ugjr ht b udts"},
29 | 'enc(arot3|acaesar-3)': {'this is a test': "welp fv d wbvq"},
30 | }
31 | __guess1__ = ["rot-%d" % i for i in range(1, 26)] + ["rot-47"]
32 | __guess2__ = ["progressive-rot-%d" % i for i in range(1, 26)] + ["progressive-rot-n%d" % i for i in range(1, 26)]
33 | __guess3__ = ["alternative-rot-%d" % i for i in range(1, 26) if i != 13]
34 |
35 |
36 | ROT47 = "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
37 |
38 |
def _rotn(text, n=13, a=(LC, UC), alt=False, prog=False, neg=False):
    """Rotate each character of *text* by *n* within its alphabet from *a*.

    alt inverts the shift sign on odd positions; prog adds the character index
    to the shift (subtracts it when neg is set); characters outside every
    alphabet pass through unchanged.
    """
    out = []
    for i, c in enumerate(ensure_str(text)):
        for alphabet in a:
            if c in alphabet:
                shift = -n if alt and i % 2 == 1 else n
                if prog:
                    shift += -i if neg else i
                out.append(alphabet[(alphabet.index(c) + shift) % len(alphabet)])
                break
        else:
            # not rotatable: keep as-is
            out.append(c)
    return "".join(out)
51 |
52 |
def arot_encode(i):
    """Alternating ROT encoder: even positions shift by +i, odd ones by -i."""
    def encode(text, errors="strict"):
        s = ensure_str(text)
        return _rotn(s, i, alt=True), len(text)
    return encode


def arot_decode(i):
    """Alternating ROT decoder: the same scheme with the shift sign inverted."""
    def decode(text, errors="strict"):
        s = ensure_str(text)
        return _rotn(s, -i, alt=True), len(text)
    return decode
63 |
64 |
def rot_encode(i):
    """ROT-N encoder ; N=47 uses the dedicated printable-ASCII ROT47 alphabet."""
    def encode(text, errors="strict"):
        s = ensure_str(text)
        if i == 47:
            out = _rotn(s, 47, [ROT47])
        else:
            out = _rotn(s, i)
        return out, len(out)
    return encode


def rot_decode(i):
    """ROT-N decoder: shift in the opposite direction."""
    def decode(text, errors="strict"):
        s = ensure_str(text)
        if i == 47:
            out = _rotn(s, -47, [ROT47])
        else:
            out = _rotn(s, -i)
        return out, len(out)
    return decode
79 |
80 |
def prot_encode(n, i):
    """Progressive ROT encoder: the shift changes by one per position
    (decreasing when the 'n' group is captured)."""
    def encode(text, errors="strict"):
        backwards = n == "n"
        return _rotn(ensure_str(text), i, prog=True, neg=backwards), len(text)
    return encode


def prot_decode(n, i):
    """Progressive ROT decoder ; the progression sign is flipped relative to
    encoding so that the per-position shifts cancel out."""
    def decode(text, errors="strict"):
        backwards = n != "n"
        return _rotn(ensure_str(text), -i, prog=True, neg=backwards), len(text)
    return decode
91 |
92 |
# note: alternative-rot-13 is equivalent to rot-13, therefore excluded from the regex
# (the character class '1[0-24-9]' allows 10-12 and 14-19 but not 13)
add("alternative-rot", arot_encode, arot_decode, r"a(?:lt(?:ernative)?-)?(?:caesar|rot)[-_]?([1-9]|1[0-24-9]|2[0-5])$",
    penalty=.2, entropy=lambda e: e, printables_rate=lambda pr: pr, transitive=True, examples=__examples3__,
    guess=__guess3__)
# plain ROT/Caesar: shifts 1-25 plus the special printable-ASCII ROT-47
add("rot", rot_encode, rot_decode, r"(?:caesar|rot)[-_]?([1-9]|1[0-9]|2[0-5]|47)$", aliases=["caesar"], penalty=.2,
    entropy=lambda e: e, printables_rate=lambda pr: pr, transitive=True, examples=__examples1__, guess=__guess1__)
# progressive ROT: the optional 'n' group selects a decreasing progression
add("progressive-rot", prot_encode, prot_decode, r"p(?:rog(?:ressive)?-)?(?:caesar|rot)[-_]?(n?)([1-9]|1[0-9]|2[0-5])$",
    penalty=.2, entropy=lambda e: e, printables_rate=lambda pr: pr, transitive=True, examples=__examples2__,
    guess=__guess2__)
102 |
103 |
--------------------------------------------------------------------------------
/src/codext/crypto/scytale.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Scytale-N Codec - scytale content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from math import ceil
11 |
12 | from ..__common__ import *
13 |
14 |
15 | __examples__ = {
16 | 'enc(scytale0|scytale--10|scytale01)': None,
17 | 'enc(scytale2|scytale-2|scytale_2)': {'this is a test': "ti satshsi et"},
18 | 'enc(scytale5|scytale-5|scytale_5)': {'this is a test': "tithsei ssat "},
19 | }
20 | __guess__ = ["scytale-%d" % i for i in range(1, 10)]
21 |
22 |
23 | PADDING_CHAR = ""
24 |
25 |
def scytale_encode(l):
    """Encoder: wrap the text on a rod of circumference *l*, read column-wise."""
    def encode(text, errors="strict"):
        rows = int(ceil(len(text) / float(l)))
        out = ""
        for col in range(l):
            for row in range(rows):
                try:
                    out += text[row * l + col]
                except IndexError:
                    # incomplete last row: pad (no-op with the default "" pad)
                    out += PADDING_CHAR
        return out, len(out)
    return encode
37 |
38 |
def scytale_decode(l):
    def decode(text, errors="strict"):
        # n is the number of columns the encoder produced (text length / l,
        # rounded up)
        s, n = "", int(ceil(len(text) / float(l)))
        # pl is how many cells of the last column were left unfilled at
        # encoding time (padded with PADDING_CHAR, the empty string by default)
        pl = l * n - len(text)
        for x in range(n):
            for y in range(l):
                # the last column (x == n-1) has no character for its last 'pl'
                # rows -- skip those cells entirely
                if y >= l-pl and x == n-1:
                    continue
                # index back into the flat ciphertext; the max() term shifts the
                # position left to compensate for the skipped padding cells
                s += text[y*n+x-max(0,y-(l-pl))]
        # no-op while PADDING_CHAR is "" ; kept for non-empty padding characters
        s = s.rstrip(PADDING_CHAR)
        return s, len(s)
    return decode
51 |
52 |
53 | add("scytale", scytale_encode, scytale_decode, r"^scytale[-_]?([1-9]\d*)$")
54 |
55 |
--------------------------------------------------------------------------------
/src/codext/crypto/shift.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Shift Codec - Shift-ordinal-with-N content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
13 | __examples__ = {
14 | 'enc(shift0|shift--10|shift256)': None,
15 | 'enc(shift1|shift_1|shift-1)': {'this is a test': "uijt!jt!b!uftu"},
16 | 'enc(shift9|shift_9|shift-9)': {'this is a test': "}qr|)r|)j)}n|}"},
17 | }
18 | __guess__ = ["shift-%d" % i for i in range(1, 256)]
19 |
20 |
def ord_shift_decode(i):
    """Decoding is encoding with the opposite offset (modulo 256)."""
    return ord_shift_encode(-int(i))


def ord_shift_encode(i):
    """Build an encoder adding *i* to every character's ordinal, modulo 256."""
    def encode(text, errors="strict"):
        offset = int(i)
        out = "".join(chr((ord(ch) + offset) % 256) for ch in text)
        return out, len(out)
    return encode
30 |
31 |
# dynamic group: shift value 1-255 (the regex deliberately excludes 0 and >255)
add("shift", ord_shift_encode, ord_shift_decode, r"shift[-_]?([1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])$",
    transitive=True)
34 |
35 |
--------------------------------------------------------------------------------
/src/codext/crypto/xor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """XOR Codec - XOR-with-1-byte content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
13 | __examples__ = {
14 | 'enc(xor0|xor--10|xor256|xor300)': None,
15 | 'enc(xor3|xor-3|xor_3)': {'this is a test': "wkjp#jp#b#wfpw"},
16 | 'enc(xor3|xor-3|xor_3)': {'wkjp#jp#b#wfpw': "this is a test"},
17 | 'enc(xor6|xor-6|xor_6)': {'this is a test': "rnou&ou&g&rcur"},
18 | }
19 | __guess__ = ["xor-%d" % i for i in range(1, 256)]
20 |
21 |
def _xorn(text, n=1):
    """XOR every character of *text* with the single key byte *n* (mod 256)."""
    key = n % 256
    return "".join(chr(ord(ch) ^ key) for ch in text)


def xor_byte_encode(i):
    """Build the XOR coder for key byte *i* (its own inverse, used both ways)."""
    def encode(text, errors="strict"):
        out = _xorn(ensure_str(text), i)
        return out, len(out)
    return encode
31 |
32 |
# encode and decode are the same function: XOR with one byte is an involution
add("xor", xor_byte_encode, xor_byte_encode, r"^xor[-_]?([1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])$",
    transitive=True)
35 |
36 |
--------------------------------------------------------------------------------
/src/codext/hashing/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | from .blake import *
3 | from .checksums import *
4 | from .crypt import *
5 | from .md import *
6 | from .sha import *
7 | from .shake import *
8 |
9 |
--------------------------------------------------------------------------------
/src/codext/hashing/blake.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Case Codecs - string hashing with blake.
3 |
4 | These are codecs for hashing strings, for use with other codecs in encoding chains.
5 |
6 | These codecs:
7 | - transform strings from str to str
8 | - transform strings from bytes to bytes
9 | - transform file content from str to bytes (write)
10 | """
11 | from ..__common__ import *
12 |
13 |
def blake_hash(c):
    """Build the blake2 codec factory for variant *c* ('b' or 's').

    The returned function receives the dynamic token suffix (e.g. "-16" in
    'blake2s-16') giving the digest size in bytes; it defaults to the
    algorithm's maximum (64 for blake2b, 32 for blake2s).
    """
    def _hash_transform(l):
        # BUGFIX: the previous expression `(l or "64" if c == "b" else "32")`
        # parsed as `(l or "64") if c == "b" else "32"` (conditional binds
        # looser than `or`), so any size given for blake2s was silently ignored
        # and 32 was always used; parenthesizing the default honors the
        # captured suffix for both variants
        l = (l or ("64" if c == "b" else "32")).lstrip("_-")
        def _encode(data, error="strict"):
            return getattr(hashlib, "blake2%s" % c)(b(data), digest_size=int(l)).hexdigest(), len(data)
        return _encode
    return _hash_transform
21 |
22 |
23 | add("blake2b", blake_hash("b"), pattern=r"^blake2b(|[-_](?:[1-9]|[1-5]\d|6[0-4]))$", guess=None)
24 | add("blake2s", blake_hash("s"), pattern=r"^blake2s(|[-_](?:[1-9]|[1-2]\d|3[0-2]))$", guess=None)
25 |
26 |
--------------------------------------------------------------------------------
/src/codext/hashing/crypt.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Case Codecs - string hashing with Unix's Crypt.
3 |
4 | These are codecs for hashing strings, for use with other codecs in encoding chains.
5 |
6 | These codecs:
7 | - transform strings from str to str
8 | - transform strings from bytes to bytes
9 | - transform file content from str to bytes (write)
10 | """
11 | from ..__common__ import add, ensure_str, UNIX
12 |
13 |
if UNIX:
    # the stdlib 'crypt' module is Unix-only (and removed in Python 3.13 per
    # PEP 594); fall back to the drop-in 'crypt_r' package when it is absent
    try:
        import crypt
    except ImportError:
        import crypt_r as crypt

    # methods available on this platform, e.g. METHOD_SHA512 -> "sha512"
    METHODS = [x[7:].lower() for x in crypt.__dict__ if x.startswith("METHOD_")]

    def crypt_hash(method):
        # dynamic suffix of the token, e.g. "crypt-md5" ; defaults to blowfish
        method = (method or "").lstrip("-_") or "blowfish"
        if method not in METHODS:
            raise NotImplementedError("method '%s' is not implemented" % method)
        def _encode(input, error="strict"):
            m = getattr(crypt, "METHOD_" + method.upper())
            # a fresh random salt is drawn on every call, so the output is not
            # deterministic across invocations
            return crypt.crypt(ensure_str(input), crypt.mksalt(m)), len(input)
        return _encode

    add("crypt", crypt_hash, pattern=r"^crypt(|[-_](?:%s))$" % "|".join(METHODS), guess=None)
32 |
33 |
--------------------------------------------------------------------------------
/src/codext/hashing/md.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Case Codecs - string hashing with Message Digest (MD).
3 |
4 | These are codecs for hashing strings, for use with other codecs in encoding chains.
5 |
6 | These codecs:
7 | - transform strings from str to str
8 | - transform strings from bytes to bytes
9 | - transform file content from str to bytes (write)
10 | """
11 | from ..__common__ import *
12 |
13 |
14 | MD2_TABLE = [41, 46, 67, 201, 162, 216, 124, 1, 61, 54, 84, 161, 236, 240, 6, 19, 98, 167, 5, 243, 192, 199, 115, 140,
15 | 152, 147, 43, 217, 188, 76, 130, 202, 30, 155, 87, 60, 253, 212, 224, 22, 103, 66, 111, 24, 138, 23, 229, 18, 190,
16 | 78, 196, 214, 218, 158, 222, 73, 160, 251, 245, 142, 187, 47, 238, 122, 169, 104, 121, 145, 21, 178, 7, 63, 148,
17 | 194, 16, 137, 11, 34, 95, 33, 128, 127, 93, 154, 90, 144, 50, 39, 53, 62, 204, 231, 191, 247, 151, 3, 255, 25, 48,
18 | 179, 72, 165, 181, 209, 215, 94, 146, 42, 172, 86, 170, 198, 79, 184, 56, 210, 150, 164, 125, 182, 118, 252, 107,
19 | 226, 156, 116, 4, 241, 69, 157, 112, 89, 100, 113, 135, 32, 134, 91, 207, 101, 230, 45, 168, 2, 27, 96, 37, 173,
20 | 174, 176, 185, 246, 28, 70, 97, 105, 52, 64, 126, 15, 85, 71, 163, 35, 221, 81, 175, 58, 195, 92, 249, 206, 186,
21 | 197, 234, 38, 44, 83, 13, 110, 133, 40, 132, 9, 211, 223, 205, 244, 65, 129, 77, 82, 106, 220, 55, 200, 108, 193,
22 | 171, 250, 36, 225, 123, 8, 12, 189, 177, 74, 120, 136, 149, 139, 227, 99, 232, 109, 233, 203, 213, 254, 59, 0, 29,
23 | 57, 242, 239, 183, 14, 102, 88, 208, 228, 166, 119, 114, 248, 235, 117, 75, 10, 49, 68, 80, 180, 143, 237, 31, 26,
24 | 219, 153, 141, 51, 159, 17, 131, 20]
25 |
26 |
def md2(data):
    """Compute the MD2 digest of *data*, returned as a 32-char hex string.

    Pure-Python implementation following RFC 1319: pad to a 16-byte boundary,
    append a 16-byte checksum, then crunch each block through 18 rounds over a
    48-byte buffer using the pi-derived substitution table (MD2_TABLE).
    """
    bs, buff, rnd, data = 16, 48, 18, bytearray(b(data))
    # first pad the input data (n bytes of value n, 1 <= n <= 16, always added)
    n = bs - len(data) % bs
    data += bytearray([n for _ in range(n)])
    # then compute the checksum and append it to the data
    # (uses the XOR form from the RFC 1319 erratum: C[j] ^= S[c ^ L])
    checksum, prev, l, lt = bytearray(bs), 0, len(data) // bs, len(MD2_TABLE)
    for i in range(l):
        for j in range(bs):
            curr = data[bs * i + j]
            checksum[j] ^= MD2_TABLE[curr ^ prev]
            prev = checksum[j]
    data += checksum
    # now compute the digest over the 48-byte working buffer
    digest = bytearray(buff)
    for i in range(l + 1):
        # second third <- current block ; last third <- block XOR first third
        for j in range(bs):
            digest[bs + j] = data[i * bs + j]
            digest[2 * bs + j] = digest[bs + j] ^ digest[j]
        prev = 0
        # 18 substitution rounds over the whole buffer
        for j in range(rnd):
            for k in range(buff):
                digest[k] = prev = digest[k] ^ MD2_TABLE[prev]
            prev = (prev + j) % lt
    # the digest is the first 16 bytes of the buffer
    return "".join("{:02x}".format(x) for x in digest[:16])
53 |
54 |
# hashes are one-way: encode only, nothing to guess back (guess=None)
add("md2", lambda s, error="strict": (md2(s), len(s)), guess=None)
add("md5", lambda s, error="strict": (hashlib.new("md5", b(s)).hexdigest(), len(s)), guess=None)
# MD4 may be absent from modern OpenSSL builds, hence the availability check
if "md4" in hashlib.algorithms_available:
    add("md4", lambda s, error="strict": (hashlib.new("md4", b(s)).hexdigest(), len(s)), guess=None)
59 |
60 |
--------------------------------------------------------------------------------
/src/codext/hashing/sha.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Case Codecs - string hashing with Secure Hash Algorithms.
3 |
4 | These are codecs for hashing strings, for use with other codecs in encoding chains.
5 |
6 | These codecs:
7 | - transform strings from str to str
8 | - transform strings from bytes to bytes
9 | - transform file content from str to bytes (write)
10 | """
11 | from ..__common__ import *
12 |
13 |
# one-way hash codecs: encode only (guess=None), output is the hex digest
add("sha1", lambda s, error="strict": (hashlib.sha1(b(s)).hexdigest(), len(s)), guess=None)
add("sha224", lambda s, error="strict": (hashlib.sha224(b(s)).hexdigest(), len(s)), guess=None)
add("sha256", lambda s, error="strict": (hashlib.sha256(b(s)).hexdigest(), len(s)), guess=None)
add("sha384", lambda s, error="strict": (hashlib.sha384(b(s)).hexdigest(), len(s)), guess=None)
add("sha512", lambda s, error="strict": (hashlib.sha512(b(s)).hexdigest(), len(s)), guess=None)
# the SHA3 variants accept both 'sha3_NNN' and 'sha3-NNN' spellings
add("sha3_224", lambda s, error="strict": (hashlib.sha3_224(b(s)).hexdigest(), len(s)), pattern=r"^sha3[-_]224$",
    guess=None)
add("sha3_256", lambda s, error="strict": (hashlib.sha3_256(b(s)).hexdigest(), len(s)), pattern=r"^sha3[-_]256$",
    guess=None)
add("sha3_384", lambda s, error="strict": (hashlib.sha3_384(b(s)).hexdigest(), len(s)), pattern=r"^sha3[-_]384$",
    guess=None)
add("sha3_512", lambda s, error="strict": (hashlib.sha3_512(b(s)).hexdigest(), len(s)), pattern=r"^sha3[-_]512$",
    guess=None)
27 |
28 |
--------------------------------------------------------------------------------
/src/codext/hashing/shake.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Case Codecs - string hashing with SHAKE.
3 |
4 | These are codecs for hashing strings, for use with other codecs in encoding chains.
5 |
6 | These codecs:
7 | - transform strings from str to str
8 | - transform strings from bytes to bytes
9 | - transform file content from str to bytes (write)
10 | """
11 | from ..__common__ import *
12 |
13 |
def shake_hash(i):
    """Build the SHAKE-*i* codec factory (output length taken from the token)."""
    def _hash_transform(l):
        # dynamic suffix, e.g. "shake_128-20" -> 20 output bytes; defaults to i
        size = (l or str(i)).lstrip("_-")
        def _encode(data, error="strict"):
            digest = getattr(hashlib, "shake_%d" % i)(b(data))
            return digest.hexdigest(int(size)), len(data)
        return _encode
    return _hash_transform
21 |
22 |
23 | add("shake_128", shake_hash(128), pattern=r"^shake[-_]?128(|[-_][1-9]\d*)$", guess=None)
24 | add("shake_256", shake_hash(256), pattern=r"^shake[-_]?256(|[-_][1-9]\d*)$", guess=None)
25 |
26 |
--------------------------------------------------------------------------------
/src/codext/languages/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | from .braille import *
3 | from .galactic import *
4 | from .ipsum import *
5 | from .leetspeak import *
6 | from .morse import *
7 | from .navajo import *
8 | from .radio import *
9 | from .southpark import *
10 | from .tap import *
11 | from .tomtom import *
12 |
13 |
--------------------------------------------------------------------------------
/src/codext/languages/braille.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Braille Codec - braille content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
13 | __examples__ = {
14 | 'enc(braille)': {'this is a test': "⠞⠓⠊⠎⠀⠊⠎⠀⠁⠀⠞⠑⠎⠞"},
15 | }
16 |
17 |
18 | ENCMAP = {
19 | # digits
20 | '0': '⠴', '1': '⠂', '2': '⠆', '3': '⠒', '4': '⠲', '5': '⠢', '6': '⠖', '7': '⠶', '8': '⠦', '9': '⠔',
21 | # letters
22 | 'a': '⠁', 'b': '⠃', 'c': '⠉', 'd': '⠙', 'e': '⠑', 'f': '⠋', 'g': '⠛', 'h': '⠓', 'i': '⠊', 'j': '⠚', 'k': '⠅',
23 | 'l': '⠇', 'm': '⠍', 'n': '⠝', 'o': '⠕', 'p': '⠏', 'q': '⠟', 'r': '⠗', 's': '⠎', 't': '⠞', 'u': '⠥', 'v': '⠧',
24 | 'w': '⠺', 'x': '⠭', 'y': '⠽', 'z': '⠵',
25 | # punctuation
26 | ' ': '⠀', '!': '⠮', '"': '⠐', '#': '⠼', '$': '⠫', '%': '⠩', '&': '⠯', ':': '⠱', ';': '⠰', '<': '⠣', '=': '⠿',
27 | '>': '⠜', '?': '⠹', '@': '⠈', "'": '⠄', '(': '⠷', ')': '⠾', '*': '⠡', '+': '⠬', ',': '⠠', '-': '⠤', '.': '⠨',
28 | '/': '⠌', '[': '⠪', '\\': '⠳', ']': '⠻', '^': '⠘', '_': '⠸',
29 | }
30 |
31 |
32 | add_map("braille", ENCMAP, ignore_case="encode")
33 |
34 |
--------------------------------------------------------------------------------
/src/codext/languages/galactic.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: UTF-8 -*-
3 | """Galactic Alphabet Codec - Minecraft enchantment language content encoding.
4 |
5 | This codec:
6 | - en/decodes strings from str to str
7 | - en/decodes strings from bytes to bytes
8 | - decodes file content to str (read)
9 | - encodes file content from str to bytes (write)
10 | """
11 | from ..__common__ import *
12 |
13 |
14 | __examples__ = {
15 | 'enc-dec(galactic|minecraft_enchanting_language)': ["test " + MASKS['l']],
16 | 'enc(galactic-alphabet|minecraft)': {'Bad test#': None},
17 | }
18 |
19 |
20 | # source: https://shapecatcher.com
21 | ENCMAP = {
22 | 'a': ["ᒋ", "ᔑ"], 'b': ["⦣", "ゝ", "ʖ"], 'c': ["ì", "ᓵ"], 'd': "↸", 'e': ["ᒷ", "Ŀ"], 'f': ["𝌁", "⎓"],
23 | 'g': ["𐌝", "┤", "⫞", "⊣"], 'h': ["₸", "⍑", "╤"], 'i': "╎", 'j': ["⫶", "⁝", "ⵗ", "⋮"], 'k': "ꖌ", 'l': "ꖎ",
24 | 'm': ["ᒲ", "⟓"], 'n': ["ソ", "リ"], 'o': ["⁊", "フ", "ㇷ", "𝙹"], 'p': ["ⅱ", "ij", "‼", "!"],
25 | 'q': ["ᑑ", "⊐", "コ"], 'r': ["⸬", "∷", "⛚"], 's': ["߆", "𝈿", "ꝇ", "ᓭ"], 't': ["ℸ", "ヿ", "⅂", "Ꞁ"],
26 | 'u': ["⚍", "⍨"], 'v': ["𝍦", "⍊", "╧"], 'w': ["∴", "⸫", "⛬"], 'x': ["ꜘ", "╱", " ̷", "⟋"],
27 | 'y': ["║", "‖", "∥", "ǁ", "𝄁", "|"], 'z': ["ᑎ", "⋂", "∩", "⨅", "⛫"],
28 | ' ': [" ", "⠀"],
29 | }
30 |
31 |
32 | add_map("galactic", ENCMAP, ignore_case="encode", printables_rate=0.,
33 | pattern=r"^(?:galactic(?:[-_]alphabet)?|minecraft(?:[-_](?:enchantment|enchanting[-_]language))?)$")
34 |
35 |
--------------------------------------------------------------------------------
/src/codext/languages/ipsum.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Letters Codec - letter indices-related content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | import random
11 |
12 | from ..__common__ import *
13 |
14 |
15 | __examples__ = {
16 | 'enc-dec(ipsum|lorem-ipsum)': ["This is a test !"],
17 | 'enc(ipsum)': {'Bad test#': None},
18 | }
19 |
20 |
21 | DICT = {
22 | 'a': ['a', 'ac', 'accumsan', 'ad', 'adipiscing', 'aenean', 'aliquam', 'aliquet', 'amet', 'ante', 'aptent', 'arcu',
23 | 'at', 'auctor', 'augue'],
24 | 'b': ['babel', 'bibendum', 'blandit', 'bomba', 'botum', 'buxus'],
25 | 'c': ['class', 'commodo', 'condimentum', 'congue', 'consectetur', 'consequat', 'conubia', 'convallis', 'cras',
26 | 'cubilia', 'curabitur', 'curae', 'cursus'],
27 | 'd': ['dapibus', 'diam', 'dictum', 'dictumst', 'dignissim', 'dis', 'dolor', 'donec', 'dui', 'duis'],
28 | 'e': ['efficitur', 'egestas', 'eget', 'eleifend', 'elementum', 'elit', 'enim', 'erat', 'eros', 'est', 'et', 'etiam',
29 | 'eu', 'euismod', 'ex'],
30 | 'f': ['facilisis', 'fames', 'faucibus', 'felis', 'fermentum', 'feugiat', 'finibus', 'fringilla', 'fusce'],
31 | 'g': ['gadus', 'galliarus', 'ganeo', 'garba', 'gemma', 'gener', 'genuine', 'gestus', 'gramma', 'gravida', 'grex',
32 | 'gusto', 'guttur', 'gyro'],
33 | 'h': ['habitant', 'habitasse', 'hac', 'haicu', 'halo', 'helleborum', 'hendrerit', 'hilarius', 'himenaeos',
34 | 'horreum', 'hydrus', 'hystericus'],
35 | 'i': ['iaculis', 'id', 'imperdiet', 'in', 'inceptos', 'integer', 'interdum', 'ipsum'],
36 | 'j': ['jaccae', 'jacio', 'jecur', 'jocundiatas', 'jovis', 'juctim', 'juger', 'juno', 'jussum', 'justo'],
37 | 'k': ['kal', 'kalatorium', 'kalium', 'kaput', 'kardo', 'kenia', 'koppa', 'kum'],
38 | 'l': ['lacinia', 'lacus', 'laoreet', 'lectus', 'leo', 'libero', 'ligula', 'litora', 'lobortis', 'lorem', 'luctus'],
39 | 'm': ['maecenas', 'magna', 'magnis', 'malesuada', 'massa', 'mattis', 'mauris', 'maximus', 'metus', 'mi', 'molestie',
40 | 'mollis', 'montes', 'morbi', 'mus'],
41 | 'n': ['nam', 'nascetur', 'natoque', 'nec', 'neque', 'netus', 'nibh', 'nisi', 'nisl', 'non', 'nostra', 'nulla',
42 | 'nullam', 'nunc'],
43 | 'o': ['odio', 'orci', 'ornare'],
44 | 'p': ['parturient', 'pellentesque', 'penatibus', 'per', 'pharetra', 'phasellus', 'placerat', 'platea', 'porta',
45 | 'porttitor', 'posuere', 'potenti', 'praesent', 'pretium', 'primis', 'proin', 'pulvinar', 'purus'],
46 | 'q': ['qua', 'quadrum', 'quam', 'quasi', 'quintum', 'quis', 'quisque', 'quo', 'quom', 'quota', 'qur'],
47 | 'r': ['radicitus', 'radius', 'ratio', 'recidivus', 'rectio', 'rhoncus', 'ridiculus', 'risus', 'ros', 'rutrum'],
48 | 's': ['sagittis', 'sapien', 'scelerisque', 'sed', 'sem', 'semper', 'senectus', 'sit', 'sociosqu', 'sodales',
49 | 'sollicitudin', 'suscipit', 'suspendisse'],
50 | 't': ['taciti', 'tellus', 'tempor', 'tempus', 'tincidunt', 'torquent', 'tortor', 'tristique', 'turpis'],
51 | 'u': ['ullamcorper', 'ultrices', 'ultricies', 'urna', 'ut'],
52 | 'v': ['varius', 'vehicula', 'vel', 'velit', 'venenatis', 'vestibulum', 'vitae', 'vivamus', 'volutpat', 'vulputate'],
53 | 'w': ['wadiarus', 'warantus', 'warra', 'werumensium', 'wormicia'],
54 | 'x': ['xandicus', 'xenon', 'xenium', 'xiphias', 'xvir', 'xylon', 'xysticus', 'xystus'],
55 | 'y': ['yata', 'yatum', 'yatus', 'ypra'],
56 | 'z': ['zamia', 'zelosus', 'zerum', 'zonatus', 'zymus'],
57 | }
58 | SCHARS = "0123456789.,:;!?+=-*/\\"
59 |
60 |
def ipsum_encode(text, errors="strict"):
    """Replace every letter with a random lorem-ipsum word starting with it."""
    out, pending_strip = "", False
    for pos, ch in enumerate(text):
        if ch == " " or ch in SCHARS:
            # spaces, digits and punctuation pass through unchanged
            out += ch
            pending_strip = False
        else:
            try:
                word = random.choice(DICT[ch.lower()])
            except KeyError:
                out += handle_error("ipsum", errors, " ")(ch, pos)
            else:
                # mirror the input character's case on the chosen word
                out += (word.capitalize() if ch.isupper() else word) + " "
                pending_strip = True
    # drop the trailing separator when the text ended on a generated word
    return out[:-1] if pending_strip else out, len(text)
75 |
76 |
def ipsum_decode(text, errors="strict"):
    """Decode lorem-ipsum text back to characters: each dictionary word stands
    for its first letter ; SCHARS tokens stand for themselves."""
    s = ""
    words = text.split(" ")
    # a trailing separator yields a final empty token ; drop it
    for i, w in enumerate(words[:-1] if words[-1] == "" else words):
        if w.strip() == "":
            # an empty token between two separators encodes a literal space
            s += " "
        elif w in SCHARS:
            # NOTE: substring test -- runs of consecutive SCHARS also match
            s += w
        else:
            try:
                # the word (stripped of surrounding SCHARS) must belong to the
                # dictionary bucket of its first letter, otherwise it is invalid
                if w.lower().strip(SCHARS) not in DICT[w[0].lower()]:
                    raise KeyError
                # keep leading SCHARS + the word's first letter (case kept) +
                # trailing SCHARS, e.g. "amet," -> "a,"
                s += w[:len(w)-len(w.lstrip(SCHARS))] + w.strip(SCHARS)[0] + w[len(w.rstrip(SCHARS)):len(w)]
            except KeyError:
                s += handle_error("ipsum", errors, decode=True, item="word")(w, i)
    return s, len(text)
93 |
94 |
95 | add("ipsum", ipsum_encode, ipsum_decode, pattern=r"^(?:lorem[-_]?)?ipsum$", printables_rate=1.,
96 | expansion_factor=(6., .5))
97 |
98 |
--------------------------------------------------------------------------------
/src/codext/languages/leetspeak.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Leetspeak Codec - leetspeak content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
__examples__ = {
    'enc(leet|1337|leetspeak)': {'this is a test': "7h15 15 4 7357"},
    'dec(leet|1337|leetspeak)': {'7H15 15 4 7357': "THIS IS A TEST"},
}


# character substitutions ; NOTE(review): zip yields both ('a', '@') and
# ('a', '4') but the dict comprehension keeps only the LAST pair, so 'a' always
# encodes to '4' and '@' is never produced -- confirm whether a list of
# alternatives (as used by other map codecs, e.g. galactic) was intended
ENCMAP = {k: v for k, v in zip("aabeliostzg", "@4831105729")}


add_map("leet", ENCMAP, ignore_case="encode", no_error=True, pattern=r"(?:leet|1337|leetspeak)$", entropy=lambda e: e)
23 |
24 |
--------------------------------------------------------------------------------
/src/codext/languages/morse.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Morse Codec - morse content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
# doctest-style examples used by the test suite; the scheme suffix (e.g. "/-.", "-01") overrides the
# default dot/dash symbols
__examples__ = {
    'enc(morse|morse/-.)': {'this is a test': "- .... .. ... / .. ... / .- / - . ... -"},
    'enc(morse-/AB)': {'this is a test': "A BBBB BB BBB / BB BBB / BA / A B BBB A"},
    'enc(morse-01)': {'this is a test': "0 1111 11 111 - 11 111 - 10 - 0 1 111 0"},
}
# codec variants tried by the guess mode
__guess__ = ["morse", "morse/_.", "morse-/01", "morse-01", "morse-/ab", "morse-ab", "morse-/AB", "morse-AB"]


# character -> Morse sequence; tokens are space-separated on output, words separated by "/"
ENCMAP = {
    # letters
    'a': ".-", 'b': "-...", 'c': "-.-.", 'd': "-..", 'e': ".", 'f': "..-.", 'g': "--.", 'h': "....", 'i': "..",
    'j': ".---", 'k': "-.-", 'l': ".-..", 'm': "--", 'n': "-.", 'o': "---", 'p': ".--.", 'q': "--.-", 'r': ".-.",
    's': "...", 't': "-", 'u': "..-", 'v': "...-", 'w': ".--", 'x': "-..-", 'y': "-.--", 'z': "--..",
    # digits
    '1': ".----", '2': "..---", '3': "...--", '4': "....-", '5': ".....", '6': "-....", '7': "--...", '8': "---..",
    '9': "----.", '0': "-----",
    # punctuation
    ',': "--..--", '.': ".-.-.-", ':' : "---...", '?': "..--..", '/': "-..-.", '-': "-....-", '=' : "-...-",
    '(': "-.--.", ')': "-.--.-", '@' : ".--.-.", '\'': ".----.", '_': "..--.-", '!': "-.-.--", '&': ".-...",
    '"': ".-..-.", ';': "-.-.-.", '$': "...-..-",
    # word separator
    ' ' : "/",
}


# "#" is the replacement character for unknown input, " " the token separator; the optional 3-char
# pattern group re-maps the dot/dash/space symbols (e.g. "morse-01" -> 0/1)
add_map("morse", ENCMAP, "#", " ", ignore_case="encode", pattern=r"^morse([-_]?.{3})?$", printables_rate=1.,
        expansion_factor=(2.8, .6))
40 |
41 |
--------------------------------------------------------------------------------
/src/codext/languages/navajo.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Navajo Codec - Navajo code content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
# round-trip examples only: several code words map to the same letter, so decoding normalizes case
__examples__ = {'enc-dec(navajo)': ["this is a test", "THIS\nIS\nA\nTEST"]}


# source: https://www.history.navy.mil/research/library/online-reading-room/title-list-alphabetically/n/navajo-code-talker-dictionary.html
# letter -> Navajo code word(s); lists give the alternatives used by the code talkers (one is picked
# when encoding, all are accepted when decoding)
ENCMAP = {
    'A': ["WOL-LA-CHEE", "BE-LA-SANA", "TSE-NILL"], 'B': ["NA-HASH-CHID", "SHUSH", "TOISH-JEH"],
    'C': ["MOASI", "TLA-GIN", "BA-GOSHI"], 'D': ["BE", "CHINDI", "LHA-CHA-EH"], 'E': ["AH-JAH", "DZEH", "AH-NAH"],
    'F': ["CHUO", "TSA-E-DONIN-EE", "MA-E"], 'G': ["AH-TAD", "KLIZZIE", "JEHA"], 'H': ["TSE-GAH", "CHA", "LIN"],
    'I': ["TKIN", "YEH-HES", "A-CHI"], 'J': ["TKELE-CHO-G", "AH-YA-TSINNE", "YIL-DOI"],
    'K': ["JAD-HO-LONI", "BA-AH-NE-DI-TININ", "KLIZZIE-YAZZIE"], 'L': ["DIBEH-YAZZIE", "AH-JAD", "NASH-DOIE-TSO"],
    'M': ["TSIN-TLITI", "BE-TAS-TNI", "NA-AS-TSO-SI"], 'N': ["TSAH", "A-CHIN"],
    'O': ["A-KHA", "TLO-CHIN", "NE-AHS-JAH"], 'P': ["CLA-GI-AIH", "BI-SO-DIH", "NE-ZHONI"], 'Q': "CA-YEILTH",
    'R': ["GAH", "DAH-NES-TSA", "AH-LOSZ"], 'S': ["DIBEH", "KLESH"], 'T': ["D-AH", "A-WOH", "THAN-ZIE"],
    'U': ["SHI-DA", "NO-DA-IH"], 'V': "A-KEH-DI-GLINI", 'W': "GLOE-IH", 'X': "AL-NA-AS-DZOH", 'Y': "TSAH-AS-ZIH",
    'Z': "BESH-DO-TLIZ",
    # word/line separators and digits are passed through as-is
    ' ': "-", '\n': "\n",
    '0': "0", '1': "1", '2': "2", '3': "3", '4': "4", '5': "5", '6': "6", '7': "7", '8': "8", '9': "9",
}


# code words are space-separated on output
add_map("navajo", ENCMAP, ignore_case="both", sep=" ", pattern=r"^navajo$", printables_rate=1.,
        expansion_factor=(6.2, .8))
35 |
36 |
--------------------------------------------------------------------------------
/src/codext/languages/radio.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Radio Codec - NATO/Military phonetic alphabet content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
__examples__ = {
    'enc(radio|military-alphabet)': {'test': "Tango Echo Sierra Tango"},
    'enc(nato-alphabet|radio-phonetic)': {'string': "Sierra Tango Romeo India November Golf"},
}


# letter -> phonetic code word as used by this codec ("Alpha"/"Juliett" spellings); space encodes as "/"
ENCMAP = {
    'A': "Alpha", 'B': "Bravo", 'C': "Charlie", 'D': "Delta", 'E': "Echo", 'F': "Foxtrot", 'G': "Golf", 'H': "Hotel",
    'I': "India", 'J': "Juliett", 'K': "Kilo", 'L': "Lima", 'M': "Mike", 'N': "November", 'O': "Oscar", 'P': "Papa",
    'Q': "Quebec", 'R': "Romeo", 'S': "Sierra", 'T': "Tango", 'U': "Uniform", 'V': "Victor", 'W': "Whiskey",
    'X': "X-ray", 'Y': "Yankee", 'Z': "Zulu", ' ': "/",
}


# codec name accepts military/nato/radio with optional -phonetic and/or -alphabet suffixes
add_map("radio", ENCMAP, sep=" ", ignore_case="both", printables_rate=1., expansion_factor=(5.5, .3),
        pattern=r"^(?:military|nato|radio)(?:(?:[-_]phonetic)?(?:[-_]alphabet)?)?$")
29 |
30 |
--------------------------------------------------------------------------------
/src/codext/languages/southpark.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Southpark Codec - Kenny's language content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
# first variant: case-sensitive ("kenny" / "southpark"), optional 6-char symbol override
__examples1__ = {
    'enc_dec(kenny|southpark)': ["This is a Test"],
    'enc_dec(kenny_123456|southpark-ABCDEF)': ["This is a Test"],
}
__guess1__ = ["southpark", "southpark-123456", "southpark-abcdef", "southpark-ABCDEF"]
# second variant: case-insensitive ("-icase"), optional 3-char symbol override
__examples2__ = {
    'enc(southpark-icase|kenny_icase)': {'this is a test': "FMPMFPMFFFMMFFFMFFFMMFFFMMMFFFFMPMPPFMMFMP"},
    'enc(southpark_icase-123)': {'this is a test': "123213211122111211122111222111123233122123"},
}
__guess2__ = ["southpark-icase", "southpark-icase-123", "southpark-icase-abc", "southpark-icase-ABC"]


# case-sensitive table: lowercase letters use lowercase m/p/f triplets, uppercase letters capitalize
# the first symbol; space has both forms
ENCMAP1 = {
    'a': "mmm", 'b': "mmp", 'c': "mmf", 'd': "mpm", 'e': "mpp", 'f': "mpf", 'g': "mfm", 'h': "mfp", 'i': "mff",
    'j': "pmm", 'k': "pmp", 'l': "pmf", 'm': "ppm", 'n': "ppp", 'o': "ppf", 'p': "pfm", 'q': "pfp", 'r': "pff",
    's': "fmm", 't': "fmp", 'u': "fmf", 'v': "fpm", 'w': "fpp", 'x': "fpf", 'y': "ffm", 'z': "ffp",
    'A': "Mmm", 'B': "Mmp", 'C': "Mmf", 'D': "Mpm", 'E': "Mpp", 'F': "Mpf", 'G': "Mfm", 'H': "Mfp", 'I': "Mff",
    'J': "Pmm", 'K': "Pmp", 'L': "Pmf", 'M': "Ppm", 'N': "Ppp", 'O': "Ppf", 'P': "Pfm", 'Q': "Pfp", 'R': "Pff",
    'S': "Fmm", 'T': "Fmp", 'U': "Fmf", 'V': "Fpm", 'W': "Fpp", 'X': "Fpf", 'Y': "Ffm", 'Z': "Ffp",
    ' ': ["fff", "Fff"],
}
# case-insensitive table: uppercase M/P/F triplets only
ENCMAP2 = {
    'a': "MMM", 'b': "MMP", 'c': "MMF", 'd': "MPM", 'e': "MPP", 'f': "MPF", 'g': "MFM", 'h': "MFP", 'i': "MFF",
    'j': "PMM", 'k': "PMP", 'l': "PMF", 'm': "PPM", 'n': "PPP", 'o': "PPF", 'p': "PFM", 'q': "PFP", 'r': "PFF",
    's': "FMM", 't': "FMP", 'u': "FMF", 'v': "FPM", 'w': "FPP", 'x': "FPF", 'y': "FFM", 'z': "FFP", ' ': "FFF",
}


add_map("southpark", ENCMAP1, pattern=r"^(?:kenny|southpark)([-_].{6})?$", examples=__examples1__, guess=__guess1__)
add_map("southpark-icase", ENCMAP2, ignore_case="both", pattern=r"^(?:kenny|southpark)[-_]icase([-_].{3})?$",
        examples=__examples2__, guess=__guess2__, printables_rate=1., expansion_factor=3.)
44 |
45 |
--------------------------------------------------------------------------------
/src/codext/languages/tap.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Tap code - Tap/knock code encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
13 | __examples__ = {
14 | 'enc(tap|knock-code|tap_code)': {'this is a test' : ".... ....⠀.. ...⠀.. ....⠀.... ...⠀ ⠀.. ....⠀.... ...⠀ ⠀. ."
15 | "⠀ ⠀.... ....⠀. .....⠀.... ...⠀.... ...."},
16 | }
17 | __guess__ = ["tap", "tap-inv"]
18 |
19 |
def __build_encmap(a):
    """Build the 5x5 tap-code table for the 25-letter alphabet *a* (which excludes 'k').

    The character at index i gets "<row+1> dots><space><col+1 dots>" with row = i // 5 and
    col = i % 5, both counted from 1 in the output.
    """
    table = {}
    for index, letter in enumerate(a):
        row, col = divmod(index, 5)
        table[letter] = (row + 1) * "." + " " + (col + 1) * "."
    # 'k' shares 'c''s cell, and the word separator is a plain space
    table['k'], table[' '] = table['c'], " "
    return table
28 |
29 |
30 |
# two sub-codecs: "" uses the alphabet in order, "inv" reverses it before filling the 5x5 grid
ENCMAP = {
    '': __build_encmap("abcdefghijlmnopqrstuvwxyz"),
    'inv': __build_encmap("abcdefghijlmnopqrstuvwxyz"[::-1]),
}


# tokens are separated with the braille blank character U+2800
add_map("tap", ENCMAP, ignore_case="both", sep="⠀", pattern=r"^(?:tap|knock)(?:[-_]code)?(|inv)$")
38 |
39 |
--------------------------------------------------------------------------------
/src/codext/languages/tomtom.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Tom-Tom Codec - tom-tom content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
13 | __examples__ = {
14 | 'enc': {
15 | 'this is a test': "\\\\/\\ /\\\\ /\\\\\\ \\/\\ | /\\\\\\ \\/\\ | / | \\\\/\\ /\\ \\/\\ \\\\/\\"
16 | }
17 | }
18 | __guess__ = ["tom-tom", "tom-tom/_.", "tom-tom-/01", "tom-tom-01", "tom-tom-/ab", "tom-tom-ab", "tom-tom-/AB",
19 | "tom-tom-AB"]
20 |
21 |
22 | ENCMAP = {
23 | # letters
24 | 'A': "/", 'B': "//", 'C': "///", 'D': "////", 'E': "/\\", 'F': "//\\", 'G': "///\\", 'H': "/\\\\", 'I': "/\\\\\\",
25 | 'J': "\\/", 'K': "\\\\/", 'L': "\\\\\\/", 'M': "\\//", 'N': "\\///", 'O': "/\\/", 'P': "//\\/", 'Q': "/\\\\/",
26 | 'R': "/\\//", 'S': "\\/\\", 'T': "\\\\/\\", 'U': "\\//\\", 'V': "\\/\\\\", 'W': "//\\\\", 'X': "\\\\//",
27 | 'Y': "\\/\\/", 'Z': "/\\/\\",
28 | # word separator
29 | ' ' : "|",
30 | }
31 |
32 |
33 | add_map("tom-tom", ENCMAP, ".", " ", ignore_case="both", pattern=r"^tom-?tom([-_]?.{3})?$", printables_rate=1.,
34 | expansion_factor=(3.8, .2))
35 |
36 |
--------------------------------------------------------------------------------
/src/codext/macros.json:
--------------------------------------------------------------------------------
1 | {
2 | "example-macro": [
3 | "gzip",
4 | "base62",
5 | "gzip",
6 | "base63",
7 | "gzip",
8 | "base64"
9 | ]
10 | }
11 |
--------------------------------------------------------------------------------
/src/codext/others/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | from .dna import *
3 | from .kbshift import *
4 | from .letters import *
5 | from .markdown import *
6 | from .uuencode import *
7 |
8 |
--------------------------------------------------------------------------------
/src/codext/others/dna.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """DNA Codec - dna content encoding.
3 |
4 | This implements the 8 methods of ATGC nucleotides following the rule of complementary pairing, according to the
5 | literature about coding and computing of DNA sequences.
6 |
7 | This codec:
8 | - en/decodes strings from str to str
9 | - en/decodes strings from bytes to bytes
10 | - decodes file content to str (read)
11 | - encodes file content from str to bytes (write)
12 | """
13 | from ..__common__ import *
14 |
15 |
# dna0 and dna9 are invalid scheme numbers (None -> expected to fail); schemes 1-8 select one of the
# eight nucleotide orderings
__examples__ = {
    'enc(dna0|dna9)': None,
    'enc(dna1)': {'this is a test': "GTGAGCCAGCCGGTATACAAGCCGGTATACAAGCAGACAAGTGAGCGGGTATGTGA"},
    'enc(dna-2)': {'this is a test': "CTCACGGACGGCCTATAGAACGGCCTATAGAACGACAGAACTCACGCCCTATCTCA"},
    'enc(dna_3)': {'this is a test': "ACAGATTGATTAACGCGTGGATTAACGCGTGGATGAGTGGACAGATAAACGCACAG"},
    'enc(dna4)': {'this is a test': "AGACATTCATTAAGCGCTCCATTAAGCGCTCCATCACTCCAGACATAAAGCGAGAC"},
    'enc(dna-5)': {'this is a test': "TCTGTAAGTAATTCGCGAGGTAATTCGCGAGGTAGTGAGGTCTGTATTTCGCTCTG"},
    'enc(dna_6)': {'this is a test': "TGTCTAACTAATTGCGCACCTAATTGCGCACCTACTCACCTGTCTATTTGCGTGTC"},
    'enc(dna7)': {'this is a test': "GAGTGCCTGCCGGATATCTTGCCGGATATCTTGCTGTCTTGAGTGCGGGATAGAGT"},
    'enc(dna-8)': {'this is a test': "CACTCGGTCGGCCATATGTTCGGCCATATGTTCGTCTGTTCACTCGCCCATACACT"},
}
__guess__ = ["dna%d" % i for i in range(1, 9)]
28 |
29 |
# For each bit pair, the 8 columns of its sequence give the nucleotide used by methods 1-8;
# pairs '00'/'11' and '01'/'10' hold complementary sequences (A<->T, G<->C).
SEQUENCES = {
    '00': "AAGCGCTT",
    '11': "TTCGCGAA",
    '01': "GCAATTGC",
    '10': "CGTTAACG",
}
# ENCMAP[m] is the bit-pair -> nucleotide table of method m+1
ENCMAP = [{bits: seq[method] for bits, seq in SEQUENCES.items()} for method in range(8)]
39 |
40 |
41 | add_map("dna", ENCMAP, intype="bin", pattern=r"dna[-_]?([1-8])$", entropy=2., printables_rate=1., expansion_factor=4.)
42 |
43 |
--------------------------------------------------------------------------------
/src/codext/others/kbshift.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Keyboard-Shift Codec - keyboard line shifting content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
# keyboard layouts as newline-separated rows; shifting moves a character along its own row (wrapping)
LAYOUTS = {
    'ansi': "~!@#$%^&*()_+\n`1234567890-=\nqwertyuiop{}|\n[]\\\nasdfghjkl:\"\n;'\nzxcvbnm<>\n,./",
    'azerty': "azertyuiop\nqsdfghjklm\nwxcvbn",
    'azerty-be': "³1234567890°_\n²&é\"'(§è!çà)-\n|@#^{}\nazertyuiop$\n€[]\n¨*\nqsdfghjklm%£\nùµ\n´`\n>wxcvbn?./+\n<,;:=\n\\~",
    'azerty-fr': "1234567890°+\n²&é\"'(-è_çà)=\n~#{[|`\\^@]}\nazertyuiop¨£\nqsdfghjklm%µ\nù*\n>wxcvbn?./§\n<,;:!",
    'dvorak': "~!@#$%^&*(){}\n`1234567890[]\n\"<>pyfgcrl?+|\n',./=\\\naoeuidhtns_\n-\n:qjkxbmwvz\n;",
    'qwerty': "qwertyuiop\nasdfghjkl\nzxcvbnm",
    'qwerty-us': "~!@#$%^&*()_+\n`1234567890-=\nqwertyuiop{}|\n[]\\\nasdfghjkl:\"\n;,\nzxcvbnm<>?\n./",
}
# group layout names by the length of their longest row (the maximum meaningful shift amount)
__per_len = {}
for k, s in LAYOUTS.items():
    i = max(map(len, s.split("\n")))
    __per_len.setdefault(i, [])
    __per_len[i].append(k)


__examples__ = {"enc-dec(kbshift_%s_%d)" % (kb, n): ["@irandom{256,512}"] for n in range(10) for kb in LAYOUTS.keys()}
# guess every layout with every shift up to its longest row
__guess__ = []
for mlen, kbs in __per_len.items():
    for k in kbs:
        __guess__.extend(["kbshift-%s-%d" % (k, i+1) for i in range(mlen)])
34 |
35 |
def _kbshift(text, keyboard="azerty", n=1, decode=False):
    """Shift every character of *text* by *n* positions along its row of *keyboard*.

    Encoding shifts left, decoding shifts right; characters found in no row are kept as-is
    (matching is done on the lowercased character).
    """
    step = n if decode else -n
    out = []
    for ch in text:
        shifted = ch
        lower = ch.lower()
        for row in LAYOUTS[keyboard].splitlines():
            if lower in row:
                shifted = row[(row.index(lower) + step) % len(row)]
                break
        out.append(shifted)
    return "".join(out)
46 |
47 |
def kbshift_encode(scheme):
    """Return an encoder for a "<layout>[-_]<shift>" scheme (default "azerty-1")."""
    layout, amount = re.match(r"^(.*?)[-_]?(\d+)$", scheme or "azerty-1").groups()
    def encode(text, errors="strict"):
        shifted = _kbshift(ensure_str(text), layout, int(amount))
        return shifted, len(shifted)
    return encode
54 |
55 |
def kbshift_decode(scheme):
    """Return the decoder matching kbshift_encode for the same scheme (shifts the other way)."""
    layout, amount = re.match(r"^(.*?)[-_]?(\d+)$", scheme or "azerty-1").groups()
    def decode(text, errors="strict"):
        unshifted = _kbshift(ensure_str(text), layout, int(amount), True)
        return unshifted, len(unshifted)
    return decode
62 |
63 |
# Register the parametrized kbshift codec. A pure character substitution leaves entropy and the
# printables rate unchanged, hence the identity lambdas. (Fixed PEP8: missing space after the comma
# between the two lambdas.)
add("kbshift", kbshift_encode, kbshift_decode, entropy=lambda e: e, printables_rate=lambda pr: pr, transitive=True,
    pattern=r"^kbshift(?:|[-_]((?:az|qw)erty[-_]?[1-9]|(?:ansi|azerty-(?:be|fr)|dvorak|qwerty-us)[-_]?(?:[1-9]|1[0-2])))$")
66 |
67 |
--------------------------------------------------------------------------------
/src/codext/others/letters.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Letters Codec - letter indices-related content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from string import ascii_uppercase
11 |
12 | from ..__common__ import *
13 |
14 |
# '\x00' and '\xff' map to None: characters outside the letter tables are expected to raise
__examples__ = {
    'enc(consonant-index|consonants_indices)': {
        'This is a test': "166I15I15A16E1516",
        '\x00': None,
        '\xff': None,
    },
    'dec(consonant-index|consonants_indices)': {
        '166I15I15A16E1516': "THISISATEST",
        '\x00': None,
        '\xff': None,
    },
    'enc(vowel-index|vowels_indices)': {'This is a test': "TH3S3S1T2ST"},
    'dec(vowel-index|vowels_indices)': {'TH3S3S1T2ST': "THISISATEST"},
    'enc(consonant-vowel_indices)': {'This is a test': "C16C6V3C15V3C15V1C16V2C15C16"},
    'dec(consonants_vowels-index)': {'C16C6V3C15V3C15V1C16V2C15C16': "THISISATEST"},
}
__guess__ = ["consonant-index", "vowel-index", "consonants_vowels-index"]


# 'Y' is treated as a vowel by this codec
VOWELS = "AEIOUY"
35 |
36 |
def __get_encmap(letters):
    """Return the uppercase-letter -> token table for the given scheme name.

    Schemes: "consonant(s)" numbers consonants 1-20 (vowels kept literal), "vowel(s)" numbers
    vowels 1-6 (consonants kept literal), "consonant(s)-vowel(s)" numbers both with C/V prefixes.
    Spaces are dropped (mapped to the empty string).
    """
    if re.match(r"^consonants?$", letters):
        encmap = {c: str(n + 1) for n, c in enumerate(sorted(set(ascii_uppercase) - set(VOWELS)))}
        encmap.update({v: v for v in VOWELS})
    elif re.match(r"^vowels?$", letters):
        encmap = {c: c for c in ascii_uppercase}
        encmap.update({v: str(n + 1) for n, v in enumerate(VOWELS)})
    elif re.match(r"^consonants?[-_]vowels?$", letters):
        encmap = {c: "C" + str(n + 1) for n, c in enumerate(sorted(set(ascii_uppercase) - set(VOWELS)))}
        encmap.update({v: "V" + str(n + 1) for n, v in enumerate(VOWELS)})
    encmap[' '] = ""
    return encmap
53 |
54 |
def letters_encode(letters):
    """Return an encoder turning letters into their scheme-specific indices.

    The loop applies the error handler on characters outside the table (so errors='strict'
    raises, 'ignore' drops them, etc.) and its accumulated result is what is returned.
    """
    encmap = __get_encmap(letters)
    def encode(text, errors="strict"):
        s = ""
        for i, c in enumerate(text.upper()):
            try:
                s += encmap[c]
            except KeyError:
                s += handle_error("letter-indices", errors)(c, i)
        # FIX: previously the error-handled result 's' was discarded and the output recomputed with
        # encmap.get(c, c), which silently kept unmapped characters and ignored the errors policy
        return s, len(text)
    return encode
66 |
67 |
def letters_decode(letters):
    """Return a decoder performing greedy longest-match lookup against the inverted table."""
    decmap = {v: k for k, v in __get_encmap(letters).items()}
    maxlen = max(len(x) for x in decmap.keys())
    def decode(text, errors="strict"):
        s, i = "", 0
        while i < len(text):
            # try the longest token first, shrinking down to a single character
            for j in range(maxlen, 0, -1):
                token = text[i:i+j]
                if token in decmap:
                    s += decmap[token]
                    i += j
                    break
            else:
                s += handle_error("letter-indices", errors, decode=True)(text[i], i)
                # FIX: consume the offending character; previously 'i' was not advanced here, so any
                # non-raising error mode (e.g. 'ignore', 'replace') looped forever on a bad character
                i += 1
        return s, len(text)
    return decode
87 |
88 |
# expansion_factor=None: output length varies per letter class, so no fixed ratio applies
# (presumably this disables the length-ratio heuristic in guess mode — confirm in __common__)
add("letter-indices", letters_encode, letters_decode, printables_rate=1., expansion_factor=None,
    pattern=r"^(consonants?|vowels?|consonants?[-_]vowels?)[-_]ind(?:ex|ices)$")
91 |
92 |
--------------------------------------------------------------------------------
/src/codext/others/markdown.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Markdown Codec - markdown content conversion to HTML.
3 |
4 | This codec:
5 | - encodes strings from str to str
6 | - encodes strings from bytes to bytes
7 | - encodes file content from str to bytes (write)
8 | """
9 | from ..__common__ import *
10 |
11 |
# encode-only codec: nothing to guess
__guess__ = []


# the codec is only registered when the optional markdown2 dependency is installed
try:
    from markdown2 import markdown as md2html
    # note: the group is NOT captured so that the pattern is only used to match the name of the codec and not to
    # dynamically bind to a parametrizable encode function
    # NOTE(review): the lambda's keyword is 'error' (singular) while the codecs convention is 'errors';
    # presumably add() passes it positionally — confirm before renaming
    add("markdown", lambda md, error="strict": (md2html(md), len(md)), pattern=r"^(?:markdown|Markdown|md)$")
except ImportError:
    pass
22 |
23 |
--------------------------------------------------------------------------------
/src/codext/others/uuencode.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """UU Codec - UU content encoding, relying on the native binascii module.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from binascii import a2b_uu as _dec, b2a_uu as _enc
11 |
12 | from ..__common__ import *
13 |
14 |
15 | __examples__ = {
16 | 'enc(uu|uu_codec)': {'this is a test': "begin 666 -\n.=&AI 0 and lines[-1].strip(b" \t\r\n\f") in [b"", b"`"]:
47 | lines = lines[:-1]
48 | r = b""
49 | for l in lines:
50 | r += _dec(l.strip(b" \t\r\n\f"))
51 | return r, len(text)
52 |
53 |
# bonus_func gives guess mode a scoring hint: genuine uuencoded data matches "begin <mode> <name> ... end"
add("uu", uu_encode, uu_decode, pattern=r"^uu(?:[-_]?encode|[-_]codec)?$",
    bonus_func=lambda o, *a: re.match(b"^begin [1-7]{3} .*\n.*\nend$", b(o.text).strip(b"\n"), re.M))
56 |
57 |
--------------------------------------------------------------------------------
/src/codext/stegano/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | from .hexagram import *
3 | from .klopf import *
4 | from .resistor import *
5 | from .rick import *
6 | from .sms import *
7 | from .whitespace import *
8 |
9 |
--------------------------------------------------------------------------------
/src/codext/stegano/hexagram.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Hexagram Codec - hexagram content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
__examples__ = {
    'enc(hexagram|iching|i-ching-hexagrams)': {'this is a test': "䷰䷭䷚䷔䷞䷺䷗䷔䷞䷺䷗䷚䷏䷊䷂䷕䷞䷈䷇☯"},
}

# 1-to-1 mapping of the base64 alphabet (including padding '=', which maps to ☯) onto hexagram characters
ENCMAP = {c1: c2 for c1, c2 in zip("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",
                                   "䷁䷗䷆䷒䷎䷣䷭䷊䷏䷲䷧䷵䷽䷶䷟䷡䷇䷂䷜䷻䷦䷾䷯䷄䷬䷐䷮䷹䷞䷰䷛䷪䷖䷚䷃䷨䷳䷕"
                                   "䷑䷙䷢䷔䷿䷥䷷䷝䷱䷍䷓䷩䷺䷼䷴䷤䷸䷈䷋䷘䷅䷉䷠䷌䷫䷀☯")}
# inverse table for decoding
DECMAP = {c2: c1 for c1, c2 in ENCMAP.items()}
21 |
22 |
def hexagram_encode(input, errors="strict"):
    """Encode *input* by base64-encoding it, then substituting every base64 character with a hexagram."""
    symbols = [ENCMAP[c] for c in codecs.encode(input, "base64")]
    return "".join(symbols), len(input)
25 |
26 |
def hexagram_decode(input, errors="strict"):
    """Map hexagrams back to base64 characters (delegating bad symbols to the error handler),
    then base64-decode the result."""
    out, on_error = "", handle_error("hexagram", errors, decode=True)
    for pos, sym in enumerate(input):
        if sym in DECMAP:
            out += DECMAP[sym]
        else:
            out += on_error(sym, pos, out)
    return codecs.decode(out, "base64"), len(input)
35 |
36 |
# printables_rate=0.: the output consists entirely of non-ASCII hexagram symbols
add("hexagram", hexagram_encode, hexagram_decode, printables_rate=0.,
    pattern=r"^(?:(?:i-ching-)?hexagrams?|i-?ching)$")
39 |
40 |
--------------------------------------------------------------------------------
/src/codext/stegano/klopf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Klopf Codec - Polybius-based content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
# e.g. 't' -> "44", 'h' -> "32"; spaces are kept as-is
__examples__ = {
    'enc(klopf|klopfcode)': {'this is a test': "44324234 4234 11 44513444"},
}
16 |
17 |
# Polybius square: the letter at column x, row y (0-based, 25-letter alphabet) encodes as "<x+1><y+1>"
ENCMAP = {}
for col in range(5):
    for row in range(5):
        ENCMAP["ABCDEFGHIKLMNOPQRSTUVWXYZ"[row * 5 + col]] = str(col + 1) + str(row + 1)
# NOTE(review): 'J' maps to "43" — the same code as 'O' — rather than "42" ('I') as the classic
# I/J-merged Polybius square would; confirm this is intended before changing
ENCMAP['J'] = "43"
ENCMAP[' '] = " "
21 |
22 |
add_map("klopf", ENCMAP, ignore_case="both", pattern=r"^(?:klopf(?:code)?)$", printables_rate=1.,
        expansion_factor=(1.85, .15))
25 |
26 |
--------------------------------------------------------------------------------
/src/codext/stegano/resistor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Resistor Codec - resistor color codes content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
13 | __examples__ = {
14 | 'enc(resistor|resistor_color|condensator_color_code|condensators-color-code)': {
15 | 'Test': "\x1b[48;5;232m \x1b[0;00m\x1b[48;5;245m \x1b[0;00m\x1b[48;5;11m \x1b[0;00m\x1b[48;5;130m "
16 | "\x1b[0;00m\x1b[48;5;232m \x1b[0;00m\x1b[48;5;130m \x1b[0;00m\x1b[48;5;130m \x1b[0;00m\x1b[48;5;130m "
17 | "\x1b[0;00m\x1b[48;5;2m \x1b[0;00m\x1b[48;5;130m \x1b[0;00m\x1b[48;5;130m \x1b[0;00m\x1b[48;5;4m "
18 | "\x1b[0;00m"
19 | },
20 | }
21 |
22 |
# map each digit to a single space drawn with an ANSI 256-color background escape sequence
ENCMAP = {digit: "\033[48;5;{}m \033[0;00m".format(color)
          for digit, color in zip("0123456789", (232, 130, 1, 214, 11, 2, 4, 129, 245, 231))}
24 |
25 |
# intype="ord" converts each input character to its ordinal before the per-digit mapping applies
add_map("resistor", ENCMAP, intype="ord", pattern=r"^(?:condensator|resistor)s?(?:[-_]color(?:[-_]code)?)?$",
        entropy=3.4, printables_rate=.3333333333333333, expansion_factor=(56., 2.))
28 |
29 |
--------------------------------------------------------------------------------
/src/codext/stegano/rick.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Rick Astley Codec - Rick Astley's song content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
__examples__ = {
    'enc(rick|rick-astley)': {'this is a test': "TELL LET You gonna + You gonna + NEVER + TELL UP gonna TELL"},
}


# inspired from: https://github.com/moongazer07/rick-cipher
# letter -> song word; casing of the words disambiguates letters (e.g. "You" vs "you"), so decoding
# is case-sensitive while encoding ignores input case
ENCMAP = {
    'A': "NEVER", 'B': "GONNA", 'C': "GIVE", 'D': "YOU", 'E': "UP", 'F': "Never", 'G': "Gonna", 'H': "LET", 'I': "You",
    'J': "DOWN", 'K': "NEver", 'L': "GOnna", 'M': "TURN", 'N': "AROUND", 'O': "AND", 'P': ["DESERT", "DESSERT"],
    'Q': "YOu", 'R': "NEVer", 'S': "gonna", 'T': "TELL", 'U': "A", 'V': "LIE", 'W': "and", 'X': "HURT", 'Y': "you",
    'Z': "rick", ' ': "+", '.': ".", '\n': "\n",
    '0': "0", '1': "1", '2': "2", '3': "3", '4': "4", '5': "5", '6': "6", '7': "7", '8': "8", '9': "9",
    '': "astley", # silent this token for decoding ("rick astley" causes an issue with the separator " ")
}


# "?" replaces unmappable characters, " " separates tokens
add_map("rick", ENCMAP, "?", " ", ignore_case="encode", pattern=r"^rick(?:[-_]astley)?(?:[-_]cipher)?$",
        printables_rate=1.)
31 |
32 |
--------------------------------------------------------------------------------
/src/codext/stegano/sms.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """SMS Codec - phone keystrokes content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
__examples__ = {
    'enc(sms|nokia3310|nokia-3310|nokia_3310|t9)': {'this is a test': "8-44-444-7777-0-444-7777-0-2-0-8-33-7777-8"},
}


# multi-tap phone keypad: each letter is the key digit repeated by its position on the key
ENCMAP = {
    ' ': "0", 'a': "2", 'b': "22", 'c': "222", 'd': "3", 'e': "33", 'f': "333", 'g': "4", 'h': "44", 'i': "444",
    'j': "5", 'k': "55", 'l': "555", 'm': "6", 'n': "66", 'o': "666", 'p': "7", 'q': "77", 'r': "777", 's': "7777",
    't': "8", 'u': "88", 'v': "888", 'w': "9", 'x': "99", 'y': "999", 'z': "9999", '*': "*", '#': "#",
}


# "?" replaces unmappable characters; both "-" and "_" are accepted as key-press separators
add_map("sms", ENCMAP, "?", "-_", ignore_case="encode", pattern=r"^(?:nokia(?:[-_]?3310)?|sms|t9)$", printables_rate=1.,
        expansion_factor=(2.9, .2))
27 |
28 |
--------------------------------------------------------------------------------
/src/codext/stegano/whitespace.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """Whitespace Codec - whitespace/tabs content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | import random
11 | import re
12 | from string import printable
13 |
14 | from ..__common__ import *
15 |
16 |
__examples1__ = {
    'enc(whitespace|whitespaces)': {'test': "\t \t \t\t\t \t\t \t \t \t\t \t \t \t\t"},
    'enc(whitespace-inv|whitespace_inverted)': {'test': " \t\t\t \t \t\t \t \t \t\t\t \t\t \t\t\t \t "},
}
__guess1__ = ["whitespace", "whitespace-inv"]
__guess2__ = ["whitespace+after-before", "whitespace-after+before"]


# bit -> whitespace character; the "inv" variant swaps tab and space
ENCMAP = {r'': {'0': "\t", '1': " "}, r'[-_]inv(erted)?': {'0': " ", '1': "\t"}}
add_map("whitespace", ENCMAP, intype="bin", pattern=r"^whitespaces?([-_]inv(?:erted)?)?$", examples=__examples1__,
        guess=__guess1__, entropy=1., printables_rate=1., expansion_factor=8.)
28 |
29 |
def wsba_encode(p):
    """Build an encoder hiding each character as one printable char padded with random spaces.

    *p* is the operator suffix extracted from the codec name (e.g. "+after-before"); it is appended
    to "ord(c)" and eval'd per character with random 'after'/'before' space counts, retried until the
    result is a printable, non-whitespace character. Each input character yields one output line.
    NOTE: the eval() input comes from the registered codec-name pattern, not from user data.
    """
    eq = "ord(c)" + p
    def encode(text, errors="strict"):
        r = []
        # hoisted loop invariant: lowest ordinal among printable non-whitespace characters
        # (was recomputed for every input character; also removed the unused
        #  'offset = random.randint(-10, 10)' dead code)
        floor = min(ord(c) for c in printable[:-6])
        for i, c in enumerate(text):
            if ord(c) < floor:
                r.append(handle_error("whitespace" + p, errors, repl_char="\x00")(c, i))
                continue
            enc = "\x00"
            while enc not in printable[:-6]:
                after = random.randint(0, 20)
                before = random.randint(0, 20)
                enc = chr(eval(eq) % 256)
            r.append(" " * before + enc + " " * after)
        s = "\n".join(r)
        return s, len(s)
    return encode
48 |
49 |
def wsba_decode(p):
    """Build the matching decoder: flip every '+'/'-' in the equation and recompute each character
    from a line's payload character and its leading/trailing space counts."""
    eq = "ord(c)" + "".join({'-': "+", '+': "-"}.get(sym, sym) for sym in p)
    def decode(text, errors="strict"):
        out = ""
        for num, line in enumerate(text.split("\n")):
            payload = len(line.strip())
            if payload == 0:
                # blank lines carry no character
                continue
            if payload > 1:
                # more than one non-space character: report it, then still decode the line best-effort
                out += handle_error("whitespace_after_before", errors, decode=True, item="line")(line, num)
            # names 'after', 'before' and 'c' are referenced by the eval'd equation
            after = len(line) - len(line.rstrip(" "))
            before = len(line) - len(line.lstrip(" "))
            c = line[before]
            out += chr(eval(eq))
        return out, len(text)
    return decode
66 |
67 |
# operator sub-pattern: a sign optionally followed by a numeric factor and '*' or '/'
op = r"[+-](?:\d+(?:\.\d+)?[*/])?"
add("whitespace_after_before", wsba_encode, wsba_decode, guess=__guess2__, entropy=1., printables_rate=1., penalty=.1,
    expansion_factor=(22., 3.), pattern=r"whitespace("+op+r"before"+op+r"after|"+op+r"after"+op+r"before)$")
71 |
72 |
--------------------------------------------------------------------------------
/src/codext/web/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | from .html import *
3 | from .url import *
4 |
5 |
--------------------------------------------------------------------------------
/src/codext/web/url.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """URL Codec - urlencode content encoding.
3 |
4 | This codec:
5 | - en/decodes strings from str to str
6 | - en/decodes strings from bytes to bytes
7 | - decodes file content to str (read)
8 | - encodes file content from str to bytes (write)
9 | """
10 | from ..__common__ import *
11 |
12 |
# note that '.' is escaped on encoding (%2E) even though it is unreserved; decoding accepts both forms
__examples__ = {
    'enc(url|urlencode)': {'?=this/is-a_test/../': "%3F%3Dthis%2Fis-a_test%2F%2E%2E%2F"},
    'dec(url|urlencode)': {'test/test%2etxt': "test/test.txt", 'test%2ftest.txt': "test/test.txt"}
}
17 |
18 |
# characters left untouched: RFC 3986 unreserved set minus '.' and '~' (dots are deliberately
# escaped — see the examples above)
SAFE = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-"
# every other byte value is percent-encoded with two uppercase hex digits
ENCMAP = {chr(code): "%{:02X}".format(code) for code in range(256) if chr(code) not in SAFE}
25 |
26 |
# no_error=True: safe characters simply pass through; decoding is case-insensitive on hex digits
add_map("url", ENCMAP, ignore_case="decode", no_error=True, pattern=r"^url(?:encode)?$", printables_rate=1.,
        expansion_factor=(1.2, .2))
29 |
30 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dhondta/python-codext/9811df6922b7abdb2252289c104ff09a508b3fbb/tests/__init__.py
--------------------------------------------------------------------------------