├── tests
    ├── __init__.py
    └── tests.py
├── .flake8
├── compressed_rtf
    ├── version.py
    ├── __init__.py
    ├── crc32.py
    └── compressed_rtf.py
├── .github
    ├── PULL_REQUEST_TEMPLATE.md
    ├── ISSUE_TEMPLATE
    │   ├── change.md
    │   ├── bug_report.md
    │   └── feature_request.md
    └── workflows
    │   └── feature_branch_build.yml
├── .pylintrc
├── .gitignore
├── pyproject.toml
├── LICENSE
└── README.md


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 120
3 | 
4 | 


--------------------------------------------------------------------------------
/compressed_rtf/version.py:
--------------------------------------------------------------------------------
1 | """Package version"""
2 | 
# Single source for the release number: pyproject.toml reads this attribute
# dynamically and the CI workflow extracts it with grep/cut/tr, so keep the
# plain single-quoted assignment on one line.
__version__ = '1.0.8'
4 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | # Description
 2 | 
 3 | Provide at least some details on the changes.
 4 | 
 5 | 
 6 | # Checklist
 7 | 
 8 | - [ ] Your code works
 9 | - [ ] The changes are cool
10 | - [ ] The version is bumped
11 | 


--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
 1 | [DESIGN]
 2 | 
 3 | # Maximum number of branches for function / method body.
 4 | max-branches=20
 5 | 
 6 | # Maximum number of locals for function / method body
 7 | max-locals=25
 8 | 
 9 | # Maximum number of statements in function / method body.
10 | max-statements=55
11 | 
12 | 
13 | [REFACTORING]
14 | 
15 | # Maximum number of nested blocks for function / method body
16 | max-nested-blocks=7
17 | 


--------------------------------------------------------------------------------
/compressed_rtf/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | Compressed Rich Text Format (RTF) compression and decompression package
 5 | 
 6 | Based on Rich Text Format (RTF) Compression Algorithm
 7 | https://msdn.microsoft.com/en-us/library/cc463890(v=exchg.80).aspx
 8 | """
 9 | 
10 | from .compressed_rtf import compress, decompress
11 | from .version import __version__
12 | 
13 | __all__ = ['compress', 'decompress', '__version__']
14 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/change.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Change Request
 3 | about: Suggest a positive change for this project
 4 | title: '[Change] '
 5 | labels: 'enhancement'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your change request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Additional context**
17 | Add any other context or screenshots about the change request here.
18 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: '[Bug] '
 5 | labels: 'bug'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior, e.g.:
15 | 1. The exact command(s) you ran.
16 | 2. Input data.
17 | 3. Output and/or error messages.
18 | 4. The environment (Python version, OS, library version, etc).
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Additional context**
24 | Add any other context about the problem here.
25 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: '[Feature] '
 5 | labels: 'enhancement'
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | 
55 | # Sphinx documentation
56 | docs/_build/
57 | 
58 | # PyBuilder
59 | target/
60 | 
61 | #Ipython Notebook
62 | .ipynb_checkpoints
63 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools >= 77.0"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "compressed-rtf"
 7 | dynamic = ["version"]
 8 | description = "Compressed Rich Text Format (RTF) compression and decompression package"
 9 | readme = "README.md"
10 | license = "MIT"
11 | license-files = ["LICENSE*"]
12 | authors = [
13 |     { name = "Dmitry Alimov" },
14 | ]
15 | keywords = [
16 |     "compressed-rtf",
17 |     "lzfu",
18 |     "mela",
19 |     "rtf",
20 | ]
21 | classifiers = [
22 |     "Development Status :: 5 - Production/Stable",
23 |     "Environment :: Console",
24 |     "Environment :: Win32 (MS Windows)",
25 |     "Operating System :: Microsoft :: Windows",
26 |     "Operating System :: POSIX :: Linux",
27 |     "Programming Language :: Python :: 2.7",
28 |     "Programming Language :: Python :: 3",
29 |     "Topic :: System :: Archiving :: Compression",
30 |     "Topic :: Text Processing",
31 | ]
32 | 
33 | [project.urls]
34 | Homepage = "https://github.com/delimitry/compressed_rtf"
35 | 
36 | [tool.setuptools.dynamic]
37 | version = { attr = "compressed_rtf.version.__version__" }
38 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Dmitry Alimov
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # compressed_rtf
 2 | 
 3 | [![PyPI](https://img.shields.io/pypi/v/compressed_rtf)](https://pypi.org/project/compressed-rtf/)
 4 | [![MIT license](http://img.shields.io/badge/license-MIT-brightgreen.svg)](https://github.com/delimitry/compressed_rtf/blob/master/LICENSE)
 5 | 
 6 | Compressed Rich Text Format (RTF) compression worker in Python
 7 | 
 8 | Description:
 9 | ------------
10 | 
11 | Compressed RTF is also known as the "LZFu" compression format.
12 | 
13 | Based on Rich Text Format (RTF) Compression Algorithm:
14 | 
15 | https://msdn.microsoft.com/en-us/library/cc463890(v=exchg.80).aspx
16 | 
17 | 
18 | Usage example:
19 | --------------
20 | 
21 | ```python
22 | >>> from compressed_rtf import compress, decompress
23 | >>>
24 | >>> data = b'{\\rtf1\\ansi\\ansicpg1252\\pard test}'
25 | >>> comp = compress(data, compressed=True)  # compressed
26 | >>> comp
27 | b'#\x00\x00\x00"\x00\x00\x00LZFu3\\\xe8t\x03\x00\n\x00rcpg125\x922\n\xf3 t\x07\x90t}\x0f\x10'
28 | >>>
29 | >>> raw = compress(data, compressed=False)  # raw/uncompressed
30 | >>> raw
31 | b'.\x00\x00\x00"\x00\x00\x00MELA\x00\x00\x00\x00{\\rtf1\\ansi\\ansicpg1252\\pard test}'
32 | >>>
33 | >>> decompress(comp)
34 | b'{\\rtf1\\ansi\\ansicpg1252\\pard test}'
35 | >>>
36 | >>> decompress(raw)
37 | b'{\\rtf1\\ansi\\ansicpg1252\\pard test}'
38 | >>>
39 | ```
40 | 
41 | License:
42 | --------
43 | Released under [The MIT License](https://github.com/delimitry/compressed_rtf/blob/master/LICENSE).
44 | 


--------------------------------------------------------------------------------
/tests/tests.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf8 -*-
 2 | 
 3 | import unittest
 4 | from compressed_rtf.compressed_rtf import compress, decompress
 5 | from compressed_rtf.crc32 import crc32
 6 | 
 7 | 
 8 | class Test(unittest.TestCase):
 9 |     """
10 |     Test RTF compression and decompression
11 |     """
12 | 
13 |     def test_decompress(self):
14 |         """
15 |         Test decompression
16 |         """
17 |         data = b'-\x00\x00\x00+\x00\x00\x00LZFu\xf1\xc5\xc7\xa7\x03\x00\n\x00' \
18 |             b'rcpg125B2\n\xf3 hel\t\x00 bw\x05\xb0ld}\n\x80\x0f\xa0'
19 |         self.assertEqual(
20 |             decompress(data),
21 |             b'{\\rtf1\\ansi\\ansicpg1252\\pard hello world}\r\n')
22 |         # test raw decompression
23 |         data = b'.\x00\x00\x00"\x00\x00\x00MELA\x00\x00\x00\x00{\\rtf1\\ansi\\an' \
24 |             b'sicpg1252\\pard test}'
25 |         self.assertEqual(
26 |             decompress(data),
27 |             b'{\\rtf1\\ansi\\ansicpg1252\\pard test}')
28 |         # test < 16 bytes long data exception
29 |         with self.assertRaises(Exception):
30 |             decompress(b'')
31 |         with self.assertRaises(Exception):
32 |             decompress(b'0123456789abcde')
33 |         # test unknown compression type exception
34 |         with self.assertRaises(Exception):
35 |             decompress(b'\x10\x00\x00\x00\x11\x00\x00\x00ABCD\xff\xff\xff\xff')
36 |         # test invalid CRC exception
37 |         with self.assertRaises(Exception):
38 |             decompress(b'\x10\x00\x00\x00\x11\x00\x00\x00LZFu\xff\xff\xff\xff')
39 | 
40 |     def test_crc32(self):
41 |         """
42 |         Test CRC32 computation
43 |         """
44 |         data = b'\x03\x00\n\x00rcpg125B2\n\xf3 hel\t\x00 bw\x05\xb0ld}\n\x80' \
45 |             b'\x0f\xa0'
46 |         self.assertEqual(crc32(data), 0xa7c7c5f1)
47 |         # test empty crc32
48 |         self.assertEqual(crc32(b''), 0x00000000)
49 | 
50 |     def test_compression(self):
51 |         """
52 |         Test compression types compressed and uncompressed
53 |         """
54 |         data = b'{\\rtf1\\ansi\\ansicpg1252\\pard hello world}\r\n'
55 |         self.assertEqual(
56 |             compress(data, compressed=True),
57 |             b'-\x00\x00\x00+\x00\x00\x00LZFu\xf1\xc5\xc7\xa7\x03\x00\n\x00'
58 |             b'rcpg125B2\n\xf3 hel\t\x00 bw\x05\xb0ld}\n\x80\x0f\xa0')
59 |         # test uncompressed
60 |         self.assertEqual(
61 |             compress(data, compressed=False),
62 |             b'7\x00\x00\x00+\x00\x00\x00MELA\x00\x00\x00\x00{\\rtf1\\ansi\\ansicpg'
63 |             b'1252\\pard hello world}\r\n')
64 | 
65 |     def test_compression_repeated_tokens(self):
66 |         """
67 |         Test compression of data with repeated tokens, crossing write position
68 |         """
69 |         data = b'{\\rtf1 WXYZWXYZWXYZWXYZWXYZ}'
70 |         self.assertEqual(
71 |             compress(data),
72 |             b'\x1a\x00\x00\x00\x1c\x00\x00\x00LZFu\xe2\xd4KQA\x00\x04 WXYZ\r'
73 |             b'n}\x01\x0e\xb0')
74 | 
75 |     def test_hither_and_thither(self):
76 |         """
77 |         Test decompression of compressed data
78 |         """
79 |         data = b'{\\rtf1\\ansi\\mac\\deff0\\deftab720'
80 |         self.assertEqual(decompress(compress(data, compressed=True)), data)
81 | 
82 |     def test_hither_and_thither_long(self):
83 |         """
84 |         Test decompression of compressed data larger than 4096
85 |         """
86 |         data = b'{\\rtf1\\ansi\\ansicpg1252\\pard hello world'
87 |         while len(data) < 4096:
88 |             data += b'testtest'
89 |         data += b'}'
90 |         self.assertEqual(decompress(compress(data, compressed=True)), data)
91 | 
92 | 
93 | if __name__ == '__main__':
94 |     unittest.main(verbosity=2)
95 | 


--------------------------------------------------------------------------------
/compressed_rtf/crc32.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf8 -*-
 2 | """
 3 | Module for CRC32 calculation
 4 | """
 5 | 
 6 | from io import BytesIO
 7 | 
 8 | __all__ = ['crc32']
 9 | 
# Lookup table with one 32-bit entry per possible byte value.
table = [
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
    0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
    0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
    0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
    0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
    0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
    0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
    0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
    0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
    0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
    0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
    0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
    0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
    0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
    0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
    0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
    0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
    0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
    0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
    0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
    0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
    0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
    0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
    0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
    0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
    0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
    0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
    0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
    0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
    0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
    0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
    0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
    0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
    0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
    0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
    0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
    0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
    0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
    0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
    0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
    0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
    0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
    0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
    0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
    0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
    0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
    0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
    0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
    0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
    0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
    0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
    0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
]


def crc32(data):
    """
    Return the table-driven CRC32 of the bytes in `data`

    The checksum starts at zero and is returned as computed, without a
    final inversion, so empty input yields 0.
    """
    reader = BytesIO(data)
    checksum = 0x00000000
    # read(1) returns b'' at end of stream, which ends the iteration
    for byte in iter(lambda: reader.read(1), b''):
        index = (checksum ^ ord(byte)) & 0xff
        checksum = table[index] ^ (checksum >> 8)
    return checksum
92 | 


--------------------------------------------------------------------------------
/.github/workflows/feature_branch_build.yml:
--------------------------------------------------------------------------------
  1 | ---
  2 | # yamllint disable rule:line-length
  3 | # yamllint disable rule:truthy
  4 | 
  5 | name: Build Feature Branch
  6 | 
  7 | on:
  8 |   workflow_dispatch:
  9 |   push:
 10 |     branches-ignore:
 11 |       - master
 12 | 
 13 | jobs:
 14 | 
 15 |   validate_new_version:
 16 |     name: Validate new version
 17 |     runs-on: ubuntu-latest
 18 |     outputs:
 19 |       planned_version: ${{ steps.validate_new_version.outputs.planned_version }}
 20 |       version_file_exists: ${{ steps.validate_new_version.outputs.version_file_exists }}
 21 |       tag_hash: ${{ steps.validate_new_version.outputs.tag_hash }}
 22 |       can_create: ${{ steps.validate_new_version.outputs.can_create }}
 23 |       tag_exists: ${{ steps.validate_new_version.outputs.tag_exists }}
 24 |       branch_name: ${{ steps.validate_new_version.outputs.branch_name }}
 25 |       underscored_branch_name: ${{ steps.generate_tag_from_branch.outputs.underscored_branch_name }}
 26 | 
 27 |     steps:
 28 | 
 29 |       - name: Check out code
 30 |         uses: actions/checkout@v4
 31 |         with:
 32 |           fetch-depth: 0
 33 | 
 34 |       - name: Workaround the version file format
 35 |         id: prepare_version_file
 36 |         shell: bash
 37 |         run: >-
 38 |           grep "__version__" compressed_rtf/version.py | cut -d "=" -f2 | tr -d "' " > .version
 39 | 
 40 |       - name: Use latest released action
 41 |         id: validate_new_version
 42 |         uses: reinvented-stuff/validate-version-action@1.2.0
 43 |         with:
 44 |           version_filename: ".version"
 45 |           github_token: "${{ secrets.GITHUB_TOKEN }}"
 46 |           fail_on_error: false
 47 | 
 48 | 
 49 |   test_with_unittest:
 50 |     name: Test with unittest
 51 |     runs-on: ubuntu-latest
 52 |     outputs:
 53 |       job_status: ${{ job.status }}
 54 |     env:
 55 |       APP_NAME: "compressed_rtf"
 56 | 
 57 |     steps:
 58 | 
 59 |       - name: Check out this repository
 60 |         uses: actions/checkout@v4
 61 |         with:
 62 |           fetch-depth: 0
 63 | 
 64 |       - name: Set up Python3
 65 |         id: setup_python
 66 |         uses: actions/setup-python@v2
 67 |         with:
 68 |           python-version: '3.10'
 69 | 
 70 |       - name: Prepare environment
 71 |         id: prepare_test_env
 72 |         shell: bash
 73 |         run: |
 74 |           python3 -m pip install --upgrade pip
 75 |           python3 -m pip install setuptools wheel
 76 | 
 77 |       - name: Run tests
 78 |         id: run_tests
 79 |         shell: bash
 80 |         run: >-
 81 |           python3 -m unittest
 82 | 
 83 |       - name: Upload test artifacts
 84 |         uses: actions/upload-artifact@v4
 85 |         id: upload_test_artifacts
 86 |         if: always()
 87 |         with:
 88 |           name: "${{ env.APP_NAME }}_${{ github.run_id }}"
 89 |           path: |
 90 |             unittest_*.txt
 91 |             unittest_*.html
 92 | 
 93 |           if-no-files-found: warn
 94 |           retention-days: 90
 95 | 
 96 | 
 97 |   lint:
 98 |     name: Linters and such
 99 |     runs-on: ubuntu-latest
100 |     outputs:
101 |       job_status: ${{ job.status }}
102 |     env:
103 |       APP_NAME: "compressed_rtf"
104 | 
105 |     steps:
106 | 
107 |       - name: Check out this repository
108 |         uses: actions/checkout@v4
109 |         with:
110 |           fetch-depth: 0
111 | 
112 |       - name: Set up Python3
113 |         id: setup_python
114 |         uses: actions/setup-python@v2
115 |         with:
116 |           python-version: '3.10'
117 | 
118 |       - name: Prepare environment
119 |         id: prepare_lint_env
120 |         shell: bash
121 |         run: |
122 |           python3 -m pip install --upgrade pip
123 |           python3 -m pip install flake8 pylint
124 | 
125 |       - name: Run pylint
126 |         id: run_pylint
127 |         continue-on-error: true
128 |         shell: bash
129 |         run: >-
130 |           python3 -m pylint "${APP_NAME}"
131 | 
132 |       - name: Run flake8
133 |         id: run_flake8
134 |         continue-on-error: true
135 |         shell: bash
136 |         run: >-
137 |           python3 -m flake8 "${APP_NAME}"
138 | 
139 |       - name: Final decision on linters
140 |         id: final_decision
141 |         shell: bash
142 |         run: |-
143 |           echo "run_pylint: ${{ steps.run_pylint.outcome }}"
144 |           echo "run_flake8: ${{ steps.run_flake8.outcome }}"
145 | 
146 |           if [[ "${{ steps.run_pylint.outcome }}" != "success" ]] || [[ "${{ steps.run_flake8.outcome }}" != "success" ]]; then
147 |             echo "Failing the linters job" >&2
148 |             exit 1
149 |           fi
150 | 
151 | 
152 |   build:
153 |     name: Build
154 |     runs-on: ubuntu-latest
155 |     outputs:
156 |       job_status: ${{ job.status }}
157 |     env:
158 |       APP_NAME: "compressed_rtf"
159 | 
160 |     steps:
161 | 
162 |       - name: Check out this repository
163 |         uses: actions/checkout@v4
164 |         with:
165 |           fetch-depth: 0
166 | 
167 |       - name: Set up Python3
168 |         id: setup_python
169 |         uses: actions/setup-python@v2
170 |         with:
171 |           python-version: '3.10'
172 | 
173 |       - name: Prepare environment
174 |         id: prepare_build_env
175 |         shell: bash
176 |         run: |
177 |           python3 -m pip install --upgrade pip
178 |           python3 -m pip install build
179 | 
180 |       - name: Build
181 |         id: run_build
182 |         shell: bash
183 |         run: >-
184 |           python3 -m build
185 | 
186 |       - name: Upload build artifacts
187 |         uses: actions/upload-artifact@v4
188 |         id: upload_build_artifacts
189 |         if: always()
190 |         with:
191 |           name: "${{ env.APP_NAME }}_${{ github.run_id }}"
192 |           path: |
193 |             dist/*.whl
194 |             dist/*.tar.gz
195 | 
196 |           if-no-files-found: warn
197 |           retention-days: 90
198 | 
199 | ...
200 | 


--------------------------------------------------------------------------------
/compressed_rtf/compressed_rtf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf8 -*-
  2 | """
  3 | Compressed Rich Text Format (RTF) worker
  4 | 
  5 | Based on Rich Text Format (RTF) Compression Algorithm
  6 | https://msdn.microsoft.com/en-us/library/cc463890(v=exchg.80).aspx
  7 | """
  8 | 
  9 | import struct
 10 | import sys
 11 | from io import BytesIO
 12 | from .crc32 import crc32
 13 | 
 14 | __all__ = ['compress', 'decompress']
 15 | 
 16 | PY3 = sys.version_info[0] == 3
 17 | 
 18 | INIT_DICT = (
 19 |     b'{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\'
 20 |     b'fswiss \\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New '
 21 |     b'RomanCourier{\\colortbl\\red0\\green0\\blue0\r\n\\par \\pard\\plain\\'
 22 |     b'f0\\fs20\\b\\i\\u\\tab\\tx'
 23 | )
 24 | 
 25 | INIT_DICT_SIZE = 207
 26 | MAX_DICT_SIZE = 4096
 27 | 
 28 | COMPRESSED = b'LZFu'
 29 | UNCOMPRESSED = b'MELA'
 30 | 
 31 | 
 32 | def char_to_int(val):
 33 |     """Convert a character to its ordinal value."""
 34 |     return ord(val) if PY3 else val
 35 | 
 36 | 
 37 | def compress(data, compressed=True):
 38 |     """
 39 |     Compress `data` using RTF compression algorithm
 40 |     If `compressed` flag is False, data will be written uncompressed
 41 |     """
 42 |     output_buffer = b''
 43 |     # set init dict
 44 |     init_dict = list(INIT_DICT + b' ' * (MAX_DICT_SIZE - INIT_DICT_SIZE))
 45 |     write_offset = INIT_DICT_SIZE
 46 |     # compressed
 47 |     if compressed:
 48 |         comp_type = COMPRESSED
 49 |         # make stream
 50 |         in_stream = BytesIO(data)
 51 |         # init params
 52 |         control_byte = 0
 53 |         control_bit = 1
 54 |         token_offset = 0
 55 |         token_buffer = b''
 56 |         while True:
 57 |             # find the longest match
 58 |             dict_offset, longest_match, write_offset = \
 59 |                 _find_longest_match(init_dict, in_stream, write_offset)
 60 |             char = in_stream.read(longest_match if longest_match > 1 else 1)
 61 |             # EOF input stream
 62 |             if not char:
 63 |                 # update params
 64 |                 control_byte |= 1 << control_bit - 1
 65 |                 control_bit += 1
 66 |                 token_offset += 2
 67 |                 # add dict reference
 68 |                 dict_ref = (write_offset & 0xfff) << 4
 69 |                 token_buffer += struct.pack('>H', dict_ref)
 70 |                 # add to output
 71 |                 output_buffer += struct.pack('B', control_byte)
 72 |                 output_buffer += token_buffer[:token_offset]
 73 |                 break
 74 |             if longest_match > 1:
 75 |                 # update params
 76 |                 control_byte |= 1 << control_bit - 1
 77 |                 control_bit += 1
 78 |                 token_offset += 2
 79 |                 # add dict reference
 80 |                 dict_ref = (dict_offset & 0xfff) << 4 | (
 81 |                         longest_match - 2) & 0xf
 82 |                 token_buffer += struct.pack('>H', dict_ref)
 83 |             else:
 84 |                 # character is not found in dictionary
 85 |                 if longest_match == 0:
 86 |                     init_dict[write_offset] = char_to_int(char)
 87 |                     write_offset = (write_offset + 1) % MAX_DICT_SIZE
 88 |                 # update params
 89 |                 control_byte |= 0 << control_bit - 1
 90 |                 control_bit += 1
 91 |                 token_offset += 1
 92 |                 # add literal
 93 |                 token_buffer += char
 94 |             if control_bit > 8:
 95 |                 # add to output
 96 |                 output_buffer += struct.pack('B', control_byte)
 97 |                 output_buffer += token_buffer[:token_offset]
 98 |                 # reset params
 99 |                 control_byte = 0
100 |                 control_bit = 1
101 |                 token_offset = 0
102 |                 token_buffer = b''
103 |         crc_value = struct.pack('<I', crc32(output_buffer))
104 |     else:
105 |         # if uncompressed - copy data to output
106 |         comp_type = UNCOMPRESSED
107 |         output_buffer = data
108 |         crc_value = struct.pack('<I', 0x00000000)
109 |     # write compressed RTF header
110 |     comp_size = struct.pack('<I', len(output_buffer) + 12)
111 |     raw_size = struct.pack('<I', len(data))
112 |     return comp_size + raw_size + comp_type + crc_value + output_buffer
113 | 
114 | 
def decompress(data):
    """
    Decompress `data` using RTF compression algorithm

    `data` is a complete stream: a 16-byte header (compressed size, raw
    size, compression type and CRC; the sizes and CRC are little-endian)
    followed by the contents. Returns the decompressed bytes.

    Raises Exception when `data` is shorter than 16 bytes, when the
    compression type is neither LZFu nor MELA, or when the CRC check fails.
    Contents that end in the middle of a token are decoded up to that point.
    """
    if len(data) < 16:
        raise Exception('Data must be at least 16 bytes long')  # pylint: disable=broad-exception-raised
    # make stream
    in_stream = BytesIO(data)

    # read compressed RTF header
    comp_size, raw_size, comp_type, crc_value = struct.unpack(
        '<II4sI', in_stream.read(16))

    # get only data (comp_size counts the 12 header bytes that follow it)
    contents = in_stream.read(comp_size - 12)

    if comp_type == UNCOMPRESSED:
        # check CRC
        if crc_value != 0x00000000:
            raise Exception('CRC is invalid! Must be 0x00000000!')  # pylint: disable=broad-exception-raised
        return contents[:raw_size]
    if comp_type != COMPRESSED:
        raise Exception('Unknown type of RTF compression!')  # pylint: disable=broad-exception-raised
    # check CRC
    if crc_value != crc32(contents):
        raise Exception('CRC is invalid! The file is corrupt!')  # pylint: disable=broad-exception-raised

    # set init dict; a bytearray holds ints on both Python 2 and Python 3
    dictionary = bytearray(INIT_DICT + b' ' * (MAX_DICT_SIZE - INIT_DICT_SIZE))
    write_offset = INIT_DICT_SIZE
    output = bytearray()
    stream = BytesIO(contents)
    while True:
        val = stream.read(1)
        if not val:
            break
        control = ord(val)
        # check bits from LSB to MSB
        for bit in range(8):
            if control & (1 << bit):
                # token is reference (16 bit)
                val = stream.read(2)
                if len(val) < 2:
                    # truncated reference (0 or 1 byte left): stop here
                    # instead of failing inside struct.unpack
                    return bytes(output)
                token = struct.unpack('>H', val)[0]  # big-endian
                # extract [12 bit offset][4 bit length]
                offset = (token >> 4) & 0xfff
                length = (token & 0xf) + 2
                # end indicator
                if offset == write_offset:
                    return bytes(output)
                # copy byte by byte: the source may overlap the bytes
                # being written
                for step in range(length):
                    char = dictionary[(offset + step) % MAX_DICT_SIZE]
                    output.append(char)
                    dictionary[write_offset] = char
                    write_offset = (write_offset + 1) % MAX_DICT_SIZE
            else:
                # token is literal (8 bit)
                val = stream.read(1)
                if not val:
                    return bytes(output)
                char = ord(val)
                output.append(char)
                dictionary[write_offset] = char
                write_offset = (write_offset + 1) % MAX_DICT_SIZE
    return bytes(output)
190 | 
191 | 
def _find_longest_match(init_dict, stream, write_offset):
    """
    Find the longest match

    Scans `init_dict` from index 0 for the longest run (at most 17 bytes)
    equal to the bytes that follow the current position of `stream`.

    Each byte that extends the longest match found so far is also stored in
    `init_dict` at `write_offset`, so the dictionary is modified in place
    while searching.

    Returns a `(dict_offset, longest_match_len, write_offset)` tuple: the
    dictionary index where the longest match starts (not wrapped; `compress`
    masks it to 12 bits), the match length (0 when nothing matched or the
    stream is at EOF) and the updated write offset. The stream is left at
    the position it had on entry.
    """
    # read the first char
    char = stream.read(1)
    if not char:
        # nothing left to read: no match, write offset unchanged
        return 0, 0, write_offset
    # remember where writing started; it bounds the scan below
    prev_write_offset = write_offset
    dict_index = 0
    match_len = 0
    longest_match_len = 0
    dict_offset = 0
    # find the first char
    while True:
        if init_dict[dict_index % MAX_DICT_SIZE] == char_to_int(char):
            match_len += 1
            # if found the longest match
            # (17 is the longest length `compress` can encode: it stores
            # length - 2 in a 4-bit field)
            if longest_match_len < match_len <= 17:
                dict_offset = dict_index - match_len + 1
                # add to dictionary and update the longest match
                init_dict[write_offset] = char_to_int(char)
                write_offset = (write_offset + 1) % MAX_DICT_SIZE
                longest_match_len = match_len
            # read the next char
            char = stream.read(1)
            if not char:
                # input exhausted mid-match: step back over the matched
                # bytes (the failed read consumed nothing) and report
                stream.seek(stream.tell() - match_len, 0)
                return dict_offset, longest_match_len, write_offset
        else:
            # mismatch: rewind over the partial match plus the byte just
            # read, then start a new match attempt from the first byte
            stream.seek(stream.tell() - match_len - 1, 0)
            match_len = 0
            # read the first char
            char = stream.read(1)
            if not char:
                break
        dict_index += 1
        # stop once the scan reaches the entry write offset plus the length
        # of the longest match found so far
        if dict_index >= prev_write_offset + longest_match_len:
            break
    # undo the reads of the unfinished attempt (matched bytes plus the
    # one-byte lookahead) so the caller sees the entry position
    stream.seek(stream.tell() - match_len - 1, 0)
    return dict_offset, longest_match_len, write_offset
233 | 


--------------------------------------------------------------------------------