├── tests ├── __init__.py └── tests.py ├── .flake8 ├── compressed_rtf ├── version.py ├── __init__.py ├── crc32.py └── compressed_rtf.py ├── .github ├── PULL_REQUEST_TEMPLATE.md ├── ISSUE_TEMPLATE │ ├── change.md │ ├── bug_report.md │ └── feature_request.md └── workflows │ └── feature_branch_build.yml ├── .pylintrc ├── .gitignore ├── pyproject.toml ├── LICENSE └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | 4 | -------------------------------------------------------------------------------- /compressed_rtf/version.py: -------------------------------------------------------------------------------- 1 | """Package version""" 2 | 3 | __version__ = '1.0.8' 4 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Description 2 | 3 | Provide at least some details on the changes. 4 | 5 | 6 | # Checklist 7 | 8 | - [ ] Your code works 9 | - [ ] The changes are cool 10 | - [ ] The version is bumped 11 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [DESIGN] 2 | 3 | # Maximum number of branch for function / method body. 4 | max-branches=20 5 | 6 | # Maximum number of locals for function / method body 7 | max-locals=25 8 | 9 | # Maximum number of statements in function / method body. 10 | max-statements=55 11 | 12 | 13 | [REFACTORING] 14 | 15 | # Maximum number of nested blocks for function / method body 16 | max-nested-blocks=7 17 | -------------------------------------------------------------------------------- /compressed_rtf/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | Compressed Rich Text Format (RTF) compression and decompression package 5 | 6 | Based on Rich Text Format (RTF) Compression Algorithm 7 | https://msdn.microsoft.com/en-us/library/cc463890(v=exchg.80).aspx 8 | """ 9 | 10 | from .compressed_rtf import compress, decompress 11 | from .version import __version__ 12 | 13 | __all__ = ['compress', 'decompress', '__version__'] 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/change.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Change Request 3 | about: Suggest a positive change for this project 4 | title: '[Change] ' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '[Bug] ' 5 | labels: 'bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior, e.g.: 15 | 1. The exact command(s) you ran. 16 | 2. Input data. 17 | 3. Output and/or error messages. 18 | 4. The environment (Python version, OS, library version, etc). 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Additional context** 24 | Add any other context about the problem here. 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '[Feature] ' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | #Ipython Notebook 62 | .ipynb_checkpoints 63 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 77.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "compressed-rtf" 7 | dynamic = ["version"] 8 | description = "Compressed Rich Text Format (RTF) compression and decompression package" 9 | readme = "README.md" 10 | license = "MIT" 11 | license-files = ["LICENSE*"] 12 | authors = [ 13 | { name = "Dmitry Alimov" }, 14 | ] 15 | keywords = [ 16 | "compressed-rtf", 17 | "lzfu", 18 | "mela", 19 | "rtf", 20 | ] 21 | classifiers = [ 22 | "Development Status :: 5 - Production/Stable", 23 | "Environment :: Console", 24 | "Environment :: Win32 (MS Windows)", 25 | "Operating System :: Microsoft :: Windows", 26 | "Operating System :: POSIX :: Linux", 27 | "Programming Language :: Python :: 2.7", 28 | "Programming Language :: Python :: 3", 29 | "Topic :: System :: Archiving :: Compression", 30 | "Topic :: Text Processing", 31 | ] 32 | 33 | [project.urls] 34 | Homepage = "https://github.com/delimitry/compressed_rtf" 35 | 36 | [tool.setuptools.dynamic] 37 | version = { attr = "compressed_rtf.version.__version__" } 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Dmitry Alimov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # compressed_rtf 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/compressed_rtf)](https://pypi.org/project/compressed-rtf/) 4 | [![MIT license](http://img.shields.io/badge/license-MIT-brightgreen.svg)](https://github.com/delimitry/compressed_rtf/blob/master/LICENSE) 5 | 6 | Compressed Rich Text Format (RTF) compression worker in Python 7 | 8 | Description: 9 | ------------ 10 | 11 | Compressed RTF also known as "LZFu" compression format 12 | 13 | Based on Rich Text Format (RTF) Compression Algorithm: 14 | 15 | https://msdn.microsoft.com/en-us/library/cc463890(v=exchg.80).aspx 16 | 17 | 18 | Usage example: 19 | -------------- 20 | 21 | ```python 22 | >>> from compressed_rtf import compress, decompress 23 | >>> 24 | >>> data = '{\\rtf1\\ansi\\ansicpg1252\\pard test}' 25 | >>> comp = compress(data, compressed=True) # compressed 26 | >>> comp 27 | '#\x00\x00\x00"\x00\x00\x00LZFu3\\\xe8t\x03\x00\n\x00rcpg125\x922\n\xf3 t\x07\x90t}\x0f\x10' 28 | >>> 29 | >>> raw = compress(data, compressed=False) # raw/uncompressed 30 | >>> raw 31 | '.\x00\x00\x00"\x00\x00\x00MELA \xdf\x12\xce{\\rtf1\\ansi\\ansicpg1252\\pard test}' 32 | >>> 33 | >>> decompress(comp) 34 | '{\\rtf1\\ansi\\ansicpg1252\\pard test}' 35 | >>> 36 | >>> decompress(raw) 37 | '{\\rtf1\\ansi\\ansicpg1252\\pard test}' 38 | >>> 39 | ``` 40 | 41 | License: 42 | -------- 43 | Released under [The MIT License](https://github.com/delimitry/compressed_rtf/blob/master/LICENSE). 44 | -------------------------------------------------------------------------------- /tests/tests.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | import unittest 4 | from compressed_rtf.compressed_rtf import compress, decompress 5 | from compressed_rtf.crc32 import crc32 6 | 7 | 8 | class Test(unittest.TestCase): 9 | """ 10 | Test RTF compression and decompression 11 | """ 12 | 13 | def test_decompress(self): 14 | """ 15 | Test decompression 16 | """ 17 | data = b'-\x00\x00\x00+\x00\x00\x00LZFu\xf1\xc5\xc7\xa7\x03\x00\n\x00' \ 18 | b'rcpg125B2\n\xf3 hel\t\x00 bw\x05\xb0ld}\n\x80\x0f\xa0' 19 | self.assertEqual( 20 | decompress(data), 21 | b'{\\rtf1\\ansi\\ansicpg1252\\pard hello world}\r\n') 22 | # test raw decompression 23 | data = b'.\x00\x00\x00"\x00\x00\x00MELA\x00\x00\x00\x00{\\rtf1\\ansi\\an' \ 24 | b'sicpg1252\\pard test}' 25 | self.assertEqual( 26 | decompress(data), 27 | b'{\\rtf1\\ansi\\ansicpg1252\\pard test}') 28 | # test < 16 bytes long data exception 29 | with self.assertRaises(Exception): 30 | decompress(b'') 31 | with self.assertRaises(Exception): 32 | decompress(b'0123456789abcde') 33 | # test unknown compression type exception 34 | with self.assertRaises(Exception): 35 | decompress(b'\x10\x00\x00\x00\x11\x00\x00\x00ABCD\xff\xff\xff\xff') 36 | # test invalid CRC exception 37 | with self.assertRaises(Exception): 38 | decompress(b'\x10\x00\x00\x00\x11\x00\x00\x00LZFu\xff\xff\xff\xff') 39 | 40 | def test_crc32(self): 41 | """ 42 | Test CRC32 computation 43 | """ 44 | data = b'\x03\x00\n\x00rcpg125B2\n\xf3 hel\t\x00 bw\x05\xb0ld}\n\x80' \ 45 | b'\x0f\xa0' 46 | self.assertEqual(crc32(data), 0xa7c7c5f1) 47 | # test empty crc32 48 | self.assertEqual(crc32(b''), 0x00000000) 49 | 50 | def test_compression(self): 51 | """ 52 | Test compression types compressed and uncompressed 53 | """ 54 | data = b'{\\rtf1\\ansi\\ansicpg1252\\pard hello world}\r\n' 55 | self.assertEqual( 56 | compress(data, compressed=True), 57 | b'-\x00\x00\x00+\x00\x00\x00LZFu\xf1\xc5\xc7\xa7\x03\x00\n\x00' 58 | b'rcpg125B2\n\xf3 hel\t\x00 bw\x05\xb0ld}\n\x80\x0f\xa0') 59 | # test uncompressed 60 | self.assertEqual( 61 | compress(data, compressed=False), 62 | b'7\x00\x00\x00+\x00\x00\x00MELA\x00\x00\x00\x00{\\rtf1\\ansi\\ansicpg' 63 | b'1252\\pard hello world}\r\n') 64 | 65 | def test_compression_repeated_tokens(self): 66 | """ 67 | Test compression of data with repeated tokens, crossing write position 68 | """ 69 | data = b'{\\rtf1 WXYZWXYZWXYZWXYZWXYZ}' 70 | self.assertEqual( 71 | compress(data), 72 | b'\x1a\x00\x00\x00\x1c\x00\x00\x00LZFu\xe2\xd4KQA\x00\x04 WXYZ\r' 73 | b'n}\x01\x0e\xb0') 74 | 75 | def test_hither_and_thither(self): 76 | """ 77 | Test decompression of compressed data 78 | """ 79 | data = b'{\\rtf1\\ansi\\mac\\deff0\\deftab720' 80 | self.assertEqual(decompress(compress(data, compressed=True)), data) 81 | 82 | def test_hither_and_thither_long(self): 83 | """ 84 | Test decompression of compressed data larger than 4096 85 | """ 86 | data = b'{\\rtf1\\ansi\\ansicpg1252\\pard hello world' 87 | while len(data) < 4096: 88 | data += b'testtest' 89 | data += b'}' 90 | self.assertEqual(decompress(compress(data, compressed=True)), data) 91 | 92 | 93 | if __name__ == '__main__': 94 | unittest.main(verbosity=2) 95 | -------------------------------------------------------------------------------- /compressed_rtf/crc32.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | """ 3 | Module for CRC32 calculation 4 | """ 5 | 6 | from io import BytesIO 7 | 8 | __all__ = ['crc32'] 9 | 10 | table = [ 11 | 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 12 | 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, 13 | 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 14 | 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 15 | 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 16 | 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 17 | 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 18 | 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, 19 | 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 20 | 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 21 | 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 22 | 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 23 | 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 24 | 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, 25 | 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 26 | 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 27 | 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 28 | 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, 29 | 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 30 | 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, 31 | 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 32 | 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 33 | 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 34 | 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, 35 | 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 36 | 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 37 | 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 38 | 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 39 | 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 40 | 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, 41 | 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 42 | 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 43 | 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 44 | 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 45 | 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 46 | 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 47 | 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 48 | 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, 49 | 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 50 | 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 51 | 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 52 | 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 53 | 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 54 | 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, 55 | 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 56 | 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 57 | 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 58 | 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, 59 | 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 60 | 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, 61 | 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 62 | 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 63 | 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 64 | 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, 65 | 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 66 | 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 67 | 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 68 | 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 69 | 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 70 | 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, 71 | 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 72 | 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, 73 | 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 74 | 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d 75 | ] 76 | 77 | 78 | def crc32(data): 79 | """ 80 | Calculate CRC32 from given data bytes 81 | """ 82 | stream = BytesIO(data) 83 | crc_value = 0x00000000 84 | while True: 85 | char = stream.read(1) 86 | if not char: 87 | break 88 | table_pos = (crc_value ^ ord(char)) & 0xff 89 | intermediate_value = crc_value >> 8 90 | crc_value = table[table_pos] ^ intermediate_value 91 | return crc_value 92 | -------------------------------------------------------------------------------- /.github/workflows/feature_branch_build.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # yamllint disable rule:line-length 3 | # yamllint disable rule:truthy 4 | 5 | name: Build Feature Branch 6 | 7 | on: 8 | workflow_dispatch: 9 | push: 10 | branches-ignore: 11 | - master 12 | 13 | jobs: 14 | 15 | validate_new_version: 16 | name: Validate new version 17 | runs-on: ubuntu-latest 18 | outputs: 19 | planned_version: ${{ steps.validate_new_version.outputs.planned_version }} 20 | version_file_exists: ${{ steps.validate_new_version.outputs.version_file_exists }} 21 | tag_hash: ${{ steps.validate_new_version.outputs.tag_hash }} 22 | can_create: ${{ steps.validate_new_version.outputs.can_create }} 23 | tag_exists: ${{ steps.validate_new_version.outputs.tag_exists }} 24 | branch_name: ${{ steps.validate_new_version.outputs.branch_name }} 25 | underscored_branch_name: ${{ steps.generate_tag_from_branch.outputs.underscored_branch_name }} 26 | 27 | steps: 28 | 29 | - name: Check out code 30 | uses: actions/checkout@v4 31 | with: 32 | fetch-depth: 0 33 | 34 | - name: Workaround the version file format 35 | id: prepare_version_file 36 | shell: bash 37 | run: >- 38 | grep "__version__" compressed_rtf/version.py | cut -d "=" -f2 | tr -d "' " > .version 39 | 40 | - name: Use latest released action 41 | id: validate_new_version 42 | uses: reinvented-stuff/validate-version-action@1.2.0 43 | with: 44 | version_filename: ".version" 45 | github_token: "${{ secrets.GITHUB_TOKEN }}" 46 | fail_on_error: false 47 | 48 | 49 | test_with_unittest: 50 | name: Test with unittest 51 | runs-on: ubuntu-latest 52 | outputs: 53 | job_status: ${{ job.status }} 54 | env: 55 | APP_NAME: "compressed_rtf" 56 | 57 | steps: 58 | 59 | - name: Check out this repository 60 | uses: actions/checkout@v4 61 | with: 62 | fetch-depth: 0 63 | 64 | - name: Set up Python3 65 | id: setup_python 66 | uses: actions/setup-python@v2 67 | with: 68 | python-version: '3.10' 69 | 70 | - name: Prepare environment 71 | id: prepare_test_env 72 | shell: bash 73 | run: | 74 | python3 -m pip install --upgrade pip 75 | python3 -m pip install setuptools wheel 76 | 77 | - name: Run tests 78 | id: run_tests 79 | shell: bash 80 | run: >- 81 | python3 -m unittest 82 | 83 | - name: Upload test artifacts 84 | uses: actions/upload-artifact@v4 85 | id: upload_test_artifacts 86 | if: always() 87 | with: 88 | name: "${{ env.APP_NAME }}_${{ github.run_id }}" 89 | path: | 90 | unittest_*.txt 91 | unittest_*.html 92 | 93 | if-no-files-found: warn 94 | retention-days: 90 95 | 96 | 97 | lint: 98 | name: Linters and such 99 | runs-on: ubuntu-latest 100 | outputs: 101 | job_status: ${{ job.status }} 102 | env: 103 | APP_NAME: "compressed_rtf" 104 | 105 | steps: 106 | 107 | - name: Check out this repository 108 | uses: actions/checkout@v4 109 | with: 110 | fetch-depth: 0 111 | 112 | - name: Set up Python3 113 | id: setup_python 114 | uses: actions/setup-python@v2 115 | with: 116 | python-version: '3.10' 117 | 118 | - name: Prepare environment 119 | id: prepare_lint_env 120 | shell: bash 121 | run: | 122 | python3 -m pip install --upgrade pip 123 | python3 -m pip install flake8 pylint 124 | 125 | - name: Run pylint 126 | id: run_pylint 127 | continue-on-error: true 128 | shell: bash 129 | run: >- 130 | python3 -m pylint "${APP_NAME}" 131 | 132 | - name: Run flake8 133 | id: run_flake8 134 | continue-on-error: true 135 | shell: bash 136 | run: >- 137 | python3 -m flake8 "${APP_NAME}" 138 | 139 | - name: Final decision on linters 140 | id: final_decision 141 | shell: bash 142 | run: |- 143 | echo "run_pylint: ${{ steps.run_pylint.outcome }}" 144 | echo "run_flake8: ${{ steps.run_flake8.outcome }}" 145 | 146 | if [[ "${{ steps.run_pylint.outcome }}" != "success" ]] || [[ "${{ steps.run_flake8.outcome }}" != "success" ]]; then 147 | echo "Failing the linters job" >&2 148 | exit 1 149 | fi 150 | 151 | 152 | build: 153 | name: Build 154 | runs-on: ubuntu-latest 155 | outputs: 156 | job_status: ${{ job.status }} 157 | env: 158 | APP_NAME: "compressed_rtf" 159 | 160 | steps: 161 | 162 | - name: Check out this repository 163 | uses: actions/checkout@v4 164 | with: 165 | fetch-depth: 0 166 | 167 | - name: Set up Python3 168 | id: setup_python 169 | uses: actions/setup-python@v2 170 | with: 171 | python-version: '3.10' 172 | 173 | - name: Prepare environment 174 | id: prepare_build_env 175 | shell: bash 176 | run: | 177 | python3 -m pip install --upgrade pip 178 | python3 -m pip install build 179 | 180 | - name: Build 181 | id: run_build 182 | shell: bash 183 | run: >- 184 | python3 -m build 185 | 186 | - name: Upload build artifacts 187 | uses: actions/upload-artifact@v4 188 | id: upload_build_artifacts 189 | if: always() 190 | with: 191 | name: "${{ env.APP_NAME }}_${{ github.run_id }}" 192 | path: | 193 | dist/*.whl 194 | dist/*.tar.gz 195 | 196 | if-no-files-found: warn 197 | retention-days: 90 198 | 199 | ... 200 | -------------------------------------------------------------------------------- /compressed_rtf/compressed_rtf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | """ 3 | Compressed Rich Text Format (RTF) worker 4 | 5 | Based on Rich Text Format (RTF) Compression Algorithm 6 | https://msdn.microsoft.com/en-us/library/cc463890(v=exchg.80).aspx 7 | """ 8 | 9 | import struct 10 | import sys 11 | from io import BytesIO 12 | from .crc32 import crc32 13 | 14 | __all__ = ['compress', 'decompress'] 15 | 16 | PY3 = sys.version_info[0] == 3 17 | 18 | INIT_DICT = ( 19 | b'{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\' 20 | b'fswiss \\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New ' 21 | b'RomanCourier{\\colortbl\\red0\\green0\\blue0\r\n\\par \\pard\\plain\\' 22 | b'f0\\fs20\\b\\i\\u\\tab\\tx' 23 | ) 24 | 25 | INIT_DICT_SIZE = 207 26 | MAX_DICT_SIZE = 4096 27 | 28 | COMPRESSED = b'LZFu' 29 | UNCOMPRESSED = b'MELA' 30 | 31 | 32 | def char_to_int(val): 33 | """Convert a character to its ordinal value.""" 34 | return ord(val) if PY3 else val 35 | 36 | 37 | def compress(data, compressed=True): 38 | """ 39 | Compress `data` using RTF compression algorithm 40 | If `compressed` flag is False, data will be written uncompressed 41 | """ 42 | output_buffer = b'' 43 | # set init dict 44 | init_dict = list(INIT_DICT + b' ' * (MAX_DICT_SIZE - INIT_DICT_SIZE)) 45 | write_offset = INIT_DICT_SIZE 46 | # compressed 47 | if compressed: 48 | comp_type = COMPRESSED 49 | # make stream 50 | in_stream = BytesIO(data) 51 | # init params 52 | control_byte = 0 53 | control_bit = 1 54 | token_offset = 0 55 | token_buffer = b'' 56 | while True: 57 | # find the longest match 58 | dict_offset, longest_match, write_offset = \ 59 | _find_longest_match(init_dict, in_stream, write_offset) 60 | char = in_stream.read(longest_match if longest_match > 1 else 1) 61 | # EOF input stream 62 | if not char: 63 | # update params 64 | control_byte |= 1 << control_bit - 1 65 | control_bit += 1 66 | token_offset += 2 67 | # add dict reference 68 | dict_ref = (write_offset & 0xfff) << 4 69 | token_buffer += struct.pack('>H', dict_ref) 70 | # add to output 71 | output_buffer += struct.pack('B', control_byte) 72 | output_buffer += token_buffer[:token_offset] 73 | break 74 | if longest_match > 1: 75 | # update params 76 | control_byte |= 1 << control_bit - 1 77 | control_bit += 1 78 | token_offset += 2 79 | # add dict reference 80 | dict_ref = (dict_offset & 0xfff) << 4 | ( 81 | longest_match - 2) & 0xf 82 | token_buffer += struct.pack('>H', dict_ref) 83 | else: 84 | # character is not found in dictionary 85 | if longest_match == 0: 86 | init_dict[write_offset] = char_to_int(char) 87 | write_offset = (write_offset + 1) % MAX_DICT_SIZE 88 | # update params 89 | control_byte |= 0 << control_bit - 1 90 | control_bit += 1 91 | token_offset += 1 92 | # add literal 93 | token_buffer += char 94 | if control_bit > 8: 95 | # add to output 96 | output_buffer += struct.pack('B', control_byte) 97 | output_buffer += token_buffer[:token_offset] 98 | # reset params 99 | control_byte = 0 100 | control_bit = 1 101 | token_offset = 0 102 | token_buffer = b'' 103 | crc_value = struct.pack('H', val)[0] # big-endian 157 | # extract [12 bit offset][4 bit length] 158 | offset = (token >> 4) & 0b111111111111 159 | length = token & 0b1111 160 | # end indicator 161 | if write_offset == offset: 162 | end = True 163 | break 164 | actual_length = length + 2 165 | for step in range(actual_length): 166 | read_offset = (offset + step) % MAX_DICT_SIZE 167 | char = init_dict[read_offset] 168 | if PY3: 169 | output_buffer.write(bytes([char])) 170 | else: 171 | output_buffer.write(char) 172 | init_dict[write_offset] = char 173 | write_offset = (write_offset + 1) % MAX_DICT_SIZE 174 | else: 175 | # token is literal (8 bit) 176 | val = contents.read(1) 177 | if not val: 178 | break 179 | output_buffer.write(val) 180 | init_dict[write_offset] = ord(val) if PY3 else val 181 | write_offset = (write_offset + 1) % MAX_DICT_SIZE 182 | elif comp_type == UNCOMPRESSED: 183 | # check CRC 184 | if crc_value != 0x00000000: 185 | raise Exception('CRC is invalid! Must be 0x00000000!') # pylint: disable=broad-exception-raised 186 | return contents.read(raw_size) 187 | else: 188 | raise Exception('Unknown type of RTF compression!') # pylint: disable=broad-exception-raised 189 | return output_buffer.getvalue() 190 | 191 | 192 | def _find_longest_match(init_dict, stream, write_offset): 193 | """ 194 | Find the longest match 195 | """ 196 | # read the first char 197 | char = stream.read(1) 198 | if not char: 199 | return 0, 0, write_offset 200 | prev_write_offset = write_offset 201 | dict_index = 0 202 | match_len = 0 203 | longest_match_len = 0 204 | dict_offset = 0 205 | # find the first char 206 | while True: 207 | if init_dict[dict_index % MAX_DICT_SIZE] == char_to_int(char): 208 | match_len += 1 209 | # if found the longest match 210 | if longest_match_len < match_len <= 17: 211 | dict_offset = dict_index - match_len + 1 212 | # add to dictionary and update the longest match 213 | init_dict[write_offset] = char_to_int(char) 214 | write_offset = (write_offset + 1) % MAX_DICT_SIZE 215 | longest_match_len = match_len 216 | # read the next char 217 | char = stream.read(1) 218 | if not char: 219 | stream.seek(stream.tell() - match_len, 0) 220 | return dict_offset, longest_match_len, write_offset 221 | else: 222 | stream.seek(stream.tell() - match_len - 1, 0) 223 | match_len = 0 224 | # read the first char 225 | char = stream.read(1) 226 | if not char: 227 | break 228 | dict_index += 1 229 | if dict_index >= prev_write_offset + longest_match_len: 230 | break 231 | stream.seek(stream.tell() - match_len - 1, 0) 232 | return dict_offset, longest_match_len, write_offset 233 | --------------------------------------------------------------------------------