├── src └── rans │ ├── __init__.py │ └── rANSCoder.py ├── setup.py ├── .gitignore ├── README.md └── LICENSE /src/rans/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="py_rans", 8 | version="1.0.5", 9 | author="Fedor Glazov", 10 | author_email="fedorglazov@gmail.com", 11 | description="A lightweight, pure python implementation of a rANS coder.", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/FGlazov/Python-rANSCoder", 15 | packages=setuptools.find_packages(where="src"), 16 | package_dir={"": "src"}, 17 | classifiers=[ 18 | "Programming Language :: Python :: 3", 19 | "License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", 20 | "Operating System :: OS Independent", 21 | ], 22 | install_requires=[ 23 | 'numba>=0.56.3' 24 | ] 25 | ) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ -------------------------------------------------------------------------------- /src/rans/rANSCoder.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import numba 4 | from numba import types 5 | from numba.core.errors import NumbaPendingDeprecationWarning 6 | from numba.experimental import jitclass 7 | 8 | # Disable warning that comes when pure python list is passed to functions below. 9 | # We use a specific version of numba that is still compatible with pure python lists. 
# Silence numba's pending-deprecation warning category for the whole process.
warnings.simplefilter('ignore', category=NumbaPendingDeprecationWarning)

# Initial encoder state; the decoder pulls another 32-bit word whenever its
# state drops below this value.
RANS64_L = 2 ** 30
# Smallest integer frequency assigned to a symbol whose probability is > 0.
MIN_PROB = 8
# Integer frequencies are expressed on a scale of 2 ** prob_bits.
prob_bits = 14
# Total that the integer frequencies of one distribution sum to (16384).
prob_scale = 1 << prob_bits


@numba.jit(nopython=True)
def argmax(values):
    """Return the index of the largest element of ``values``.

    The comparison is strict, so on ties the first occurrence wins.
    Returns -1 when ``values`` is empty.
    """
    if not values:
        return -1  # Empty list has no argmax

    current_max = values[0]
    current_max_index = 0
    for i in range(1, len(values)):
        if values[i] > current_max:
            current_max = values[i]
            current_max_index = i

    return current_max_index


@numba.jit(nopython=True)
def float_to_int_probs(float_probs):
    """Quantize a float probability distribution to integer frequencies.

    Each probability is scaled by ``prob_scale`` and rounded; any symbol with
    a probability above zero receives at least ``MIN_PROB``.

    Returns a ``(pdf, cdf)`` pair of lists: ``pdf[i]`` is the integer
    frequency of symbol ``i`` and ``cdf[i]`` is the sum of the frequencies of
    all symbols before ``i``. ``cdf`` has one more entry than ``pdf``; after
    the correction below its last entry equals ``prob_scale``.
    """
    pdf = []
    cdf = [0]

    for prob in float_probs:
        next_prob = round(prob * prob_scale)
        if prob > 0 and next_prob < MIN_PROB:
            next_prob = MIN_PROB

        pdf.append(next_prob)
        cdf.append(cdf[-1] + next_prob)

    # Account for possible rounding error
    # Remove the correction from the largest element
    # (to_correct may be negative or positive; it is whatever makes the
    # frequencies sum to exactly prob_scale).
    to_correct = prob_scale - cdf[-1]

    # NOTE(review): an empty float_probs makes argmax return -1 and the
    # indexing below then targets an empty list - callers must pass at least
    # one probability.
    largest_index = argmax(pdf)
    pdf[largest_index] += to_correct
    # Every cumulative value after the corrected symbol shifts by the same
    # amount; cdf[largest_index] itself (the symbol's start) is unchanged.
    for i in range(largest_index + 1, len(cdf)):
        cdf[i] += to_correct

    return pdf, cdf


@numba.jit(nopython=True)
def find_in_int_dist(cdf, to_find):
    """Return the symbol ``i`` with ``cdf[i] <= to_find < cdf[i + 1]``.

    Performs a linear scan over ``cdf``. If no interval contains ``to_find``
    an error line is printed and the function falls off the end without an
    explicit return value.
    """
    for i in range(len(cdf) - 1):
        if cdf[i] <= to_find and cdf[i + 1] > to_find:
            return i

    print("Error: Could not find symbol in integer-dist")


# Field layout of the Encoder jitclass: the coder state and the list of
# 32-bit words emitted so far.
spec = [
    ('state', numba.uint64),
    ('encoded_data', types.ListType(types.uint32)),
]


@jitclass(spec=spec)
class Encoder:
    """rANS encoder with a 64-bit state that emits 32-bit words.

    Per the project README this is a Python port of Fabian Giesen's 64-bit
    rANS coder (ryg_rans), an entropy coder first described by Jarek Duda
    (http://arxiv.org/abs/1311.2540).

    Feed symbols with ``encode_symbol`` and finish with ``get_encoded``.
    """

    def __init__(self):
        """Start with state ``RANS64_L`` and an empty output list."""
        self.state = RANS64_L
        self.encoded_data = numba.typed.List.empty_list(types.uint32)

    def encode_symbol(self, freqs, symbol):
        """Fold ``symbol`` into the coder state.

        ``freqs`` is the float probability distribution for this symbol; it
        is re-quantized on every call, so it may differ between calls.
        ``symbol`` is used as an index into that distribution.

        If the quantized frequency of ``symbol`` is 0 an error line is
        printed and the state is left untouched.
        """
        (pdf, cdf) = float_to_int_probs(freqs)
        freq = pdf[symbol]
        start = cdf[symbol]

        if freq == 0:
            print("Error: Can't encode symbol with frequency 0!")
            return

        x = self.state

        # Renormalize: once the state reaches this frequency-dependent
        # threshold, move its low 32 bits to the output before the update.
        x_max = ((RANS64_L >> prob_bits) << 32) * freq
        if x >= x_max:
            self.encoded_data.append(types.uint32(x & 0xffffffff))
            x >>= 32

        # rANS state update: the quotient moves up by prob_bits, the
        # remainder plus the symbol's start fills the low bits.
        self.state = ((x // freq) << prob_bits) + (x % freq) + start

    def get_encoded(self):
        """Flush the state and return the list of 32-bit words.

        Appends the low 32 bits of the state, then the next 32 bits, and
        returns the internal list itself (not a copy). ``self.state`` is
        shifted in the process, so the encoder should not be used afterwards
        (the README gives the same advice).
        """
        self.encoded_data.append(types.uint32(self.state & 0xffffffff))
        self.state >>= 32
        self.encoded_data.append(types.uint32(self.state & 0xffffffff))
        return self.encoded_data


# Field layout of the Decoder jitclass (identical to the Encoder's).
spec = [
    ('state', numba.uint64),
    ('encoded_data', types.ListType(types.uint32)),
]


@jitclass(spec=spec)
class Decoder:
    """rANS decoder for the word list produced by ``Encoder.get_encoded``.

    Symbols are returned in the reverse of the order in which they were
    encoded (last encoded, first decoded), as the README points out. The
    decoder does not track how many symbols remain.
    """

    def __init__(self, encoded_data):
        """Rebuild the final encoder state from the end of ``encoded_data``.

        The last word becomes the high 32 bits and the word before it the low
        32 bits, mirroring the order in which ``get_encoded`` appends them.
        The list object passed in is kept and popped from, so it shrinks as
        decoding proceeds.
        """
        self.state = (encoded_data.pop() << 32) | encoded_data.pop()
        self.encoded_data = encoded_data

    def decode_symbol(self, freqs):
        """Decode and return the next symbol.

        ``freqs`` must be the float probability distribution that was used
        when the corresponding symbol was encoded.
        """
        (pdf, cdf) = float_to_int_probs(freqs)

        # Extract symbol
        # The low prob_bits bits of the state fall inside the cumulative
        # interval of the symbol that was encoded last.
        to_find = self.state & (prob_scale - 1)
        symbol = find_in_int_dist(cdf, to_find)

        # Symbol related variables.
        start = cdf[symbol]
        freq = pdf[symbol]

        mask = prob_scale - 1

        # Move state forward one step
        x = self.state
        x = freq * (x >> prob_bits) + (x & mask) - start

        # Enough of state has been read that we now need to get more out of encoded_data.
        if x < RANS64_L:
            x = (x << 32) | self.encoded_data.pop()

        self.state = x

        return symbol
It should be compatible with the functions rANS64XXXXXX in Giesen's project - i.e. you could theoretically encode or decode with this project, and do the other step via Giesen's code. 3 | 4 | # Installation 5 | 6 | This repository is available via pip. You can install it as follows. 7 | 8 | ``` 9 | pip install py_rans 10 | ``` 11 | 12 | # Purpose 13 | 14 | The focus of this repository is not performance, but rather rapid prototyping. This entropy coder will provide compression performance which is close to entropy at a reasonable speed. If you want the same compression performance and higher compression speed you should look at a C/C++ library fulfilling those needs - I can recommend either Giesen's or Bonfield's implementation of rANS. The following are the Github links to their two projects: 15 | 16 | - https://github.com/rygorous/ryg_rans 17 | - https://github.com/jkbonfield/rans_static 18 | 19 | # API by simple example 20 | 21 | The API is deliberately kept very simple, with a single way to use the rANSCoder. The interface consists of two classes "Encoder" and "Decoder". 22 | 23 | To encode something, you would do the following: 24 | 25 | ```python 26 | import rans.rANSCoder as rANS 27 | import random 28 | import numpy as np 29 | 30 | encoder = rANS.Encoder() 31 | probs = np.array([0.1, 0.2, 0.3, 0.05, 0.05, 0.15, 0.05, 0.1], dtype=np.float32) 32 | data = np.array([random.randint(0,7) for i in range(10000)], dtype=np.int32) 33 | # Use numpy arrays for better performance! Numba works better with numpy arrays than vanilla python arrays. 34 | 35 | for symbol in data: 36 | encoder.encode_symbol(probs, symbol) 37 | encoded = encoder.get_encoded() 38 | print("The number of bytes needed is: " + str(len(encoded) * 4)) 39 | 40 | # Encoded is a list of 32-bit words. 41 | # Do things afterwards with encoded - e.g. save it into a file or send it somewhere. 
42 | ``` 43 | 44 | And later, to decode: 45 | 46 | ```python 47 | import rans.rANSCoder as rANS 48 | 49 | # Given: length_decoded, encoded, probs 50 | # Assuming the previous code snippet was how it was encoded, then: 51 | # length_decoded = len(data) 52 | # encoded = encoded 53 | # probs = probs 54 | 55 | decoder = rANS.Decoder(encoded) 56 | decoded_data = [] 57 | for _ in range(length_decoded): 58 | decoded_data.append(decoder.decode_symbol(probs)) 59 | 60 | # decoded_data is the original data, in REVERSE ORDER. 61 | ``` 62 | 63 | If you run the two code snippets in order, then you will need to call decoded_data.reverse() before running the check ```data == decoded_data```. 64 | 65 | # API more formally 66 | 67 | This python package consists of two classes, Encoder and Decoder. There are also a few helper functions outside of the two classes - but they are not meant to be used as part of the public API - this package is meant to be interfaced with solely through the two classes. All of the code is wrapped in numba wrappers, which compiles the code just in time before use. The first call to any method will take longer than any calls following it. 68 | 69 | 70 | 71 | The Encoder is responsible for taking the uncompressed data and compressing it. It is an entropy coder, so it expects data and a corresponding model (i.e. probability distribution which says how frequent a given symbol is). The Encoder's constructor takes no arguments, so you may simply construct it via ```encoder = rANS.Encoder()```. 72 | 73 | There are two methods: 74 | 75 | ```Encoder.encode_symbol(probs, symb)``` 76 | 77 | This method takes a symbol and adds it to the state inside of the Encoder which represents the currently compressed data. In order to compress it, the encoder requires a probability distribution which says how likely a given symb will occur. 
So, for example, given ```probs = [0.15, 0.8, 0.05]```, the symbol 0 occurs with 15% probability, 1 occurs with 80% probability, and 2 with 5% probability. So note that symb is a number with the bounds ```0 ≤ symb < len(probs)```. 78 | 79 | For this method to work properly, ```sum(probs)``` should be close to 1 - i.e. you need to pass a proper probability distribution. Note that you may use different probs for each successive symbol you encode in an Encoder. This is allowed and intentional, as your model might change while you're encoding. 80 | 81 | ```Encoder.get_encoded()``` 82 | 83 | This method flushes the state in the encoder, and returns an array of 32-bit words which represents the compressed data. Note that you should not use the Encoder after calling this method - if you want to encode more data later on, you should then create a new Encoder. 84 | 85 | The Decoder is responsible for taking the compressed data generated by an Encoder, and transforming it back into the original data. **Note that the data is output in reversed order**. The constructor expects the encoded data as input, i.e. you may construct the Decoder as ```decoder = rANS.Decoder(encoded_data)```. 86 | 87 | The Decoder only has one method: 88 | 89 | ```Decoder.decode_symbol(probs)``` 90 | 91 | This method takes the probability distribution that the corresponding symbol was encoded with, and retrieves this symbol. The next time this method is called, the next symbol is decoded. Notice that these symbols are decoded **in reverse order** - i.e. this coder follows a FILO principle - the last symbol encoded is the first symbol decoded. 92 | 93 | Further note that the Decoder does not know how many symbols are left - you will either need to have messages/data of fixed length, or you will have to store the length with the compressed data in order to later be able to decompress it. 
94 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 
31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. 
To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. 
In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | --------------------------------------------------------------------------------