├── tests ├── __init__.py ├── slow.py ├── tests.py └── data │ └── the_happy_prince.txt ├── .hgtags ├── requirements.txt ├── .hgignore ├── .gitignore ├── MANIFEST.in ├── tox.ini ├── LICENSE.txt ├── README.txt ├── setup.py └── lzw └── __init__.py /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Put your tests in this directory. 2 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | 94aac27cdf3c6547021aa59d5b454badffc1239f Release-0.01.01 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Include setup.py's install_requires 2 | -e . 3 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | 2 | .*\.pyc$ 3 | .*~$ 4 | ^doc/.+ 5 | ^dist/.+ 6 | ^MANIFEST 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info/ 3 | 4 | /.tox/ 5 | /MANIFEST 6 | /build/ 7 | /dist/ 8 | /env/ 9 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | recursive-include tests *.py 3 | recursive-include tests/data * 4 | recursive-include doc * 5 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | ; Developed with tox version 2.3.1 2 | [tox] 3 | envlist = 4 | py34, py27 5 | 6 | [testenv] 7 | setenv = 8 | PYTHONPATH = 
{toxinidir}:{toxinidir}/tests 9 | commands = 10 | python setup.py test 11 | deps = 12 | -r{toxinidir}/requirements.txt 13 | 14 | [testenv:py27] 15 | basepython = python2.7 16 | 17 | [testenv:py34] 18 | basepython = python3.4 19 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2010 Joseph Bowers 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
class TestEncoderSlowly(unittest.TestCase):
    """Slow round-trip tests that push the large fixture files through lzw.

    Each test compresses a fixture into a temporary file, decompresses that
    temporary file again, and compares the result with the original input
    one element at a time.
    """

    def test_big_file(self):
        self.verify_compressed_file(BIG_FILE)

    def test_giant_file(self):
        self.verify_compressed_file(GIANT_FILE)

    def verify_compressed_file(self, testfile=GIANT_FILE):
        """Assert that `testfile` survives a compress/decompress round trip.

        The compressed stream is spooled to a temporary file rather than
        held in memory, then read back and decompressed while the original
        file is streamed alongside it for comparison.
        """
        with tempfile.TemporaryFile("w+b") as compressedfile:

            for chunk in lzw.compress(lzw.readbytes(testfile)):
                compressedfile.write(chunk)

            # Rewind so the decompressor reads what was just written.
            compressedfile.flush()
            compressedfile.seek(0)

            original = lzw.readbytes(testfile)
            roundtrip = lzw.decompress(lzw.filebytes(compressedfile))

            # zip_longest pads the shorter stream with None, so a truncated
            # or over-long result shows up as a mismatch instead of being
            # silently ignored.
            pairs = six.moves.zip_longest(original, roundtrip)
            for position, (oldbyte, newbyte) in enumerate(pairs, 1):
                if oldbyte != newbyte:
                    msg = "Corrupted byte at {0}, original {1} != {2}".format(position, oldbyte, newbyte)
                    self.assertEqual(oldbyte, newbyte, msg)
3 | Please see the accompanying LICENSE.txt file for license terms. 4 | 5 | lzw currently requires python 2.7 or python 3.4 to run. 6 | 7 | Before going on, potential users are advised to take a look at the 8 | gzip, zlib, bz2, zipfile, and tarfile modules available in the python 9 | standard library, which are dynamite-fast, mature, well supported, and 10 | generally awesome. 11 | 12 | Seriously, check them out! You've already got them! 13 | 14 | ---- 15 | 16 | This software is in Pre-Alpha release; any bug reports (or even 17 | stories about ways you use the software, or wish you could use the 18 | software) are appreciated! Mail joerbowers@joe-bowers.com with your 19 | info. 20 | 21 | --- 22 | 23 | INSTALLING 24 | 25 | You should be able to install this package with 26 | 27 | python setup.py install 28 | 29 | ---- 30 | 31 | Ok, moving on. 32 | 33 | The easiest way to use lzw is probably something like this 34 | 35 | >>> import lzw 36 | >>> 37 | >>> infile = lzw.readbytes("My Uncompressed File.txt") 38 | >>> compressed = lzw.compress(infile) 39 | >>> lzw.writebytes("My Compressed File.lzw", compressed) 40 | >>> 41 | >>> # Then later (or elsewhere) 42 | >>> infile = lzw.readbytes("My Compressed File.lzw") 43 | >>> uncompressed = lzw.decompress(infile) 44 | >>> for bt in uncompressed: 45 | >>> do_something_awesome_with_this_byte(bt) 46 | >>> 47 | 48 | See the module documentation for more details. 49 | 50 | --- 51 | 52 | The underlying compression algorithm for this module is as expressed 53 | in section 13 of the TIFF 6.0 specification, pages 58 to 62, available 54 | at the time of this writing on-line at 55 | 56 | http://partners.adobe.com/public/developer/en/tiff/TIFF6.pdf 57 | 58 | Wherever possible, I've tried to adhere to the algorithm and 59 | conventions that are described (in exhaustive and yet very readable 60 | detail!) in that document, even when it gets a bit TIFF 61 | specific.
class RunTestsCommand(Command):
    """Runs package tests"""

    user_options = [('runslow', None, 'also runs the (fairly slow) functional tests')]

    def initialize_options(self):
        # Default for the 'runslow' option declared in user_options.
        self.runslow = False

    def finalize_options(self):
        # Nothing to validate or derive; the method only exists because the
        # Command interface requires it.
        pass

    def run(self):
        """Run the lzw doctests, then the unit tests, then (optionally) the
        slow functional tests.

        Every suite is run even if an earlier one failed, so a single
        invocation reports all failures. Raises DistutilsError at the end
        if anything failed.
        """
        import lzw
        success = doctest.testmod(lzw).failed == 0

        module_names = [TEST_MODULE_NAME]
        if self.runslow:
            module_names.append(SLOW_TEST_MODULE_NAME)

        for module_name in module_names:
            suite = defaultTestLoader.loadTestsFromName(module_name)
            result = TextTestRunner(verbosity=2).run(suite)
            if not result.wasSuccessful():
                success = False

        if not success:
            raise distutils.errors.DistutilsError('Test failure')


class DocCommand(Command):
    """Generates package documentation using epydoc"""

    user_options = []

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        """Invoke the epydoc command line on MODULES, writing to DOC_DIR_NAME."""
        # Slightly stupid. Move to sphinx when you can, please.
        import epydoc.cli

        # epydoc's entry point is driven through sys.argv here, so swap in
        # a synthetic command line for the duration of the call.
        real_argv = sys.argv
        sys.argv = ["epydoc", "--output", DOC_DIR_NAME, "--no-private"] + MODULES
        try:
            epydoc.cli.cli()
        finally:
            # Restore the real arguments even if epydoc raises or exits.
            sys.argv = real_argv
class TestEncoder(unittest.TestCase):
    """Functional round-trip tests for lzw using English text and random bytes."""

    def setUp(self):
        with open(ENGLISH_FILE, "rb") as inf:
            self.english = inf.read()

        # Random bytes, one per byte of the English sample, so both inputs
        # have the same length.
        self.gibberish = b"".join(struct.pack("B", random.randrange(256))
                                  for _ in range(len(self.english)))

    def test_readbytes(self):
        """lzw.readbytes must yield exactly the bytes stored in the file."""
        with open(ENGLISH_FILE, "rb") as inf:
            realbytes = inf.read()

        testbytes = b"".join(lzw.readbytes(ENGLISH_FILE))
        self.assertEqual(realbytes, testbytes)

    def test_encoder_packing_assumption(self):
        """
        Our bitpacking scheme relies on assumptions about the size of
        the codebook in relation to the number of codes produced. In
        particular:

        - each non-control code emitted by the encoder is FOLLOWED by
          a single addition to the encoder's codebook, or is followed
          by a control code in the output.

        - each control code emitted by the encoder is FOLLOWED by a
          reversion of the codebook to its initial state.

        """
        encoder = lzw.Encoder()
        initial_codesize = encoder.code_size()
        codes_seen = 0

        # Message for a size mismatch seen on the previous code. It only
        # becomes a failure if the next code is not a control code.
        pending_failure = None

        for pt in encoder.encode(self.english):
            codesize = encoder.code_size()

            if pt == lzw.CLEAR_CODE:
                codes_seen = 0
                pending_failure = None
            elif pending_failure:
                self.fail(pending_failure)
            else:
                codes_seen = codes_seen + 1
                expected = initial_codesize + codes_seen
                if expected != codesize:
                    pending_failure = "Expected code size {0} but found {1}, Not followed by a control signal".format(expected, codesize)

    def test_encodedecode(self):
        """Encoder output fed straight into Decoder reproduces the input."""
        for sample in (self.english, self.gibberish):
            encoder = lzw.Encoder()
            decoder = lzw.Decoder()

            codepoints = encoder.encode(sample)
            decoded = decoder.decode(codepoints)

            self.assertEqual(sample, b"".join(decoded))

    def test_compressdecompress(self):
        """compress followed by decompress reproduces the input."""
        for sample in (self.english, self.gibberish):
            compressed = list(lzw.compress(sample))
            decompressed = b"".join(lzw.decompress(compressed))

            self.assertEqual(sample, decompressed)
HAPPY PRINCE 3 | By Oscar Wilde 4 | Retrieved from Project Gutenberg: http://www.gutenberg.org/files/30120/30120.txt 5 | 6 | 7 | First published by David Nutt, May, 1888 8 | 9 | Reprinted January, 1889; February, 1902; September, 1905; February, 10 | 1907; March, 1908; March, 1910 11 | 12 | Reset and published by arrangement with David Nutt by Duckworth & Co., 13 | 1920 14 | 15 | Special Edition, reset. With illustrations by Charles Robinson, 16 | published by arrangement with David Nutt by Duckworth & Co., 1913. 17 | Reprinted 1920 18 | 19 | Printed in Great Britain 20 | By Hazell, Watson and Viney, Ld., 21 | London and Aylesbury. 22 | 23 | ==== 24 | 25 | High above the city, on a tall column, stood the statue of the Happy 26 | Prince. He was gilded all over with thin leaves of fine gold, for eyes 27 | he had two bright sapphires, and a large red ruby glowed on his 28 | sword-hilt. 29 | 30 | He was very much admired indeed. "He is as beautiful as a weathercock," 31 | remarked one of the Town Councillors who wished to gain a reputation for 32 | having artistic tastes; "only not quite so useful," he added, fearing 33 | lest people should think him unpractical, which he really was not. 34 | 35 | "Why can't you be like the Happy Prince?" asked a sensible mother of her 36 | little boy who was crying for the moon. "The Happy Prince never dreams 37 | of crying for anything." 38 | 39 | "I am glad there is some one in the world who is quite happy," muttered 40 | a disappointed man as he gazed at the wonderful statue. 41 | 42 | "He looks just like an angel," said the Charity Children as they came 43 | out of the cathedral in their bright scarlet cloaks and their clean 44 | white pinafores. 45 | 46 | "How do you know?" said the Mathematical Master, "you have never seen 47 | one." 48 | 49 | "Ah! but we have, in our dreams," answered the children; and the 50 | Mathematical Master frowned and looked very severe, for he did not 51 | approve of children dreaming. 
52 | 53 | One night there flew over the city a little Swallow. His friends had 54 | gone away to Egypt six weeks before, but he had stayed behind, for he 55 | was in love with the most beautiful Reed. He had met her early in the 56 | spring as he was flying down the river after a big yellow moth, and had 57 | been so attracted by her slender waist that he had stopped to talk to 58 | her. 59 | 60 | "Shall I love you?" said the Swallow, who liked to come to the point at 61 | once, and the Reed made him a low bow. So he flew round and round her, 62 | touching the water with his wings, and making silver ripples. This was 63 | his courtship, and it lasted all through the summer. 64 | 65 | "It is a ridiculous attachment," twittered the other Swallows; "she has 66 | no money, and far too many relations;" and indeed the river was quite 67 | full of Reeds. Then, when the autumn came they all flew away. 68 | 69 | After they had gone he felt lonely, and began to tire of his lady-love. 70 | "She has no conversation," he said, "and I am afraid that she is a 71 | coquette, for she is always flirting with the wind." And certainly, 72 | whenever the wind blew, the Reed made the most graceful curtseys. 73 | "I admit that she is domestic," he continued, "but I love travelling, 74 | and my wife, consequently, should love travelling also." 75 | 76 | "Will you come away with me?" he said finally to her; but the Reed shook 77 | her head, she was so attached to her home. 78 | 79 | "You have been trifling with me," he cried. "I am off to the Pyramids. 80 | Good-bye!" and he flew away. 81 | 82 | All day long he flew, and at night-time he arrived at the city. "Where 83 | shall I put up?" he said; "I hope the town has made preparations." 84 | 85 | Then he saw the statue on the tall column. 86 | 87 | "I will put up there," he cried; "it is a fine position, with plenty of 88 | fresh air." So he alighted just between the feet of the Happy Prince. 
89 | 90 | "I have a golden bedroom," he said softly to himself as he looked round, 91 | and he prepared to go to sleep; but just as he was putting his head 92 | under his wing a large drop of water fell on him. "What a curious 93 | thing!" he cried; "there is not a single cloud in the sky, the stars are 94 | quite clear and bright, and yet it is raining. The climate in the north 95 | of Europe is really dreadful. The Reed used to like the rain, but that 96 | was merely her selfishness." 97 | 98 | Then another drop fell. 99 | 100 | "What is the use of a statue if it cannot keep the rain off?" he said; 101 | "I must look for a good chimney-pot," and he determined to fly away. 102 | 103 | But before he had opened his wings, a third drop fell, and he looked up, 104 | and saw-- Ah! what did he see? 105 | 106 | The eyes of the Happy Prince were filled with tears, and tears were 107 | running down his golden cheeks. His face was so beautiful in the 108 | moonlight that the little Swallow was filled with pity. 109 | 110 | "Who are you?" he said. 111 | 112 | "I am the Happy Prince." 113 | 114 | "Why are you weeping then?" asked the Swallow; "you have quite 115 | drenched me." 116 | 117 | [Illustration: THE PALACE OF SANS-SOUCI] 118 | 119 | "When I was alive and had a human heart," answered the statue, "I did 120 | not know what tears were, for I lived in the Palace of Sans-Souci, where 121 | sorrow is not allowed to enter. In the daytime I played with my 122 | companions in the garden, and in the evening I led the dance in the 123 | Great Hall. Round the garden ran a very lofty wall, but I never cared 124 | to ask what lay beyond it, everything about me was so beautiful. 125 | My courtiers called me the Happy Prince, and happy indeed I was, if 126 | pleasure be happiness. So I lived, and so I died. 
And now that I am dead 127 | they have set me up here so high that I can see all the ugliness and all 128 | the misery of my city, and though my heart is made of lead yet I cannot 129 | choose but weep." 130 | 131 | "What! is he not solid gold?" said the Swallow to himself. He was too 132 | polite to make any personal remarks out loud. 133 | 134 | "Far away," continued the statue in a low musical voice, "far away in a 135 | little street there is a poor house. One of the windows is open, and 136 | through it I can see a woman seated at a table. Her face is thin and 137 | worn, and she has coarse, red hands, all pricked by the needle, for she 138 | is a seamstress. She is embroidering passion-flowers on a satin gown for 139 | the loveliest of the Queen's maids-of-honour to wear at the next 140 | Court-ball. In a bed in the corner of the room her little boy is lying 141 | ill. He has a fever, and is asking for oranges. His mother has nothing 142 | to give him but river water, so he is crying. Swallow, Swallow, little 143 | Swallow, will you not bring her the ruby out of my sword-hilt? My feet 144 | are fastened to this pedestal and I cannot move." 145 | 146 | "I am waited for in Egypt," said the Swallow. "My friends are flying up 147 | and down the Nile, and talking to the large lotus-flowers. Soon they 148 | will go to sleep in the tomb of the great King. The King is there 149 | himself in his painted coffin. He is wrapped in yellow linen, and 150 | embalmed with spices. Round his neck is a chain of pale green jade, 151 | and his hands are like withered leaves." 152 | 153 | "Swallow, Swallow, little Swallow," said the Prince, "will you not stay 154 | with me for one night, and be my messenger? The boy is so thirsty, and 155 | the mother so sad." 156 | 157 | "I don't think I like boys," answered the Swallow. "Last summer, when I 158 | was staying on the river, there were two rude boys, the miller's sons, 159 | who were always throwing stones at me. 
They never hit me, of course; 160 | we swallows fly far too well for that, and besides, I come of a family 161 | famous for its agility; but still, it was a mark of disrespect." 162 | 163 | But the Happy Prince looked so sad that the little Swallow was sorry. 164 | "It is very cold here," he said; "but I will stay with you for one 165 | night, and be your messenger." 166 | 167 | "Thank you, little Swallow," said the Prince. 168 | 169 | So the Swallow picked out the great ruby from the Prince's sword, 170 | and flew away with it in his beak over the roofs of the town. 171 | 172 | He passed by the cathedral tower, where the white marble angels were 173 | sculptured. He passed by the palace and heard the sound of dancing. 174 | A beautiful girl came out on the balcony with her lover. "How wonderful 175 | the stars are," he said to her, "and how wonderful is the power of 176 | love!" 177 | 178 | "I hope my dress will be ready in time for the State-ball," she 179 | answered; "I have ordered passion-flowers to be embroidered on it; 180 | but the seamstresses are so lazy." 181 | 182 | He passed over the river, and saw the lanterns hanging to the masts of 183 | the ships. He passed over the Ghetto, and saw the old Jews bargaining 184 | with each other, and weighing out money in copper scales. At last he 185 | came to the poor house and looked in. The boy was tossing feverishly on 186 | his bed, and the mother had fallen asleep, she was so tired. In he 187 | hopped, and laid the great ruby on the table beside the woman's thimble. 188 | Then he flew gently round the bed, fanning the boy's forehead with his 189 | wings. "How cool I feel!" said the boy, "I must be getting better;" 190 | and he sank into a delicious slumber. 191 | 192 | Then the Swallow flew back to the Happy Prince, and told him what he had 193 | done. "It is curious," he remarked, "but I feel quite warm now, although 194 | it is so cold." 
195 | 196 | "That is because you have done a good action," said the Prince. And the 197 | little Swallow began to think, and then he fell asleep. Thinking always 198 | made him sleepy. 199 | 200 | When day broke he flew down to the river and had a bath. "What a 201 | remarkable phenomenon," said the Professor of Ornithology as he was 202 | passing over the bridge. "A swallow in winter!" And he wrote a long 203 | letter about it to the local newspaper. Every one quoted it, it was full 204 | of so many words that they could not understand. 205 | 206 | "To-night I go to Egypt," said the Swallow, and he was in high spirits 207 | at the prospect. He visited all the public monuments, and sat a long 208 | time on top of the church steeple. Wherever he went the Sparrows 209 | chirruped, and said to each other, "What a distinguished stranger!" 210 | so he enjoyed himself very much. 211 | 212 | When the moon rose he flew back to the Happy Prince. "Have you any 213 | commissions for Egypt?" he cried; "I am just starting." 214 | 215 | "Swallow, Swallow, little Swallow," said the Prince, "will you not stay 216 | with me one night longer?" 217 | 218 | [Illustration: THE LOVELIEST OF THE QUEEN'S MAIDS OF HONOUR] 219 | 220 | "I am waited for in Egypt," answered the Swallow. "To-morrow my friends 221 | will fly up to the Second Cataract. The river-horse couches there among 222 | the bulrushes, and on a great granite throne sits the God Memnon. All 223 | night long he watches the stars, and when the morning star shines he 224 | utters one cry of joy, and then he is silent. At noon the yellow lions 225 | come down to the water's edge to drink. They have eyes like green 226 | beryls, and their roar is louder than the roar of the cataract." 227 | 228 | "Swallow, Swallow, little Swallow," said the Prince, "far away across 229 | the city I see a young man in a garret. 
He is leaning over a desk 230 | covered with papers, and in a tumbler by his side there is a bunch of 231 | withered violets. His hair is brown and crisp, and his lips are red as a 232 | pomegranate, and he has large and dreamy eyes. He is trying to finish a 233 | play for the Director of the Theatre, but he is too cold to write any 234 | more. There is no fire in the grate, and hunger has made him faint." 235 | 236 | "I will wait with you one night longer," said the Swallow, who really 237 | had a good heart. "Shall I take him another ruby?" 238 | 239 | "Alas! I have no ruby now," said the Prince; "my eyes are all that I 240 | have left. They are made of rare sapphires, which were brought out of 241 | India a thousand years ago. Pluck out one of them and take it to him. He 242 | will sell it to the jeweller, and buy food and firewood, and finish his 243 | play." 244 | 245 | "Dear Prince," said the Swallow, "I cannot do that"; and he began to 246 | weep. 247 | 248 | "Swallow, Swallow, little Swallow," said the Prince, "do as I command 249 | you." 250 | 251 | So the Swallow plucked out the Prince's eye, and flew away to the 252 | student's garret. It was easy enough to get in, as there was a hole in 253 | the roof. Through this he darted, and came into the room. The young man 254 | had his head buried in his hands, so he did not hear the flutter of the 255 | bird's wings, and when he looked up he found the beautiful sapphire 256 | lying on the withered violets. 257 | 258 | "I am beginning to be appreciated," he cried; "this is from some great 259 | admirer. Now I can finish my play," and he looked quite happy. 260 | 261 | The next day the Swallow flew down to the harbour. He sat on the mast of 262 | a large vessel and watched the sailors hauling big chests out of the 263 | hold with ropes. "Heave a-hoy!" they shouted as each chest came up. 264 | "I am going to Egypt!" 
cried the Swallow, but nobody minded, and when 265 | the moon rose he flew back to the Happy Prince. 266 | 267 | "I am come to bid you good-bye," he cried. 268 | 269 | "Swallow, Swallow, little Swallow," said the Prince, "will you not stay 270 | with me one night longer?" 271 | 272 | "It is winter," answered the Swallow, "and the chill snow will soon be 273 | here. In Egypt the sun is warm on the green palm-trees, and the 274 | crocodiles lie in the mud and look lazily about them. My companions are 275 | building a nest in the Temple of Baalbec, and the pink and white doves 276 | are watching them, and cooing to each other. Dear Prince, I must leave 277 | you, but I will never forget you, and next spring I will bring you back 278 | two beautiful jewels in place of those you have given away. The ruby 279 | shall be redder than a red rose, and the sapphire shall be as blue as 280 | the great sea." 281 | 282 | "In the square below," said the Happy Prince, "there stands a little 283 | match-girl. She has let her matches fall in the gutter, and they are all 284 | spoiled. Her father will beat her if she does not bring home some money, 285 | and she is crying. She has no shoes or stockings, and her little head is 286 | bare. Pluck out my other eye and give it to her, and her father will not 287 | beat her." 288 | 289 | "I will stay with you one night longer," said the Swallow, "but I cannot 290 | pluck out your eye. You would be quite blind then." 291 | 292 | "Swallow, Swallow, little Swallow," said the Prince, "do as I command 293 | you." 294 | 295 | So he plucked out the Prince's other eye, and darted down with it. He 296 | swooped past the match-girl, and slipped the jewel into the palm of her 297 | hand. "What a lovely bit of glass!" cried the little girl; and she ran 298 | home, laughing. 299 | 300 | Then the Swallow came back to the Prince. "You are blind now," he said, 301 | "so I will stay with you always." 
302 | 303 | "No, little Swallow," said the poor Prince, "you must go away to Egypt." 304 | 305 | "I will stay with you always," said the Swallow, and he slept at the 306 | Prince's feet. 307 | 308 | All the next day he sat on the Prince's shoulder, and told him stories 309 | of what he had seen in strange lands. He told him of the red ibises, 310 | who stand in long rows on the banks of the Nile, and catch gold-fish in 311 | their beaks; of the Sphinx, who is as old as the world itself, and lives 312 | in the desert, and knows everything; of the merchants, who walk slowly 313 | by the side of their camels and carry amber beads in their hands; of the 314 | King of the Mountains of the Moon, who is as black as ebony, and 315 | worships a large crystal; of the great green snake that sleeps in a palm 316 | tree, and has twenty priests to feed it with honey-cakes; and of the 317 | pygmies who sail over a big lake on large flat leaves, and are always at 318 | war with the butterflies. 319 | 320 | "Dear little Swallow," said the Prince, "you tell me of marvellous 321 | things, but more marvellous than anything is the suffering of men and of 322 | women. There is no Mystery so great as Misery. Fly over my city, little 323 | Swallow, and tell me what you see there." 324 | 325 | [Illustration: THE RICH MAKING MERRY IN THEIR BEAUTIFUL HOUSES, 326 | WHILE THE BEGGARS WERE SITTING AT THE GATES] 327 | 328 | So the Swallow flew over the great city, and saw the rich making merry 329 | in their beautiful houses, while the beggars were sitting at the gates. 330 | He flew into dark lanes, and saw the white faces of starving children 331 | looking out listlessly at the black streets. Under the archway of a 332 | bridge two little boys were lying in one another's arms to try and keep 333 | themselves warm. "How hungry we are!" they said. "You must not lie 334 | here," shouted the Watchman, and they wandered out into the rain. 
335 | 336 | Then he flew back and told the Prince what he had seen. 337 | 338 | "I am covered with fine gold," said the Prince, "you must take it off, 339 | leaf by leaf, and give it to my poor; the living always think that gold 340 | can make them happy." 341 | 342 | Leaf after leaf of the fine gold the Swallow picked off, till the Happy 343 | Prince looked quite dull and grey. Leaf after leaf of the fine gold he 344 | brought to the poor, and the children's faces grew rosier, and they 345 | laughed and played games in the street. "We have bread now!" they cried. 346 | 347 | Then the snow came, and after the snow came the frost. The streets 348 | looked as if they were made of silver, they were so bright and 349 | glistening; long icicles like crystal daggers hung down from the eaves 350 | of the houses, everybody went about in furs, and the little boys wore 351 | scarlet caps and skated on the ice. 352 | 353 | The poor little Swallow grew colder and colder, but he would not leave 354 | the Prince, he loved him too well. He picked up crumbs outside the 355 | baker's door when the baker was not looking, and tried to keep himself 356 | warm by flapping his wings. 357 | 358 | But at last he knew that he was going to die. He had just strength to 359 | fly up to the Prince's shoulder once more. "Good-bye, dear Prince!" 360 | he murmured, "will you let me kiss your hand?" 361 | 362 | "I am glad that you are going to Egypt at last, little Swallow," said 363 | the Prince, "you have stayed too long here; but you must kiss me on the 364 | lips, for I love you." 365 | 366 | "It is not to Egypt that I am going," said the Swallow. "I am going to 367 | the House of Death. Death is the brother of Sleep, is he not?" 368 | 369 | And he kissed the Happy Prince on the lips, and fell down dead at his 370 | feet. 371 | 372 | At that moment a curious crack sounded inside the statue, as if 373 | something had broken. The fact is that the leaden heart had snapped 374 | right in two. 
It certainly was a dreadfully hard frost. 375 | 376 | Early the next morning the Mayor was walking in the square below in 377 | company with the Town Councillors. As they passed the column he looked 378 | up at the statue: "Dear me! how shabby the Happy Prince looks!" he said. 379 | 380 | "How shabby, indeed!" cried the Town Councillors, who always agreed with 381 | the Mayor; and they went up to look at it. 382 | 383 | "The ruby has fallen out of his sword, his eyes are gone, and he is 384 | golden no longer," said the Mayor; "in fact, he is little better than a 385 | beggar!" 386 | 387 | "Little better than a beggar," said the Town Councillors. 388 | 389 | "And here is actually a dead bird at his feet!" continued the Mayor. "We 390 | must really issue a proclamation that birds are not to be allowed to die 391 | here." And the Town Clerk made a note of the suggestion. 392 | 393 | So they pulled down the statue of the Happy Prince. "As he is no longer 394 | beautiful he is no longer useful," said the Art Professor at the 395 | University. 396 | 397 | Then they melted the statue in a furnace, and the Mayor held a meeting 398 | of the Corporation to decide what was to be done with the metal. "We 399 | must have another statue, of course," he said, "and it shall be a statue 400 | of myself." 401 | 402 | "Of myself," said each of the Town Councillors, and they quarrelled. 403 | When I last heard of them they were quarrelling still. 404 | 405 | "What a strange thing!" said the overseer of the workmen at the foundry. 406 | "This broken lead heart will not melt in the furnace. We must throw it 407 | away." So they threw it on a dust-heap where the dead Swallow was also 408 | lying. 409 | 410 | "Bring me the two most precious things in the city," said God to one of 411 | His Angels; and the Angel brought Him the leaden heart and the dead 412 | bird. 
413 | 414 | "You have rightly chosen," said God, "for in my garden of Paradise this 415 | little bird shall sing for evermore, and in my city of gold the Happy 416 | Prince shall praise me." 417 | 418 | -------------------------------------------------------------------------------- /lzw/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | A stream friendly, simple compression library, built around 4 | iterators. See L{compress} and L{decompress} for the easiest way to 5 | get started. 6 | 7 | After the TIFF implementation of LZW, as described at 8 | U{http://www.fileformat.info/format/tiff/corion-lzw.htm} 9 | 10 | 11 | In an even-nuttier-shell, lzw compresses input bytes with integer 12 | codes. Starting with codes 0-255 that code to themselves, and two 13 | control codes, we work our way through a stream of bytes. When we 14 | encounter a pair of codes c1,c2 we add another entry to our code table 15 | with the lowest available code and the value value(c1) + value(c2)[0] 16 | 17 | Of course, there are details :) 18 | 19 | The Details 20 | =========== 21 | 22 | Our control codes are 23 | 24 | - CLEAR_CODE (codepoint 256). When this code is encountered, we flush 25 | the codebook and start over. 26 | - END_OF_INFO_CODE (codepoint 257). This code is reserved for 27 | encoder/decoders over the integer codepoint stream (like the 28 | mechanical bit that unpacks bits into codepoints) 29 | 30 | When dealing with bytes, codes are emitted as variable 31 | length bit strings packed into the stream of bytes. 32 | 33 | codepoints are written with varying length 34 | - initially 9 bits 35 | - at 512 entries 10 bits 36 | - at 1025 entries at 11 bits 37 | - at 2048 entries 12 bits 38 | - with max of 4095 entries in a table (including Clear and EOI) 39 | 40 | code points are stored with their MSB in the most significant bit 41 | available in the output character. 
def compress(plaintext_bytes):
    """
    Compresses an iterable of bytes.

    Builds a fresh L{ByteEncoder} with its default maximum code width
    and returns the result of its C{encodetobytes} for
    plaintext_bytes: a (hopefully shorter) iterable of bytes that can
    be stored or transmitted and later handed to L{decompress} to get
    the original bytes back. This is the best place to start using
    this module.
    """
    return ByteEncoder().encodetobytes(plaintext_bytes)


def decompress(compressed_bytes):
    """
    Reverses L{compress}.

    Builds a fresh L{ByteDecoder} and returns the result of its
    C{decodefrombytes} for compressed_bytes: an iterator over the
    uncompressed bytes.
    """
    return ByteDecoder().decodefrombytes(compressed_bytes)


class ByteEncoder(object):
    """
    Turns a stream of uncompressed bytes into a stream of compressed
    bytes that L{ByteDecoder} can read back. It is an L{Encoder}
    (bytes to integer codepoints) chained into a L{BitPacker}
    (integer codepoints to packed bytes).


        >>> import lzw
        >>>
        >>> enc = lzw.ByteEncoder(12)
        >>> bigstr = b"gabba gabba yo gabba gabba gabba yo gabba gabba gabba yo gabba gabba gabba yo"
        >>> encoding = enc.encodetobytes(bigstr)
        >>> encoded = b"".join( b for b in encoding )
        >>> encoded == b'3\\x98LF#\\x08\\x82\\x05\\x04\\x83\\x1eM\\xf0x\\x1c\\x16\\x1b\\t\\x88C\\xe1q(4"\\x1f\\x17\\x85C#1X\\xec.\\x00'
        True
        >>>
        >>> dec = lzw.ByteDecoder()
        >>> decoding = dec.decodefrombytes(encoded)
        >>> decoded = b"".join(decoding)
        >>> decoded == bigstr
        True

    """

    def __init__(self, max_width=DEFAULT_MAX_BITS):
        """
        max_width is the maximum width, in bits, we want to see in the
        output stream of codepoints. It is handed to the L{Encoder} as
        a maximum code book size of 2**max_width.
        """
        codebook_limit = 2 ** max_width
        self._encoder = Encoder(max_code_size=codebook_limit)

        # The packer starts from however many codes the encoder knows
        # before it has seen any input.
        starting_size = self._encoder.code_size()
        self._packer = BitPacker(initial_code_size=starting_size)

    def encodetobytes(self, bytesource):
        """
        Returns an iterator of packed bytes for the given byte source:
        the encoder's codepoints for bytesource, run through the bit
        packer. Dual of L{ByteDecoder.decodefrombytes}.
        """
        return self._packer.pack(self._encoder.encode(bytesource))
class BitPacker(object):
    """
    Translates a stream of integer lzw codepoints into a variable
    width packed stream of bytes, for use by L{BitUnpacker}. One of a
    (potential) set of encoders for a stream of LZW codepoints,
    intended to behave as closely to the TIFF variable-width encoding
    as possible.

    Codepoints are packed into bit fields that start at the smallest
    width able to hold the initial code book and grow by one bit each
    time the anticipated code book size reaches the next power of two.

    This class knows the control codes CLEAR_CODE and
    END_OF_INFO_CODE, and it assumes that its producer's code book
    grows by exactly one entry per codepoint seen; it never asks the
    upstream encoder for its real size.
    """

    def __init__(self, initial_code_size):
        """
        Takes an initial code book size (that is, the count of known
        codes at the beginning of encoding, or after a clear).
        """
        self._initial_code_size = initial_code_size

    def pack(self, codepoints):
        """
        Given an iterator of integer codepoints, returns an iterator
        over single-byte strings containing the codepoints packed,
        MSB first, into fields of varying width. The width grows to
        accommodate a code book that is assumed to gain one entry per
        codepoint seen.

        The width is reset to its starting value when CLEAR_CODE or
        END_OF_INFO_CODE appears in the input, and the output is
        zero-padded to the next byte boundary after END_OF_INFO_CODE.
        Any bits left over when the input ends are zero-padded into
        one final byte.

        >>> import lzw, six
        >>> pkr = lzw.BitPacker(258)
        >>> [ b for b in pkr.pack([ 1, 257]) ] == [ six.int2byte(0), six.int2byte(0xC0), six.int2byte(0x40) ]
        True
        """
        pending = []  # bits produced but not yet emitted, MSB first
        codesize = self._initial_code_size

        # Narrowest width (never below 8 bits) that can represent every
        # code in the initial code book.
        minwidth = 8
        while (1 << minwidth) < codesize:
            minwidth += 1

        nextwidth = minwidth

        for pt in codepoints:
            # NOTE: inttobits pads but never truncates, so a codepoint
            # too large for nextwidth would shift every later field.
            pending.extend(inttobits(pt, nextwidth))

            # PAY ATTENTION. This calculation should be driven by the
            # size of the upstream codebook, right now we're just
            # trusting that everybody intends to follow the TIFF spec.
            codesize += 1

            if pt == END_OF_INFO_CODE:
                # Whatever follows EOI starts on a byte boundary.
                while len(pending) % 8:
                    pending.append(0)

            if pt in (CLEAR_CODE, END_OF_INFO_CODE):
                nextwidth = minwidth
                codesize = self._initial_code_size
            elif codesize >= (2 ** nextwidth):
                nextwidth += 1

            # Emit every complete byte right away (>= 8, so a full
            # byte is never held back waiting for more input) and
            # drop it from the buffer in place.
            while len(pending) >= 8:
                for bt in bitstobytes(pending[:8]):
                    yield struct.pack("B", bt)
                del pending[:8]

        # Fewer than eight bits can remain; bitstobytes zero-pads them.
        if pending:
            for bt in bitstobytes(pending):
                yield struct.pack("B", bt)
262 | """ 263 | 264 | def __init__(self, initial_code_size): 265 | """ 266 | initial_code_size is the starting size of the codebook 267 | associated with the to-be-unpacked stream. 268 | """ 269 | self._initial_code_size = initial_code_size 270 | 271 | 272 | def unpack(self, bytesource): 273 | """ 274 | Given an iterator of bytes, returns an iterator of integer 275 | code points. Auto-magically adjusts point width when it sees 276 | an almost-overflow in the input stream, or an LZW CLEAR_CODE 277 | or END_OF_INFO_CODE 278 | 279 | Trailing bits at the end of the given iterator, after the last 280 | codepoint, will be dropped on the floor. 281 | 282 | At the end of the iteration, or when an END_OF_INFO_CODE seen 283 | the unpacker will ignore the bits after the code until it 284 | reaches the next aligned byte. END_OF_INFO_CODE will *not* 285 | stop the generator, just reset the alignment and the width 286 | 287 | 288 | >>> import lzw, six 289 | >>> unpk = lzw.BitUnpacker(initial_code_size=258) 290 | >>> [ i for i in unpk.unpack([ six.int2byte(0), six.int2byte(0xC0), six.int2byte(0x40) ]) ] 291 | [1, 257] 292 | """ 293 | bits = [] 294 | offset = 0 295 | ignore = 0 296 | 297 | codesize = self._initial_code_size 298 | minwidth = 8 299 | while (1 << minwidth) < codesize: 300 | minwidth = minwidth + 1 301 | 302 | pointwidth = minwidth 303 | 304 | for nextbit in bytestobits(bytesource): 305 | 306 | offset = (offset + 1) % 8 307 | if ignore > 0: 308 | ignore = ignore - 1 309 | continue 310 | 311 | bits.append(nextbit) 312 | 313 | if len(bits) == pointwidth: 314 | codepoint = intfrombits(bits) 315 | bits = [] 316 | 317 | yield codepoint 318 | 319 | codesize = codesize + 1 320 | 321 | if codepoint in [ CLEAR_CODE, END_OF_INFO_CODE ]: 322 | codesize = self._initial_code_size 323 | pointwidth = minwidth 324 | else: 325 | # is this too late? 
class Decoder(object):
    """
    Uncompresses a stream of lzw code points, as created by
    L{Encoder}. Given an iterable of integer code points, with all
    unpacking foolishness complete, turns those codepoints into
    uncompressed bytes. See L{BitUnpacker} for what this doesn't do.
    """

    def __init__(self):
        """
        Creates a new Decoder. Decoders should not be reused for
        different streams.
        """
        self._clear_codes()
        self.remainder = []

    def code_size(self):
        """
        Returns the current size of the Decoder's code book, that is,
        its mapping of codepoints to byte strings. The return value of
        this method will change as the decoder encounters more encoded
        input, or control codes.
        """
        return len(self._codepoints)

    def decode(self, codepoints):
        """
        Given an iterable of integer codepoints, yields the
        corresponding bytes, one at a time, as byte strings of length
        E{1}. Codepoints are consumed lazily, one at a time, so the
        input may be an arbitrarily long stream. Retains the state of
        the codebook from call to call, so if you have another stream,
        you'll likely need another decoder!

        Decoders will NOT handle END_OF_INFO_CODE (rather, they will
        handle the code by throwing an exception); END_OF_INFO should
        be handled by the upstream codepoint generator (see
        L{BitUnpacker}, for example)

        >>> import lzw
        >>> dec = lzw.Decoder()
        >>> result = b''.join(dec.decode([103, 97, 98, 98, 97, 32, 258, 260, 262, 121, 111, 263, 259, 261, 256]))
        >>> result == b'gabba gabba yo gabba'
        True

        """
        for cp in codepoints:
            decoded = self._decode_codepoint(cp)
            # Slicing gives a length-1 byte string on both Python 2
            # and Python 3 (plain iteration would give ints on 3).
            for index in range(len(decoded)):
                yield decoded[index:index + 1]

    def _decode_codepoint(self, codepoint):
        """
        Decodes a single codepoint to its byte string and updates the
        code book and the remembered prefix. CLEAR_CODE resets the
        code book and decodes to an empty byte string.

        Will raise a ValueError if given an END_OF_INFORMATION
        code. EOI codes should be handled by callers if they're
        present in our source stream. Will also raise a ValueError
        for a codepoint that is neither in the code book nor the very
        next code to be assigned after a previously decoded codepoint.

        >>> import lzw
        >>> dec = lzw.Decoder()
        >>> beforesize = dec.code_size()
        >>> dec._decode_codepoint(0x80) == b'\\x80'
        True
        >>> dec._decode_codepoint(0x81) == b'\\x81'
        True
        >>> beforesize + 1 == dec.code_size()
        True
        >>> dec._decode_codepoint(256) == b''
        True
        >>> beforesize == dec.code_size()
        True
        """
        ret = b""

        if codepoint == CLEAR_CODE:
            self._clear_codes()
        elif codepoint == END_OF_INFO_CODE:
            raise ValueError("End of information code not supported directly by this Decoder")
        else:
            next_code = len(self._codepoints)

            if codepoint in self._codepoints:
                ret = self._codepoints[codepoint]
                if self._prefix is not None:
                    # New entry: previous string plus the first byte
                    # of the string just decoded.
                    self._codepoints[next_code] = self._prefix + ret[:1]
            else:
                # The only legal unknown code is the one about to be
                # assigned, and it needs a previous string to extend.
                if self._prefix is None or codepoint != next_code:
                    raise ValueError(
                        "Codepoint {0!r} is not in the code book and is not "
                        "the next code to be assigned".format(codepoint))
                ret = self._prefix + self._prefix[:1]
                self._codepoints[next_code] = ret

            self._prefix = ret

        return ret

    def _clear_codes(self):
        """
        Resets the code book to the 256 single-byte codes plus the two
        control codes, and forgets the previously decoded string.
        """
        self._codepoints = dict((pt, struct.pack("B", pt)) for pt in range(256))
        # The control codes map to themselves so that they occupy
        # their slots in the code book.
        self._codepoints[CLEAR_CODE] = CLEAR_CODE
        self._codepoints[END_OF_INFO_CODE] = END_OF_INFO_CODE
        self._prefix = None
class PagingEncoder(object):
    """
    UNTESTED. Encodes several chunks ("pages") of encodable data into
    one byte stream, with the pages separated by control codes. Dual
    of PagingDecoder.
    """

    def __init__(self, initial_code_size, max_code_size):
        # initial_code_size is only stored here; encodepages sizes its
        # packer from each page's Encoder instead.
        self._initial_code_size = initial_code_size
        self._max_code_size = max_code_size

    def encodepages(self, pages):
        """
        Given an iterator of iterators of bytes, produces a single
        iterator containing a delimited sequence of independently
        compressed LZW sequences, all beginning on a byte-aligned
        spot, all beginning with a CLEAR code and all terminated with
        an END_OF_INFORMATION code (and zero to seven trailing junk
        bits.)

        The dual of PagingDecoder.decodepages

        >>> import lzw
        >>> enc = lzw.PagingEncoder(257, 2**12)
        >>> coded = enc.encodepages([ b"say hammer yo hammer mc hammer go hammer",
        ...                           b"and the rest can go and play",
        ...                           b"can't touch this" ])
        ...
        >>> result = b"".join(coded)
        >>> result == b'\\x80\\x1c\\xcc\\'\\x91\\x01\\xa0\\xc2m6\\x99NB\\x03\\xc9\\xbe\\x0b\\x07\\x84\\xc2\\xcd\\xa68|"\\x14 3\\xc3\\xa0\\xd1c\\x94\\x02\\x02\\x80\\x18M\\xc6A\\x01\\xd0\\xd0e\\x10\\x1c\\x8c\\xa73\\xa0\\x80\\xc7\\x02\\x10\\x19\\xcd\\xe2\\x08\\x14\\x10\\xe0l0\\x9e`\\x10\\x10\\x80\\x18\\xcc&\\xe19\\xd0@t7\\x9dLf\\x889\\xa0\\xd2s\\x80@@'
        True

        """
        for page in pages:
            # Every page gets its own encoder and packer, so no code
            # book state leaks from one page into the next.
            page_encoder = Encoder(max_code_size=self._max_code_size)

            # Frame the page's codepoints: CLEAR first, EOI last.
            framed = itertools.chain(
                [CLEAR_CODE],
                page_encoder.encode(page),
                [END_OF_INFO_CODE],
            )

            # encode() is lazy, so this is still the size of the
            # untouched initial code book.
            page_packer = BitPacker(initial_code_size=page_encoder.code_size())

            for byte in page_packer.pack(framed):
                yield byte
Dual of PagingEncoder, knows how to handle independantly encoded, 607 | END_OF_INFO_CODE delimited chunks of an inbound byte stream 608 | """ 609 | 610 | def __init__(self, initial_code_size): 611 | self._initial_code_size = initial_code_size 612 | self._remains = [] 613 | 614 | def next_page(self, codepoints): 615 | """ 616 | Iterator over the next page of codepoints. 617 | """ 618 | self._remains = [] 619 | 620 | try: 621 | while 1: 622 | cp = six.next(codepoints) 623 | if cp != END_OF_INFO_CODE: 624 | yield cp 625 | else: 626 | self._remains = codepoints 627 | break 628 | 629 | except StopIteration: 630 | pass 631 | 632 | 633 | def decodepages(self, bytesource): 634 | """ 635 | Takes an iterator of bytes, returns an iterator of iterators 636 | of uncompressed data. Expects input to conform to the output 637 | conventions of PagingEncoder(), in particular that "pages" are 638 | separated with an END_OF_INFO_CODE and padding up to the next 639 | byte boundary. 640 | 641 | BUG: Dangling trailing page on decompression. 642 | 643 | >>> import lzw 644 | >>> pgdec = lzw.PagingDecoder(initial_code_size=257) 645 | >>> pgdecoded = pgdec.decodepages( 646 | ... b''.join([b'\\x80\\x1c\\xcc\\'\\x91\\x01\\xa0\\xc2m6', 647 | ... b'\\x99NB\\x03\\xc9\\xbe\\x0b\\x07\\x84\\xc2', 648 | ... b'\\xcd\\xa68|"\\x14 3\\xc3\\xa0\\xd1c\\x94', 649 | ... b'\\x02\\x02\\x80\\x18M\\xc6A\\x01\\xd0\\xd0e', 650 | ... b'\\x10\\x1c\\x8c\\xa73\\xa0\\x80\\xc7\\x02\\x10', 651 | ... b'\\x19\\xcd\\xe2\\x08\\x14\\x10\\xe0l0\\x9e`\\x10', 652 | ... b'\\x10\\x80\\x18\\xcc&\\xe19\\xd0@t7\\x9dLf\\x889', 653 | ... b'\\xa0\\xd2s\\x80@@']) 654 | ... ) 655 | >>> result = [ b"".join(pg) for pg in pgdecoded ] 656 | >>> result == [b'say hammer yo hammer mc hammer go hammer', b'and the rest can go and play', b"can't touch this", b''] 657 | True 658 | 659 | """ 660 | 661 | # TODO: WE NEED A CODE SIZE POLICY OBJECT THAT ISN'T THIS. 
#########################################
# Conveniences.


def unpackbyte(b):
    """
    Given a byte string, returns its first byte as an integer (for a
    one-byte string this is the same value as
    struct.unpack("B", b)[0]). Anything that is not a byte string,
    such as an integer obtained by iterating over bytes on Python 3,
    is returned unchanged. An empty byte string raises IndexError.
    """
    if isinstance(b, bytes):
        # bytearray indexing yields an int on Python 2 and Python 3.
        return bytearray(b[:1])[0]
    return b


def filebytes(fileobj, buffersize=1024):
    """
    Convenience for iterating over the bytes in a file. Given a
    file-like object (with a read(int) method), returns an iterator
    over the items of each chunk it reads, buffersize at a time, until
    a read comes back empty. Items are whatever iterating a chunk
    produces (integers for bytes objects on Python 3).
    """
    buff = fileobj.read(buffersize)
    while buff:
        for byte in buff:
            yield byte
        buff = fileobj.read(buffersize)


def readbytes(filename, buffersize=1024):
    """
    Opens the file named by filename in binary mode and yields its
    contents one byte at a time, each as a byte string of length 1,
    reading buffersize bytes per chunk. Will close the file when the
    bytes run out.
    """
    with open(filename, "rb") as infile:
        chunk = infile.read(buffersize)
        while chunk:
            # Slicing yields length-1 byte strings directly, so no
            # int -> bytes re-cast is needed on Python 3.
            for index in range(len(chunk)):
                yield chunk[index:index + 1]
            chunk = infile.read(buffersize)


def writebytes(filename, bytesource):
    """
    Convenience for emitting the bytes we generate to a file. Given a
    filename, opens and truncates the file, writes each item of
    bytesource into it in order, and closes it.
    """
    with open(filename, "wb") as outfile:
        for bt in bytesource:
            outfile.write(bt)


def inttobits(anint, width=None):
    """
    Produces a list of 0/1 integers representing the given argument
    as an unsigned integer, MSB first. If width is given, will pad the
    MSBs to the given width (but will NOT truncate overflowing
    results). Zero with no width produces an empty list.

    Raises ValueError for a negative argument, which has no unsigned
    representation (and would otherwise never shift down to zero).

    >>> import lzw
    >>> lzw.inttobits(304, width=16)
    [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0]

    """
    if anint < 0:
        raise ValueError(
            "inttobits requires a non-negative integer, got {0!r}".format(anint))

    bits = []
    remains = anint
    while remains:
        bits.append(remains & 1)  # collected LSB first
        remains >>= 1
    bits.reverse()

    if width is not None:
        # A negative pad count yields an empty list: no truncation.
        bits = [0] * (width - len(bits)) + bits

    return bits


def intfrombits(bits):
    """
    Given an iterable of boolean values, interprets them as a binary
    encoded, MSB-first unsigned integer (with True == 1 and False
    == 0) and returns the result. An empty iterable gives 0.

    >>> import lzw
    >>> lzw.intfrombits([ 1, 0, 0, 1, 1, 0, 0, 0, 0 ])
    304
    """
    ret = 0
    for bit in bits:
        ret = (ret << 1) | (1 if bit else 0)
    return ret


def bytestobits(bytesource):
    """
    Breaks a given iterable of bytes into an iterable of 0/1 values
    representing those bytes as unsigned integers, eight bits per
    byte, MSB first. Each item is converted with L{unpackbyte} first.

    >>> import lzw
    >>> [ x for x in lzw.bytestobits(b"\\x01\\x30") ]
    [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0]
    """
    for b in bytesource:
        value = unpackbyte(b)
        for bitindex in range(7, -1, -1):
            yield 1 & (value >> bitindex)


def bitstobytes(bits):
    """
    Interprets an iterable of booleans as bits, MSB first, to be
    packed into a list of integers from 0 to 255, MSB first, with the
    LSBs of the last integer zero-padded. Note this padding behavior
    means that a round trip through bytestobits(bitstobytes(x)) may
    not yield what you expect if len(x) % 8 != 0

    Does *NOT* pack the returned values into a bytearray or the like.

    >>> import lzw
    >>> lzw.bitstobytes([0, 0, 0, 0, 0, 0, 0, 0, "Yes, I'm True"]) == [ 0x00, 0x80 ]
    True
    >>> lzw.bitstobytes([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0]) == [ 0x01, 0x30 ]
    True
    """
    ret = []
    nextbyte = 0
    nextbit = 7  # position the next incoming bit will occupy
    for bit in bits:
        if bit:
            nextbyte |= 1 << nextbit

        if nextbit:
            nextbit -= 1
        else:
            # Bit 0 just filled: the byte is complete.
            ret.append(nextbyte)
            nextbit = 7
            nextbyte = 0

    # A partially filled final byte is emitted with zero padding.
    if nextbit < 7:
        ret.append(nextbyte)
    return ret