├── LICENSE.txt ├── README.md ├── lzstring.py ├── setup.py └── test.py /LICENSE.txt: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2004 Sam Hocevar 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license document, and changing it is allowed as long 8 | as the name is changed. 9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | lz-string-python 2 | ================ 3 | 4 | LZ-based compression algorithm for Python 3 5 | 6 | Based on the LZ-String javascript library (version 1.4.4) 7 | 8 | http://pieroxy.net/blog/pages/lz-string/index.html 9 | 10 | 11 | Example 12 | ------- 13 | 14 | ```python 15 | >>> from lzstring import LZString 16 | >>> string = "This is my compression test" 17 | >>> compressed = LZString.compressToBase64(string) 18 | >>> LZString.decompressFromBase64(compressed) 19 | ``` 20 | -------------------------------------------------------------------------------- /lzstring.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright © 2017 Marcel Dancak 3 | This work is free. You can redistribute it and/or modify it under the 4 | terms of the Do What The Fuck You Want To Public License, Version 2, 5 | as published by Sam Hocevar. See the COPYING file for more details. 6 | """ 7 | 8 | import math 9 | 10 | 11 | keyStrBase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" 12 | keyStrUriSafe = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-$" 13 | baseReverseDic = {}; 14 | 15 | class Object(object): 16 | def __init__(self, **kwargs): 17 | for k, v in kwargs.items(): 18 | setattr(self, k, v) 19 | 20 | 21 | def getBaseValue(alphabet, character): 22 | if alphabet not in baseReverseDic: 23 | baseReverseDic[alphabet] = {} 24 | for index, i in enumerate(alphabet): 25 | baseReverseDic[alphabet][i] = index 26 | return baseReverseDic[alphabet][character] 27 | 28 | 29 | def _compress(uncompressed, bitsPerChar, getCharFromInt): 30 | if (uncompressed is None): 31 | return "" 32 | 33 | context_dictionary = {} 34 | context_dictionaryToCreate= {} 35 | context_c = "" 36 | context_wc = "" 37 | context_w = "" 38 | context_enlargeIn = 2 # Compensate for the first entry which should not count 39 | context_dictSize = 3 40 | context_numBits = 2 41 | context_data = [] 42 | context_data_val = 0 43 | context_data_position = 0 44 | 45 | for ii in range(len(uncompressed)): 46 | context_c = uncompressed[ii] 47 | if context_c not in context_dictionary: 48 | context_dictionary[context_c] = context_dictSize 49 | context_dictSize += 1 50 | context_dictionaryToCreate[context_c] = True 51 | 52 | context_wc = context_w + context_c 53 | if context_wc in context_dictionary: 54 | context_w = context_wc 55 | else: 56 | if context_w in context_dictionaryToCreate: 57 | if ord(context_w[0]) < 256: 58 | for i in range(context_numBits): 59 | context_data_val = (context_data_val << 1) 60 | if context_data_position == bitsPerChar-1: 61 | context_data_position = 0 62 | context_data.append(getCharFromInt(context_data_val)) 63 | context_data_val = 0 64 | else: 65 | context_data_position += 1 66 | value = ord(context_w[0]) 67 | for i in range(8): 68 | context_data_val = (context_data_val << 1) | (value & 1) 69 | if context_data_position == bitsPerChar - 1: 70 | context_data_position = 0 71 | context_data.append(getCharFromInt(context_data_val)) 72 | context_data_val = 0 73 | else: 74 | context_data_position += 1 75 | value = value >> 1 76 | 77 | else: 78 | value = 1 79 | for i in range(context_numBits): 80 | context_data_val = (context_data_val << 1) | value 81 | if context_data_position == bitsPerChar - 1: 82 | context_data_position = 0 83 | context_data.append(getCharFromInt(context_data_val)) 84 | context_data_val = 0 85 | else: 86 | context_data_position += 1 87 | value = 0 88 | value = ord(context_w[0]) 89 | for i in range(16): 90 | context_data_val = (context_data_val << 1) | (value & 1) 91 | if context_data_position == bitsPerChar - 1: 92 | context_data_position = 0 93 | context_data.append(getCharFromInt(context_data_val)) 94 | context_data_val = 0 95 | else: 96 | context_data_position += 1 97 | value = value >> 1 98 | context_enlargeIn -= 1 99 | if context_enlargeIn == 0: 100 | context_enlargeIn = math.pow(2, context_numBits) 101 | context_numBits += 1 102 | del context_dictionaryToCreate[context_w] 103 | else: 104 | value = context_dictionary[context_w] 105 | for i in range(context_numBits): 106 | context_data_val = (context_data_val << 1) | (value & 1) 107 | if context_data_position == bitsPerChar - 1: 108 | context_data_position = 0 109 | context_data.append(getCharFromInt(context_data_val)) 110 | context_data_val = 0 111 | else: 112 | context_data_position += 1 113 | value = value >> 1 114 | 115 | context_enlargeIn -= 1 116 | if context_enlargeIn == 0: 117 | context_enlargeIn = math.pow(2, context_numBits) 118 | context_numBits += 1 119 | 120 | # Add wc to the dictionary. 121 | context_dictionary[context_wc] = context_dictSize 122 | context_dictSize += 1 123 | context_w = str(context_c) 124 | 125 | # Output the code for w. 126 | if context_w != "": 127 | if context_w in context_dictionaryToCreate: 128 | if ord(context_w[0]) < 256: 129 | for i in range(context_numBits): 130 | context_data_val = (context_data_val << 1) 131 | if context_data_position == bitsPerChar-1: 132 | context_data_position = 0 133 | context_data.append(getCharFromInt(context_data_val)) 134 | context_data_val = 0 135 | else: 136 | context_data_position += 1 137 | value = ord(context_w[0]) 138 | for i in range(8): 139 | context_data_val = (context_data_val << 1) | (value & 1) 140 | if context_data_position == bitsPerChar - 1: 141 | context_data_position = 0 142 | context_data.append(getCharFromInt(context_data_val)) 143 | context_data_val = 0 144 | else: 145 | context_data_position += 1 146 | value = value >> 1 147 | else: 148 | value = 1 149 | for i in range(context_numBits): 150 | context_data_val = (context_data_val << 1) | value 151 | if context_data_position == bitsPerChar - 1: 152 | context_data_position = 0 153 | context_data.append(getCharFromInt(context_data_val)) 154 | context_data_val = 0 155 | else: 156 | context_data_position += 1 157 | value = 0 158 | value = ord(context_w[0]) 159 | for i in range(16): 160 | context_data_val = (context_data_val << 1) | (value & 1) 161 | if context_data_position == bitsPerChar - 1: 162 | context_data_position = 0 163 | context_data.append(getCharFromInt(context_data_val)) 164 | context_data_val = 0 165 | else: 166 | context_data_position += 1 167 | value = value >> 1 168 | context_enlargeIn -= 1 169 | if context_enlargeIn == 0: 170 | context_enlargeIn = math.pow(2, context_numBits) 171 | context_numBits += 1 172 | del context_dictionaryToCreate[context_w] 173 | else: 174 | value = context_dictionary[context_w] 175 | for i in range(context_numBits): 176 | context_data_val = (context_data_val << 1) | (value & 1) 177 | if context_data_position == bitsPerChar - 1: 178 | context_data_position = 0 179 | context_data.append(getCharFromInt(context_data_val)) 180 | context_data_val = 0 181 | else: 182 | context_data_position += 1 183 | value = value >> 1 184 | 185 | context_enlargeIn -= 1 186 | if context_enlargeIn == 0: 187 | context_enlargeIn = math.pow(2, context_numBits) 188 | context_numBits += 1 189 | 190 | # Mark the end of the stream 191 | value = 2 192 | for i in range(context_numBits): 193 | context_data_val = (context_data_val << 1) | (value & 1) 194 | if context_data_position == bitsPerChar - 1: 195 | context_data_position = 0 196 | context_data.append(getCharFromInt(context_data_val)) 197 | context_data_val = 0 198 | else: 199 | context_data_position += 1 200 | value = value >> 1 201 | 202 | # Flush the last char 203 | while True: 204 | context_data_val = (context_data_val << 1) 205 | if context_data_position == bitsPerChar - 1: 206 | context_data.append(getCharFromInt(context_data_val)) 207 | break 208 | else: 209 | context_data_position += 1 210 | 211 | return "".join(context_data) 212 | 213 | 214 | def _decompress(length, resetValue, getNextValue): 215 | dictionary = {} 216 | enlargeIn = 4 217 | dictSize = 4 218 | numBits = 3 219 | entry = "" 220 | result = [] 221 | 222 | data = Object( 223 | val=getNextValue(0), 224 | position=resetValue, 225 | index=1 226 | ) 227 | 228 | for i in range(3): 229 | dictionary[i] = i 230 | 231 | bits = 0 232 | maxpower = math.pow(2, 2) 233 | power = 1 234 | 235 | while power != maxpower: 236 | resb = data.val & data.position 237 | data.position >>= 1 238 | if data.position == 0: 239 | data.position = resetValue 240 | data.val = getNextValue(data.index) 241 | data.index += 1 242 | 243 | bits |= power if resb > 0 else 0 244 | power <<= 1; 245 | 246 | next = bits 247 | if next == 0: 248 | bits = 0 249 | maxpower = math.pow(2, 8) 250 | power = 1 251 | while power != maxpower: 252 | resb = data.val & data.position 253 | data.position >>= 1 254 | if data.position == 0: 255 | data.position = resetValue 256 | data.val = getNextValue(data.index) 257 | data.index += 1 258 | bits |= power if resb > 0 else 0 259 | power <<= 1 260 | c = chr(bits) 261 | elif next == 1: 262 | bits = 0 263 | maxpower = math.pow(2, 16) 264 | power = 1 265 | while power != maxpower: 266 | resb = data.val & data.position 267 | data.position >>= 1 268 | if data.position == 0: 269 | data.position = resetValue; 270 | data.val = getNextValue(data.index) 271 | data.index += 1 272 | bits |= power if resb > 0 else 0 273 | power <<= 1 274 | c = chr(bits) 275 | elif next == 2: 276 | return "" 277 | 278 | # print(bits) 279 | dictionary[3] = c 280 | w = c 281 | result.append(c) 282 | counter = 0 283 | while True: 284 | counter += 1 285 | if data.index > length: 286 | return "" 287 | 288 | bits = 0 289 | maxpower = math.pow(2, numBits) 290 | power = 1 291 | while power != maxpower: 292 | resb = data.val & data.position 293 | data.position >>= 1 294 | if data.position == 0: 295 | data.position = resetValue; 296 | data.val = getNextValue(data.index) 297 | data.index += 1 298 | bits |= power if resb > 0 else 0 299 | power <<= 1 300 | 301 | c = bits 302 | if c == 0: 303 | bits = 0 304 | maxpower = math.pow(2, 8) 305 | power = 1 306 | while power != maxpower: 307 | resb = data.val & data.position 308 | data.position >>= 1 309 | if data.position == 0: 310 | data.position = resetValue 311 | data.val = getNextValue(data.index) 312 | data.index += 1 313 | bits |= power if resb > 0 else 0 314 | power <<= 1 315 | 316 | dictionary[dictSize] = chr(bits) 317 | dictSize += 1 318 | c = dictSize - 1 319 | enlargeIn -= 1 320 | elif c == 1: 321 | bits = 0 322 | maxpower = math.pow(2, 16) 323 | power = 1 324 | while power != maxpower: 325 | resb = data.val & data.position 326 | data.position >>= 1 327 | if data.position == 0: 328 | data.position = resetValue; 329 | data.val = getNextValue(data.index) 330 | data.index += 1 331 | bits |= power if resb > 0 else 0 332 | power <<= 1 333 | dictionary[dictSize] = chr(bits) 334 | dictSize += 1 335 | c = dictSize - 1 336 | enlargeIn -= 1 337 | elif c == 2: 338 | return "".join(result) 339 | 340 | 341 | if enlargeIn == 0: 342 | enlargeIn = math.pow(2, numBits) 343 | numBits += 1 344 | 345 | if c in dictionary: 346 | entry = dictionary[c] 347 | else: 348 | if c == dictSize: 349 | entry = w + w[0] 350 | else: 351 | return None 352 | result.append(entry) 353 | 354 | # Add w+entry[0] to the dictionary. 355 | dictionary[dictSize] = w + entry[0] 356 | dictSize += 1 357 | enlargeIn -= 1 358 | 359 | w = entry 360 | if enlargeIn == 0: 361 | enlargeIn = math.pow(2, numBits) 362 | numBits += 1 363 | 364 | 365 | class LZString(object): 366 | @staticmethod 367 | def compress(uncompressed): 368 | return _compress(uncompressed, 16, chr) 369 | 370 | @staticmethod 371 | def compressToUTF16(uncompressed): 372 | if uncompressed is None: 373 | return "" 374 | return _compress(uncompressed, 15, lambda a: chr(a+32)) + " " 375 | 376 | @staticmethod 377 | def compressToBase64(uncompressed): 378 | if uncompressed is None: 379 | return "" 380 | res = _compress(uncompressed, 6, lambda a: keyStrBase64[a]) 381 | # To produce valid Base64 382 | end = len(res) % 4 383 | print (end) 384 | if end > 0: 385 | res += "="*(4 - end) 386 | return res 387 | 388 | @staticmethod 389 | def compressToEncodedURIComponent(uncompressed): 390 | if uncompressed is None: 391 | return "" 392 | return _compress(uncompressed, 6, lambda a: keyStrUriSafe[a]) 393 | 394 | @staticmethod 395 | def decompress(compressed): 396 | if compressed is None: 397 | return "" 398 | if compressed == "": 399 | return None 400 | return _decompress(len(compressed), 32768, lambda index: ord(compressed[index])) 401 | 402 | @staticmethod 403 | def decompressFromUTF16(compressed): 404 | if compressed is None: 405 | return "" 406 | if compressed == "": 407 | return None 408 | return _decompress(len(compressed), 16384, lambda index: ord(compressed[index]) - 32) 409 | 410 | @staticmethod 411 | def decompressFromBase64(compressed): 412 | if compressed is None: 413 | return "" 414 | if compressed == "": 415 | return None 416 | return _decompress(len(compressed), 32, lambda index: getBaseValue(keyStrBase64, compressed[index])) 417 | 418 | @staticmethod 419 | def decompressFromEncodedURIComponent(compressed): 420 | if compressed is None: 421 | return "" 422 | if compressed == "": 423 | return None 424 | compressed = compressed.replace(" ", "+") 425 | return _decompress(len(compressed), 32, lambda index: getBaseValue(keyStrUriSafe, compressed[index])) 426 | 427 | @staticmethod 428 | def decompressFromUint8Array(compressed): 429 | length_compressed = len(compressed)//2 430 | buf=[] 431 | for i in range(length_compressed): 432 | buf.append(compressed[i*2]*256+compressed[i*2+1]) 433 | result=[] 434 | for i in buf: 435 | result.append(chr(i & 0xffff)) 436 | return decompress(''.join(result)) 437 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | setup( 5 | name='lzstring', 6 | version='0.1', 7 | description='lz-string for python', 8 | author='Marcel Dancak', 9 | author_email='dancakm@gmail.com', 10 | py_modules=['lzstring'], 11 | install_requires=[], 12 | url='https://github.com/marcel-dancak/lz-string-python', 13 | download_url='https://github.com/marcel-dancak/lz-string-python', 14 | classifiers=[ 15 | 'Development Status :: 3 - Alpha', 16 | 'Intended Audience :: Developers', 17 | 'Natural Language :: English', 18 | 'License :: OSI Approved :: MIT License', 19 | 'Programming Language :: Python', 20 | 'Programming Language :: Python :: 3' 21 | ] 22 | ) -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright © 2017 Marcel Dancak 3 | This work is free. You can redistribute it and/or modify it under the 4 | terms of the Do What The Fuck You Want To Public License, Version 2, 5 | as published by Sam Hocevar. See the COPYING file for more details. 6 | """ 7 | 8 | import json 9 | import pprint 10 | 11 | from lzstring import LZString 12 | 13 | 14 | if __name__ == '__main__': 15 | 16 | s = 'Žluťoučký kůň úpěl ďábelské ódy!' 17 | 18 | # generated with original js lib 19 | jsLzStringBase64 = 'r6ABsK6KaAD2aLCADWBfgBPQ9oCAlAZAvgDobEARlB4QAEOAjAUxAGd4BL5AZ4BMBPAQiA==' 20 | jsLzStringBase64Json = 'N4Ig5gNg9gzjCGAnAniAXKALgS0xApuiPgB7wC2ADgQASSwIogA0IA4tHACLYBu6WXASIBlFu04wAMthiYBEhgFEAdpiYYQASS6i2AWSniRURJgCCMPYfEcGAFXyJyozPBUATJB5pt8Kp3gIbAAvfB99JABrAFdKGil3MBj4MEJWcwBjRCgVZBc0EBEDIwyAIzLEfH5CrREAeRoADiaAdgBONABGdqaANltJLnwAMwVKJHgicxpyfDcAWnJouJoIJJS05hoYmHCaTCgabPx4THxZlfj1lWTU/BgaGBjMgAsaeEeuKEyAISgoFEAHSDBgifD4cwQGBQdAAbXYNlYAA0bABdAC+rDscHBhEKy0QsUoIAxZLJQA' 21 | 22 | print('String for encode: ' + s) 23 | print() 24 | 25 | print('Compress to base64:') 26 | base2 = LZString.compressToBase64(s) 27 | print('result: ' + base2) 28 | print('result js: ' + jsLzStringBase64) 29 | print('equals: ' + str(base2 == jsLzStringBase64)) 30 | 31 | print() 32 | 33 | print('Decompress from base64:') 34 | print('result: ' + LZString.decompressFromBase64(base2)) 35 | print('result from js: ' + LZString.decompressFromBase64(jsLzStringBase64)) 36 | 37 | print() 38 | 39 | jsonString = '{"glossary":{"title":"example glossary","GlossDiv":{"title":"S","GlossList":{"GlossEntry":{"ID":"SGML","SortAs":"SGML","GlossTerm":"Standard Generalized Markup Language","Acronym":"SGML","Abbrev":"ISO 8879:1986","GlossDef":{"para":"A meta-markup language, used to create markup languages such as DocBook.","GlossSeeAlso":["GML","XML"]},"GlossSee":"markup"}}}}}' 40 | 41 | print('Compress json to base64:') 42 | jresult = LZString.compressToBase64(jsonString) 43 | print('result: ' + jresult) 44 | print() 45 | print('result js: ' + jsLzStringBase64Json) 46 | print() 47 | print('equals: ' + str(jresult == jsLzStringBase64Json)) 48 | 49 | print() 50 | 51 | print('Decompress json from base64:') 52 | pprint.pprint(json.loads(LZString.decompressFromBase64(jsLzStringBase64Json))) 53 | --------------------------------------------------------------------------------