├── .gitignore ├── LICENSE ├── README.md ├── lualzw.lua └── profiling.lua /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Lua sources 2 | luac.out 3 | 4 | # luarocks build files 5 | *.src.rock 6 | *.zip 7 | *.tar.gz 8 | 9 | # Object files 10 | *.o 11 | *.os 12 | *.ko 13 | *.obj 14 | *.elf 15 | 16 | # Precompiled Headers 17 | *.gch 18 | *.pch 19 | 20 | # Libraries 21 | *.lib 22 | *.a 23 | *.la 24 | *.lo 25 | *.def 26 | *.exp 27 | 28 | # Shared objects (inc. Windows DLLs) 29 | *.dll 30 | *.so 31 | *.so.* 32 | *.dylib 33 | 34 | # Executables 35 | *.exe 36 | *.out 37 | *.app 38 | *.i*86 39 | *.x86_64 40 | *.hex 41 | 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lualzw 2 | A relatively fast LZW compression algorithm in pure lua 3 | 4 | # encoding and decoding 5 | Lossless compression for any text. The more repetition in the text, the better. 6 | 7 | 16 bit encoding is used. So each 8 bit character is encoded as 16 bit. 8 | This means that the dictionary size is 65280. 9 | 10 | Any special characters like `äöå` that are represented with multiple characters are supported. The special characters are split up into single characters that are then encoded and decoded. 11 | 12 | While compressing, the algorithm checks if the result size gets over the input. If it does, then the input is not compressed and the algorithm returns the input prematurely as the compressed result. 13 | 14 | The `zeros` branch contains a version that does not add additional null `\0` characters to the input when encoding. Any existing null characters in input string are preserved as nulls however so make sure your input does not contain nulls. 15 | 16 | # usage 17 | ```lua 18 | local lualzw = require("lualzw") 19 | 20 | local input = "foofoofoofoofoofoofoofoofoo" 21 | local compressed = assert(lualzw.compress(input)) 22 | local decompressed = assert(lualzw.decompress(compressed)) 23 | assert(input == decompressed) 24 | ``` 25 | 26 | # errors 27 | Returns nil and an error message when the algorithm fails to compress or decompress. 28 | 29 | # speed 30 | Times are in seconds. 31 | Both have the same generated input. 32 | The values are an average of 10 tries. 
33 | 34 | Note that compressing randomly generated input usually produces a result larger than the original. In these cases the algorithms do not compress and return the input instead, so the compression result is 100% of the input. 35 | 36 | lualzw has an advantage when compression is not possible, as it stops early and LibCompress does not. 37 | lualzw also has an advantage when compression is possible, as it uses a larger dictionary. 38 | 39 | Input: 1000000 randomly generated bytes converted into string 40 | 41 | algorithm|compress|decompress|result % of input 42 | ---------|--------|----------|------------- 43 | lualzw|0.6622|0.0003|100 44 | LibCompress|2.1983|0.0024|100 45 | 46 | Input: 1000000 randomly generated bytes in ASCII range converted into string 47 | 48 | algorithm|compress|decompress|result % of input 49 | ---------|--------|----------|------------- 50 | lualzw|0.812|0.0022|100 51 | LibCompress|1.782|0.0007|100 52 | 53 | Input: 1000000 randomly generated repeating bytes converted into string 54 | 55 | algorithm|compress|decompress|result % of input 56 | ---------|--------|----------|------------- 57 | lualzw|0.3975|0.0262|4.5001 58 | LibCompress|0.3907|0.0264|6.6997 59 | 60 | Input: 1000000 repetitions of the same character 61 | 62 | algorithm|compress|decompress|result % of input 63 | ---------|--------|----------|------------- 64 | lualzw|0.7045|0.0026|0.2829 65 | LibCompress|0.6418|0.0038|0.4241 66 | 67 | Input: "ymn32h8hm8ekrwjkrn9f" repeated 50000 times. 
In total 1000000 bytes 68 | 69 | algorithm|compress|decompress|result % of input 70 | ---------|--------|----------|------------- 71 | lualzw|0.4788|0.0088|1.2629 72 | LibCompress|0.4426|0.0093|1.8905 73 | -------------------------------------------------------------------------------- /lualzw.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | MIT License 3 | 4 | Copyright (c) 2016 Rochet2 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
]]

local char = string.char
local type = type
local sub = string.sub
local tconcat = table.concat

-- Base dictionaries: every single byte i is represented by the two-byte code
-- char(i, 0). The second byte 0 is reserved for these entries; dynamically
-- learned entries always use a second byte in the range 1..255.
local basedictcompress = {}
local basedictdecompress = {}
for i = 0, 255 do
    local ic, iic = char(i), char(i, 0)
    basedictcompress[ic] = iic
    basedictdecompress[iic] = ic
end

--- Adds a phrase to the compression dictionary under the next free code.
-- The code is char(a, b). `a` advances by one per entry; when it reaches 256
-- it wraps to 0 and `b` is incremented. When `b` would reach 256 the whole
-- dictionary is discarded and numbering restarts at b = 1.
-- @param str  phrase to register
-- @param dict current phrase -> code table
-- @param a    first byte of the next free code (may be 256, meaning "wrap")
-- @param b    second byte of the next free code
-- @return the (possibly fresh) dictionary and the updated a, b counters
local function dictAddA(str, dict, a, b)
    if a >= 256 then
        a, b = 0, b+1
        if b >= 256 then
            dict = {}
            b = 1
        end
    end
    dict[str] = char(a,b)
    a = a+1
    return dict, a, b
end

--- Compresses a string with LZW using fixed two-byte codes.
-- The result starts with a control byte: "c" when followed by codes, "u" when
-- followed by the untouched input (inputs of length <= 1, or inputs for which
-- the encoded form would not be shorter than the input itself).
-- @param input string to compress
-- @return the encoded string, or nil plus an error message on failure
local function compress(input)
    if type(input) ~= "string" then
        return nil, "string expected, got "..type(input)
    end
    local len = #input
    if len <= 1 then
        return "u"..input
    end

    local dict = {}
    local a, b = 0, 1

    local result = {"c"}
    local resultlen = 1
    local n = 2
    local word = ""
    for i = 1, len do
        local c = sub(input, i, i)
        local wc = word..c
        if not (basedictcompress[wc] or dict[wc]) then
            -- word..c is unknown: emit the code of the longest known prefix
            -- and learn word..c as a new phrase.
            local write = basedictcompress[word] or dict[word]
            if not write then
                return nil, "algorithm error, could not fetch word"
            end
            result[n] = write
            resultlen = resultlen + #write
            n = n+1
            -- Give up as soon as the output is no shorter than the input.
            if len <= resultlen then
                return "u"..input
            end
            dict, a, b = dictAddA(wc, dict, a, b)
            word = c
        else
            word = wc
        end
    end
    -- Flush the phrase that was still being matched when the input ended.
    result[n] = basedictcompress[word] or dict[word]
    resultlen = resultlen+#result[n]
    n = n+1
    if len <= resultlen then
        return "u"..input
    end
    return tconcat(result)
end

--- Adds a phrase to the decompression dictionary under the next free code.
-- Mirrors dictAddA (same counter and reset rules) but stores code -> phrase.
-- @param str  phrase to register
-- @param dict current code -> phrase table
-- @param a    first byte of the next free code (may be 256, meaning "wrap")
-- @param b    second byte of the next free code
-- @return the (possibly fresh) dictionary and the updated a, b counters
local function dictAddB(str, dict, a, b)
    if a >= 256 then
        a, b = 0, b+1
        if b >= 256 then
            dict = {}
            b = 1
        end
    end
    dict[char(a,b)] = str
    a = a+1
    return dict, a, b
end

--- Decompresses a string produced by compress.
-- @param input string starting with the control byte "u" or "c"
-- @return the original string, or nil plus an error message when the input
--         is not a string or is not a well-formed compressed string
local function decompress(input)
    if type(input) ~= "string" then
        return nil, "string expected, got "..type(input)
    end

    if #input < 1 then
        return nil, "invalid input - not a compressed string"
    end

    local control = sub(input, 1, 1)
    if control == "u" then
        return sub(input, 2)
    elseif control ~= "c" then
        return nil, "invalid input - not a compressed string"
    end
    input = sub(input, 2)
    local len = #input

    -- Every code is exactly two bytes, so a valid payload has even length.
    if len < 2 or len % 2 ~= 0 then
        return nil, "invalid input - not a compressed string"
    end

    local dict = {}
    local a, b = 0, 1

    local result = {}
    local n = 1
    local last = sub(input, 1, 2)
    -- Nothing has been learned yet, so the first code must be a base code.
    local first = basedictdecompress[last]
    if not first then
        return nil, "invalid input - not a compressed string"
    end
    result[n] = first
    n = n+1
    for i = 3, len, 2 do
        local code = sub(input, i, i+1)
        local lastStr = basedictdecompress[last] or dict[last]
        if not lastStr then
            return nil, "could not find last from dict. Invalid input?"
        end
        -- The decoder learns each phrase one step later than the encoder.
        -- If the entry added in this step triggers a dictionary reset, the
        -- encoder produced `code` from its already-reset dictionary, so the
        -- stale entries still held here must not be used to resolve it.
        local willReset = a >= 256 and b >= 255
        local toAdd = basedictdecompress[code]
        if not toAdd and not willReset then
            toAdd = dict[code]
        end
        if toAdd then
            result[n] = toAdd
            n = n+1
            dict, a, b = dictAddB(lastStr..sub(toAdd, 1, 1), dict, a, b)
        else
            -- The code is not known yet: it can only refer to the phrase
            -- being learned right now (previous phrase + its first byte).
            local tmp = lastStr..sub(lastStr, 1, 1)
            dict, a, b = dictAddB(tmp, dict, a, b)
            -- Any other unknown code means the input is corrupt.
            if dict[code] ~= tmp then
                return nil, "could not find code from dict. Invalid input?"
            end
            result[n] = tmp
            n = n+1
        end
        last = code
    end
    return tconcat(result)
end

return {
    compress = compress,
    decompress = decompress,
}

--------------------------------------------------------------------------------
/profiling.lua:
--------------------------------------------------------------------------------
-- Contains some of the profiling code

local lualzw = require("lualzw")
local LibCompress = require("LibCompress")
local char = string.char

--- Runs comp/decomp on the input several times and prints the results.
-- First line printed: input size, compressed size, decompressed size and
-- whether the round trip reproduced the input. Second line: average compress
-- time, average decompress time (os.clock seconds) and compressed size as a
-- percentage of the input size.
-- @param input  string to compress
-- @param comp   compression function
-- @param decomp decompression function
local function profile(input, comp, decomp)
    local compressT = 0
    local decompressT = 0
    local timesT = 10
    -- Kept local so that repeated runs do not leak results into globals.
    local compressed, decompressed
    local t1,t2,t3
    for i = 1, timesT do
        t1 = os.clock()
        compressed = comp(input)
        t2 = os.clock()
        decompressed = decomp(compressed)
        t3 = os.clock()
        compressT = compressT + t2-t1
        decompressT = decompressT + t3-t2
    end
    print(#input, #compressed, #decompressed, input == decompressed)
    print(compressT/timesT, decompressT/timesT, #compressed/#input*100)
end

-- Build the test inputs: random bytes, random bytes below 128, a repeating
-- 0..255 byte cycle, and a single repeated byte.
math.randomseed(1)
local input1 = {}
local input2 = {}
local input3 = {}
local input4 = {}
for i = 1, 1000000 do
    input1[i] = char(math.random(0, 255))
    input2[i] = char(math.random(0, 127))
    input3[i] = char(i%256)
    input4[i] = char(100)
end
input1 = table.concat(input1)
input2 = table.concat(input2)
input3 = table.concat(input3)
input4 = table.concat(input4)
local input5 = ("ymn32h8hm8ekrwjkrn9f"):rep(50000)

profile(input1, LibCompress.CompressLZW, LibCompress.DecompressLZW)
profile(input1, lualzw.compress, lualzw.decompress)

profile(input2, LibCompress.CompressLZW, LibCompress.DecompressLZW)
profile(input2, lualzw.compress, lualzw.decompress)

profile(input3, LibCompress.CompressLZW, LibCompress.DecompressLZW)
profile(input3, lualzw.compress, lualzw.decompress)

profile(input4, LibCompress.CompressLZW, LibCompress.DecompressLZW)
profile(input4, lualzw.compress, lualzw.decompress)

profile(input5, LibCompress.CompressLZW, LibCompress.DecompressLZW)
profile(input5, lualzw.compress, lualzw.decompress)

--------------------------------------------------------------------------------