├── CHANGES.txt ├── COPYRIGHT.txt ├── MANIFEST ├── README.md ├── bin └── gunziplua ├── dist.info ├── lmod ├── bin │ └── gunziplua.lua └── compress │ └── deflatelua.lua ├── rockspec.in ├── share └── compress.deflatelua │ └── hello.txt.gz ├── test └── test.lua └── util.mk /CHANGES.txt: -------------------------------------------------------------------------------- 1 | 0.3.20111128 2 | Renamed DEFLATE.deflate to DEFLATE.inflate (breaks API). 3 | Expanded docs. 4 | 5 | 0.1.20110222 6 | Added zlib header support (RFC1950) 7 | 8 | 0.1.2008 9 | Initial version 10 | -------------------------------------------------------------------------------- /COPYRIGHT.txt: -------------------------------------------------------------------------------- 1 | lua-compress-deflatelua License 2 | 3 | =============================================================================== 4 | 5 | Copyright (C) 2008, David Manura. 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | 25 | =============================================================================== 26 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | COPYRIGHT.txt 2 | MANIFEST 3 | CHANGES.txt 4 | rockspec.in 5 | dist.info 6 | util.mk 7 | bin/gunziplua 8 | lmod/bin/gunziplua.lua 9 | lmod/compress/deflatelua.lua 10 | share/compress.deflatelua/hello.txt.gz 11 | test/test.lua 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Description 2 | ----------- 3 | 4 | `compress.deflatelua` is a library implementing [DEFLATE], as well as 5 | [zlib] and [gzip] decompression in pure [Lua]. 6 | 7 | Example usage 8 | ------------- 9 | 10 | ./gunziplua lua-5.1.4.tar.gz 11 | 12 | Performance 13 | ----------- 14 | 15 | It's somewhat slow (as expected). To decompress `lua-5.1.4.tar.gz`, it 16 | takes about 9 seconds with CRC-checking enabled (4 with LuaJIT 1.x) or 17 | 3 seconds with CRC checking disabled (2 with LuaJIT 1.x). 18 | 19 | Rationale 20 | --------- 21 | 22 | Possible reasons for this module include: 23 | 24 | 1. for pedagogical reasons 25 | 2. to benchmark Lua 26 | 3. to bootstrap applications like LuaRocks/LuaDist 27 | that might not have a precompiled copy of gunzip, and 28 | 4. because [DEFLATE] has been implemented in other languages too, 29 | including Python, Perl, and Lisp. 30 | 31 | Status 32 | ------ 33 | 34 | It should be robust and there are no known bugs. 
It successfully 35 | decompresses gzip files but there is currently no compressor implementation. 36 | 37 | Dependencies 38 | ------------ 39 | 40 | * [digest.crc32lua] - CRC-32 checksum implemented in pure Lua 41 | * [bit.numberlua] - bitwise operations implemented in pure Lua as numbers 42 | (not required if using a native bitwise operation library) 43 | * [pythonic.optparse] - command-line processing resembling the Python 44 | optparse module 45 | 46 | Conventions 47 | ----------- 48 | 49 | The modules above follow a somewhat Perl CPAN-like naming convention. 50 | Compare to [`Compress::Zlib::Perl`], [`Digest::CRC32`] and the `Bit::` 51 | modules. 52 | 53 | Author 54 | ------ 55 | 56 | David Manura. See COPYRIGHT file. 57 | 58 | 59 | [DEFLATE]: http://tools.ietf.org/html/rfc1951 60 | [zlib]: http://tools.ietf.org/html/rfc1950 61 | [gzip]: http://tools.ietf.org/html/rfc1952 62 | [Lua]: http://www.lua.org/ 63 | [digest.crc32lua]: http://github.com/davidm/lua-digest-crc32lua 64 | [bit.numberlua]: http://github.com/davidm/lua-bit-numberlua 65 | [pythonic.optparse]: http://github.com/davidm/lua-pythonic-optparse 66 | [`Compress::Zlib::Perl`]: http://search.cpan.org/~nwclark/Compress-Zlib-Perl/Perl.pm 67 | [`Digest::CRC32`]: http://search.cpan.org/~fays/Digest-Crc32/Crc32.pm 68 | -------------------------------------------------------------------------------- /bin/gunziplua: -------------------------------------------------------------------------------- 1 | #!/bin/env lua 2 | -- gunziplua command-line utility 3 | package.path = '../lua-digest-crc32lua/lmod/?.lua;' .. package.path 4 | package.path = '../lua-pythonic-optparse/lmod/?.lua;' .. package.path 5 | package.path = '../lua-bit-numberlua/lmod/?.lua;' .. package.path 6 | package.path = 'lmod/?.lua;' .. package.path 7 | require 'bin.gunziplua' (...) 
8 | -------------------------------------------------------------------------------- /dist.info: -------------------------------------------------------------------------------- 1 | type = [[all]] 2 | arch = [[Universal]] 3 | short = [['compress.deflatelua' DEFLATE (RFC1951)/gunzip implemented in pure Lua]] 4 | author = [[David Manura]] 5 | full = [[Note: use lzlib instead for higher performance.]] 6 | maintainer = [[David Manura ]] 7 | version = [[$(_VERSION)]] 8 | homepage = [[http://lua-users.org/wiki/ModuleCompressDeflateLua]] 9 | license = [[MIT]] 10 | name = [[compress.deflatelua]] 11 | dependencies = { 12 | ['lua'] = [[>=5.1]], 13 | ['digest.crc32lua'] = [[>=000.003]], 14 | ['pythonic.optparse'] = [[>=0.1]], 15 | ['bit.numberlua'] = [[>=000.003]], 16 | } 17 | 18 | -------------------------------------------------------------------------------- /lmod/bin/gunziplua.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | bin.gunzip 3 | gunzip command partially reimplemented in Lua. 4 | 5 | Note: this does not implement all of the GNU 6 | gunzip[1] command-line options and might have 7 | slightly different behavior. 8 | 9 | This is designed to be called from a shell script: 10 | 11 | #!/bin/env lua 12 | package.path = '?.lua;' .. package.path 13 | require 'dmlib.command_gunzip' (...) 14 | 15 | References 16 | 17 | [1] http://www.gnu.org/software/gzip/ 18 | 19 | (c) 2008-2011 David Manura. Licensed under the same terms as Lua (MIT). 20 | --]] 21 | 22 | local assert = assert 23 | local error = error 24 | local ipairs = ipairs 25 | local require = require 26 | local xpcall = xpcall 27 | local type = type 28 | local io = io 29 | local os = os 30 | local string = string 31 | local debug = require "debug" 32 | local debug_traceback = debug.traceback 33 | local _G = _G 34 | 35 | local DEFLATE = require "compress.deflatelua" 36 | 37 | local OptionParser = require "pythonic.optparse" . 
OptionParser 38 | 39 | local version = '0.1' 40 | 41 | 42 | local function runtime_assert(val, msg) -- like assert, but raises the message wrapped in a table 43 | if not val then error({msg}) end 44 | return val 45 | end 46 | 47 | 48 | local function runtime_error(s, level) 49 | level = level or 1 50 | error({s}, level+1) 51 | end 52 | 53 | 54 | local function file_exists(filename) 55 | local fh = io.open(filename) 56 | if fh then fh:close(); return true end 57 | return false 58 | end 59 | 60 | 61 | -- Run gunzip command, given command-line arguments. 62 | local function call(...) 63 | local opt = OptionParser{usage="%prog [options] [gzip-file...]", 64 | version=string.format("gunzip %s", version), 65 | add_help_option=false} 66 | opt.add_option{"-h", "--help", action="store_true", dest="help", 67 | help="give this help"} 68 | opt.add_option{ 69 | "-c", "--stdout", dest="stdout", action="store_true", 70 | help="write on standard output, keep original files unchanged"} 71 | opt.add_option{ 72 | "-f", "--force", dest="force", action="store_true", 73 | help="force overwrite of output file"} 74 | opt.add_option{ 75 | "--disable-crc", dest="disable_crc", action="store_true", 76 | help="skip CRC check (faster performance)"} 77 | 78 | 79 | local options, args = opt.parse_args() 80 | 81 | local gzipfiles = args 82 | 83 | if options.help then 84 | opt.print_help() 85 | os.exit() 86 | end 87 | 88 | local ok, err = xpcall(function() 89 | local outfile_of = {} 90 | local out_of = {} 91 | 92 | for _,gzipfile in ipairs(gzipfiles) do 93 | local base = gzipfile:match('(.+)%.[gG][zZ]$') 94 | if not base then 95 | runtime_error(gzipfile .. ': unknown suffix') 96 | end 97 | outfile_of[gzipfile] = base 98 | 99 | out_of[gzipfile] = 100 | (options.stdout or not gzipfile) and assert(io.stdout) 101 | or outfile_of[gzipfile] 102 | 103 | if type(out_of[gzipfile]) == 'string' then 104 | if not options.force and file_exists(out_of[gzipfile]) then -- -f/--force skips the overwrite prompt 105 | io.stderr:write(out_of[gzipfile] .. 106 | ' already exists; do you wish to overwrite(y or n)? 
') 107 | if not (io.stdin:read'*l' or ''):match'^[yY]' then -- EOF on stdin counts as "no" 108 | runtime_error 'not overwritten' 109 | end 110 | end 111 | end 112 | end 113 | 114 | for _,gzipfile in ipairs(gzipfiles) do 115 | local fh = gzipfile and runtime_assert(io.open(gzipfile, 'rb')) 116 | or assert(io.stdin) 117 | local ofh = type(out_of[gzipfile]) == 'string' and 118 | runtime_assert(io.open(out_of[gzipfile], 'wb')) 119 | or out_of[gzipfile] 120 | 121 | DEFLATE.gunzip {input=fh, output=ofh, disable_crc=options.disable_crc} 122 | if fh ~= io.stdin then fh:close() end; if ofh ~= io.stdout then ofh:close() end -- flush and release handles before the source file is removed 123 | end 124 | 125 | if not options.stdout then 126 | for _,gzipfile in ipairs(gzipfiles) do 127 | assert(os.remove(gzipfile)) 128 | end 129 | end 130 | 131 | end, debug_traceback) 132 | if not ok then 133 | if type(err) == 'table' then err = err[1] end 134 | io.stderr:write('error: ' .. err, '\n') 135 | end 136 | end 137 | 138 | 139 | return call 140 | 141 | --[[ 142 | LICENSE 143 | 144 | Copyright (C) 2008, David Manura. 145 | 146 | Permission is hereby granted, free of charge, to any person obtaining a copy 147 | of this software and associated documentation files (the "Software"), to deal 148 | in the Software without restriction, including without limitation the rights 149 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 150 | copies of the Software, and to permit persons to whom the Software is 151 | furnished to do so, subject to the following conditions: 152 | 153 | The above copyright notice and this permission notice shall be included in 154 | all copies or substantial portions of the Software. 155 | 156 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 157 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 158 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 159 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 160 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 161 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 162 | THE SOFTWARE. 163 | 164 | (end license) 165 | --]] 166 | -------------------------------------------------------------------------------- /lmod/compress/deflatelua.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | LUA MODULE 4 | 5 | compress.deflatelua - deflate (and gunzip/zlib) implemented in Lua. 6 | 7 | SYNOPSIS 8 | 9 | local DEFLATE = require 'compress.deflatelua' 10 | -- uncompress gzip file 11 | local fh = assert(io.open'foo.txt.gz', 'rb') 12 | local ofh = assert(io.open'foo.txt', 'wb') 13 | DEFLATE.gunzip {input=fh, output=ofh} 14 | fh:close(); ofh:close() 15 | -- can also uncompress from string including zlib and raw DEFLATE formats. 16 | 17 | DESCRIPTION 18 | 19 | This is a pure Lua implementation of decompressing the DEFLATE format, 20 | including the related zlib and gzip formats. 21 | 22 | Note: This library only supports decompression. 23 | Compression is not currently implemented. 24 | 25 | API 26 | 27 | Note: in the following functions, input stream `fh` may be 28 | a file handle, string, or an iterator function that returns strings. 29 | Output stream `ofh` may be a file handle or a function that 30 | consumes one byte (number 0..255) per call. 31 | 32 | DEFLATE.inflate {input=fh, output=ofh} 33 | 34 | Decompresses input stream `fh` in the DEFLATE format 35 | while writing to output stream `ofh`. 36 | DEFLATE is detailed in http://tools.ietf.org/html/rfc1951 . 37 | 38 | DEFLATE.gunzip {input=fh, output=ofh, disable_crc=disable_crc} 39 | 40 | Decompresses input stream `fh` with the gzip format 41 | while writing to output stream `ofh`. 42 | `disable_crc` (defaults to `false`) will disable CRC-32 checking 43 | to increase speed. 
44 | gzip is detailed in http://tools.ietf.org/html/rfc1952 . 45 | 46 | DEFLATE.inflate_zlib {input=fh, output=ofh, disable_crc=disable_crc} 47 | 48 | Decompresses input stream `fh` with the zlib format 49 | while writing to output stream `ofh`. 50 | `disable_crc` (defaults to `false`) will disable CRC-32 checking 51 | to increase speed. 52 | zlib is detailed in http://tools.ietf.org/html/rfc1950 . 53 | 54 | DEFLATE.adler32(byte, crc) --> rcrc 55 | 56 | Returns adler32 checksum of byte `byte` (number 0..255) appended 57 | to string with adler32 checksum `crc`. This is internally used by 58 | `inflate_zlib`. 59 | ADLER32 is detailed in http://tools.ietf.org/html/rfc1950 . 60 | 61 | COMMAND LINE UTILITY 62 | 63 | A `gunziplua` command line utility (in folder `bin`) is also provided. 64 | This mimics the *nix `gunzip` utility but is a pure Lua implementation 65 | that invokes this library. For help do 66 | 67 | gunziplua -h 68 | 69 | DEPENDENCIES 70 | 71 | Requires 'digest.crc32lua' (used for optional CRC-32 checksum checks). 72 | https://github.com/davidm/lua-digest-crc32lua 73 | 74 | Will use a bit library ('bit', 'bit32', 'bit.numberlua') if available. This 75 | is not that critical for this library but is required by digest.crc32lua. 76 | 77 | 'pythonic.optparse' is only required by the optional `gunziplua` 78 | command-line utility for command line parsing. 79 | https://github.com/davidm/lua-pythonic-optparse 80 | 81 | INSTALLATION 82 | 83 | Copy the `compress` directory into your LUA_PATH.
84 | 85 | REFERENCES 86 | 87 | [1] DEFLATE Compressed Data Format Specification version 1.3 88 | http://tools.ietf.org/html/rfc1951 89 | [2] GZIP file format specification version 4.3 90 | http://tools.ietf.org/html/rfc1952 91 | [3] http://en.wikipedia.org/wiki/DEFLATE 92 | [4] pyflate, by Paul Sladen 93 | http://www.paul.sladen.org/projects/pyflate/ 94 | [5] Compress::Zlib::Perl - partial pure Perl implementation of 95 | Compress::Zlib 96 | http://search.cpan.org/~nwclark/Compress-Zlib-Perl/Perl.pm 97 | 98 | LICENSE 99 | 100 | (c) 2008-2011 David Manura. Licensed under the same terms as Lua (MIT). 101 | 102 | Permission is hereby granted, free of charge, to any person obtaining a copy 103 | of this software and associated documentation files (the "Software"), to deal 104 | in the Software without restriction, including without limitation the rights 105 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 106 | copies of the Software, and to permit persons to whom the Software is 107 | furnished to do so, subject to the following conditions: 108 | 109 | The above copyright notice and this permission notice shall be included in 110 | all copies or substantial portions of the Software. 111 | 112 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 113 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 114 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 115 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 116 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 117 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 118 | THE SOFTWARE. 
119 | (end license) 120 | --]] 121 | 122 | local M = {_TYPE='module', _NAME='compress.deflatelua', _VERSION='0.3.20111128'} 123 | 124 | local assert = assert 125 | local error = error 126 | local ipairs = ipairs 127 | local pairs = pairs 128 | local print = print 129 | local require = require 130 | local tostring = tostring 131 | local type = type 132 | local setmetatable = setmetatable 133 | local io = io 134 | local math = math 135 | local table_sort = table.sort 136 | local math_max = math.max 137 | local string_char = string.char 138 | 139 | --[[ 140 | Requires the first module listed that exists, else raises like `require`. 141 | If a non-string is encountered, it is returned. 142 | Second return value is module name loaded (or ''). 143 | --]] 144 | local function requireany(...) 145 | local errs = {} 146 | for i = 1, select('#', ...) do local name = select(i, ...) 147 | if type(name) ~= 'string' then return name, '' end 148 | local ok, mod = pcall(require, name) 149 | if ok then return mod, name end 150 | errs[#errs+1] = mod 151 | end 152 | error(table.concat(errs, '\n'), 2) 153 | end 154 | 155 | 156 | local crc32 = require "digest.crc32lua" . crc32_byte 157 | local bit, name_ = requireany('bit', 'bit32', 'bit.numberlua', nil) 158 | 159 | local DEBUG = false 160 | 161 | -- Whether to use `bit` library functions in current module. 162 | -- Unlike the crc32 library, it doesn't make much difference in this module. 163 | local NATIVE_BITOPS = (bit ~= nil) 164 | 165 | local band, lshift, rshift 166 | if NATIVE_BITOPS then 167 | band = bit.band 168 | lshift = bit.lshift 169 | rshift = bit.rshift 170 | end 171 | 172 | 173 | local function warn(s) 174 | io.stderr:write(s, '\n') 175 | end 176 | 177 | 178 | local function debug(...) 179 | print('DEBUG', ...) 
180 | end 181 | 182 | 183 | local function runtime_error(s, level) 184 | level = level or 1 185 | error({s}, level+1) 186 | end 187 | 188 | 189 | local function make_outstate(outbs) 190 | local outstate = {} 191 | outstate.outbs = outbs 192 | outstate.window = {} 193 | outstate.window_pos = 1 194 | return outstate 195 | end 196 | 197 | 198 | local function output(outstate, byte) 199 | -- debug('OUTPUT:', s) 200 | local window_pos = outstate.window_pos 201 | outstate.outbs(byte) 202 | outstate.window[window_pos] = byte 203 | outstate.window_pos = window_pos % 32768 + 1 -- 32K 204 | end 205 | 206 | 207 | local function noeof(val) 208 | return assert(val, 'unexpected end of file') 209 | end 210 | 211 | 212 | local function hasbit(bits, bit) 213 | return bits % (bit + bit) >= bit 214 | end 215 | 216 | 217 | local function memoize(f) 218 | local mt = {} 219 | local t = setmetatable({}, mt) 220 | function mt:__index(k) 221 | local v = f(k) 222 | t[k] = v 223 | return v 224 | end 225 | return t 226 | end 227 | 228 | 229 | -- small optimization (lookup table for powers of 2) 230 | local pow2 = memoize(function(n) return 2^n end) 231 | 232 | --local tbits = memoize( 233 | -- function(bits) 234 | -- return memoize( function(bit) return getbit(bits, bit) end ) 235 | -- end ) 236 | 237 | 238 | -- weak metatable marking objects as bitstream type 239 | local is_bitstream = setmetatable({}, {__mode='k'}) 240 | 241 | 242 | -- DEBUG 243 | -- prints LSB first 244 | --[[ 245 | local function bits_tostring(bits, nbits) 246 | local s = '' 247 | local tmp = bits 248 | local function f() 249 | local b = tmp % 2 == 1 and 1 or 0 250 | s = s .. 
b 251 | tmp = (tmp - b) / 2 252 | end 253 | if nbits then 254 | for i=1,nbits do f() end 255 | else 256 | while tmp ~= 0 do f() end 257 | end 258 | 259 | return s 260 | end 261 | --]] 262 | 263 | local function bytestream_from_file(fh) 264 | local o = {} 265 | function o:read() 266 | local sb = fh:read(1) 267 | if sb then return sb:byte() end 268 | end 269 | return o 270 | end 271 | 272 | 273 | local function bytestream_from_string(s) 274 | local i = 1 275 | local o = {} 276 | function o:read() 277 | local by 278 | if i <= #s then 279 | by = s:byte(i) 280 | i = i + 1 281 | end 282 | return by 283 | end 284 | return o 285 | end 286 | 287 | 288 | local function bytestream_from_function(f) -- f() yields successive string chunks, then nil at end of input 289 | local i = 0 290 | local buffer = '' 291 | local o = {} 292 | function o:read() 293 | i = i + 1 294 | while buffer and i > #buffer do -- refill; loops so that empty chunks are skipped 295 | buffer = f() 296 | if not buffer then return end 297 | i = 1 298 | end 299 | return buffer and buffer:byte(i,i) -- keeps returning nil on every call after end of input 300 | end 301 | return o 302 | end 303 | 304 | 305 | local function bitstream_from_bytestream(bys) 306 | local buf_byte = 0 307 | local buf_nbit = 0 308 | local o = {} 309 | 310 | function o:nbits_left_in_byte() 311 | return buf_nbit 312 | end 313 | 314 | if NATIVE_BITOPS then 315 | function o:read(nbits) 316 | nbits = nbits or 1 317 | while buf_nbit < nbits do 318 | local byte = bys:read() 319 | if not byte then return end -- note: more calls also return nil 320 | buf_byte = buf_byte + lshift(byte, buf_nbit) 321 | buf_nbit = buf_nbit + 8 322 | end 323 | local bits 324 | if nbits == 0 then 325 | bits = 0 326 | elseif nbits == 32 then 327 | bits = buf_byte 328 | buf_byte = 0 329 | else 330 | bits = band(buf_byte, rshift(0xffffffff, 32 - nbits)) 331 | buf_byte = rshift(buf_byte, nbits) 332 | end 333 | buf_nbit = buf_nbit - nbits 334 | return bits 335 | end 336 | else 337 | function o:read(nbits) 338 | nbits = nbits or 1 339 | while buf_nbit < nbits do 340 | local byte = bys:read() 341 | if not byte then return end -- note: more calls also return nil 342 | 
buf_byte = buf_byte + pow2[buf_nbit] * byte 343 | buf_nbit = buf_nbit + 8 344 | end 345 | local m = pow2[nbits] 346 | local bits = buf_byte % m 347 | buf_byte = (buf_byte - bits) / m 348 | buf_nbit = buf_nbit - nbits 349 | return bits 350 | end 351 | end 352 | 353 | is_bitstream[o] = true 354 | 355 | return o 356 | end 357 | 358 | 359 | local function get_bitstream(o) 360 | local bs 361 | if is_bitstream[o] then 362 | return o 363 | elseif io.type(o) == 'file' then 364 | bs = bitstream_from_bytestream(bytestream_from_file(o)) 365 | elseif type(o) == 'string' then 366 | bs = bitstream_from_bytestream(bytestream_from_string(o)) 367 | elseif type(o) == 'function' then 368 | bs = bitstream_from_bytestream(bytestream_from_function(o)) 369 | else 370 | runtime_error 'unrecognized type' 371 | end 372 | return bs 373 | end 374 | 375 | 376 | local function get_obytestream(o) 377 | local bs 378 | if io.type(o) == 'file' then 379 | bs = function(sbyte) o:write(string_char(sbyte)) end 380 | elseif type(o) == 'function' then 381 | bs = o 382 | else 383 | runtime_error('unrecognized type: ' .. 
tostring(o)) 384 | end 385 | return bs 386 | end 387 | 388 | 389 | local function HuffmanTable(init, is_full) 390 | local t = {} 391 | if is_full then 392 | for val,nbits in pairs(init) do 393 | if nbits ~= 0 then 394 | t[#t+1] = {val=val, nbits=nbits} 395 | --debug('*',val,nbits) 396 | end 397 | end 398 | else 399 | for i=1,#init-2,2 do 400 | local firstval, nbits, nextval = init[i], init[i+1], init[i+2] 401 | --debug(val, nextval, nbits) 402 | if nbits ~= 0 then 403 | for val=firstval,nextval-1 do 404 | t[#t+1] = {val=val, nbits=nbits} 405 | end 406 | end 407 | end 408 | end 409 | table_sort(t, function(a,b) 410 | return a.nbits == b.nbits and a.val < b.val or a.nbits < b.nbits 411 | end) 412 | 413 | -- assign codes 414 | local code = 1 -- leading 1 marker 415 | local nbits = 0 416 | for i,s in ipairs(t) do 417 | if s.nbits ~= nbits then 418 | code = code * pow2[s.nbits - nbits] 419 | nbits = s.nbits 420 | end 421 | s.code = code 422 | --debug('huffman code:', i, s.nbits, s.val, code, bits_tostring(code)) 423 | code = code + 1 424 | end 425 | 426 | local minbits = math.huge 427 | local look = {} 428 | for i,s in ipairs(t) do 429 | minbits = math.min(minbits, s.nbits) 430 | look[s.code] = s.val 431 | end 432 | 433 | --for _,o in ipairs(t) do 434 | -- debug(':', o.nbits, o.val) 435 | --end 436 | 437 | -- function t:lookup(bits) return look[bits] end 438 | 439 | local msb = NATIVE_BITOPS and function(bits, nbits) 440 | local res = 0 441 | for i=1,nbits do 442 | res = lshift(res, 1) + band(bits, 1) 443 | bits = rshift(bits, 1) 444 | end 445 | return res 446 | end or function(bits, nbits) 447 | local res = 0 448 | for i=1,nbits do 449 | local b = bits % 2 450 | bits = (bits - b) / 2 451 | res = res * 2 + b 452 | end 453 | return res 454 | end 455 | 456 | local tfirstcode = memoize( 457 | function(bits) return pow2[minbits] + msb(bits, minbits) end) 458 | 459 | function t:read(bs) 460 | local code = 1 -- leading 1 marker 461 | local nbits = 0 462 | while 1 do 463 | if 
nbits == 0 then -- small optimization (optional) 464 | code = tfirstcode[noeof(bs:read(minbits))] 465 | nbits = nbits + minbits 466 | else 467 | local b = noeof(bs:read()) 468 | nbits = nbits + 1 469 | code = code * 2 + b -- MSB first 470 | --[[NATIVE_BITOPS 471 | code = lshift(code, 1) + b -- MSB first 472 | --]] 473 | end 474 | --debug('code?', code, bits_tostring(code)) 475 | local val = look[code] 476 | if val then 477 | --debug('FOUND', val) 478 | return val 479 | end 480 | end 481 | end 482 | 483 | return t 484 | end 485 | 486 | 487 | local function parse_gzip_header(bs) 488 | -- local FLG_FTEXT = 2^0 489 | local FLG_FHCRC = 2^1 490 | local FLG_FEXTRA = 2^2 491 | local FLG_FNAME = 2^3 492 | local FLG_FCOMMENT = 2^4 493 | 494 | local id1 = bs:read(8) 495 | local id2 = bs:read(8) 496 | if id1 ~= 31 or id2 ~= 139 then 497 | runtime_error 'not in gzip format' 498 | end 499 | local cm = bs:read(8) -- compression method 500 | local flg = bs:read(8) -- FLaGs 501 | local mtime = bs:read(32) -- Modification TIME 502 | local xfl = bs:read(8) -- eXtra FLags 503 | local os = bs:read(8) -- Operating System 504 | 505 | if DEBUG then 506 | debug("CM=", cm) 507 | debug("FLG=", flg) 508 | debug("MTIME=", mtime) 509 | -- debug("MTIME_str=",os.date("%Y-%m-%d %H:%M:%S",mtime)) -- non-portable 510 | debug("XFL=", xfl) 511 | debug("OS=", os) 512 | end 513 | 514 | if not os then runtime_error 'invalid header' end 515 | 516 | if hasbit(flg, FLG_FEXTRA) then 517 | local xlen = bs:read(16) 518 | local extra = 0 519 | for i=1,xlen do 520 | extra = bs:read(8) 521 | end 522 | if not extra then runtime_error 'invalid header' end 523 | end 524 | 525 | local function parse_zstring(bs) 526 | repeat 527 | local by = bs:read(8) 528 | if not by then runtime_error 'invalid header' end 529 | until by == 0 530 | end 531 | 532 | if hasbit(flg, FLG_FNAME) then 533 | parse_zstring(bs) 534 | end 535 | 536 | if hasbit(flg, FLG_FCOMMENT) then 537 | parse_zstring(bs) 538 | end 539 | 540 | if hasbit(flg, 
FLG_FHCRC) then 541 | local crc16 = bs:read(16) 542 | if not crc16 then runtime_error 'invalid header' end 543 | -- IMPROVE: check CRC. where is an example .gz file that 544 | -- has this set? 545 | if DEBUG then 546 | debug("CRC16=", crc16) 547 | end 548 | end 549 | end 550 | 551 | local function parse_zlib_header(bs) -- parses the 2-byte zlib header (RFC 1950); returns 2^(cinfo+8), the LZ77 window size 552 | local cm = bs:read(4) -- Compression Method 553 | local cinfo = bs:read(4) -- Compression info 554 | local fcheck = bs:read(5) -- FLaGs: FCHECK (check bits for CMF and FLG) 555 | local fdict = bs:read(1) -- FLaGs: FDICT (present dictionary) 556 | local flevel = noeof(bs:read(2)) -- FLaGs: FLEVEL (compression level); last header read, so nil here means truncated input 557 | local cmf = cinfo * 16 + cm -- CMF (Compression Method and flags) 558 | local flg = fcheck + fdict * 32 + flevel * 64 -- FLaGs 559 | 560 | if cm ~= 8 then -- not "deflate" 561 | runtime_error("unrecognized zlib compression method: " .. cm) 562 | end 563 | if cinfo > 7 then 564 | runtime_error("invalid zlib window size: cinfo=" .. cinfo) 565 | end 566 | local window_size = 2^(cinfo + 8) 567 | 568 | if (cmf*256 + flg) % 31 ~= 0 then 569 | runtime_error("invalid zlib header (bad fcheck sum)") 570 | end 571 | 572 | if fdict == 1 then 573 | runtime_error("FIX:TODO - FDICT not currently implemented") 574 | local dictid_ = bs:read(32) 575 | end 576 | 577 | return window_size 578 | end 579 | 580 | local function parse_huffmantables(bs) 581 | local hlit = bs:read(5) -- # of literal/length codes - 257 582 | local hdist = bs:read(5) -- # of distance codes - 1 583 | local hclen = noeof(bs:read(4)) -- # of code length codes - 4 584 | 585 | local ncodelen_codes = hclen + 4 586 | local codelen_init = {} 587 | local codelen_vals = { 588 | 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} 589 | for i=1,ncodelen_codes do 590 | local nbits = bs:read(3) 591 | local val = codelen_vals[i] 592 | codelen_init[val] = nbits 593 | end 594 | local codelentable = HuffmanTable(codelen_init, true) 595 | 596 | local function decode(ncodes) 597 | local 
init = {} 598 | local nbits 599 | local val = 0 600 | while val < ncodes do 601 | local codelen = codelentable:read(bs) 602 | --FIX:check nil? 603 | local nrepeat 604 | if codelen <= 15 then 605 | nrepeat = 1 606 | nbits = codelen 607 | --debug('w', nbits) 608 | elseif codelen == 16 then 609 | nrepeat = 3 + noeof(bs:read(2)) 610 | -- nbits unchanged 611 | elseif codelen == 17 then 612 | nrepeat = 3 + noeof(bs:read(3)) 613 | nbits = 0 614 | elseif codelen == 18 then 615 | nrepeat = 11 + noeof(bs:read(7)) 616 | nbits = 0 617 | else 618 | error 'ASSERT' 619 | end 620 | for i=1,nrepeat do 621 | init[val] = nbits 622 | val = val + 1 623 | end 624 | end 625 | local huffmantable = HuffmanTable(init, true) 626 | return huffmantable 627 | end 628 | 629 | local nlit_codes = hlit + 257 630 | local ndist_codes = hdist + 1 631 | 632 | local littable = decode(nlit_codes) 633 | local disttable = decode(ndist_codes) 634 | 635 | return littable, disttable 636 | end


-- Lookup tables for the length codes 257..285 and the distance codes 0..29
-- (RFC 1951, section 3.2.5).  They are built the first time a
-- length/distance pair is decoded and are reused by every later call.
local tdecode_len_base
local tdecode_len_nextrabits
local tdecode_dist_base
local tdecode_dist_nextrabits

-- Builds the two length tables.
-- Returns base, nextra where base[code] is the smallest match length encoded
-- by `code` and nextra[code] is the number of extra bits that follow it.
local function build_len_tables()
  local base = {[257] = 3}
  local step = 1
  for first = 258, 285, 4 do
    for code = first, first + 3 do base[code] = base[code - 1] + step end
    -- The first two groups of four both advance by 1; after that the
    -- step doubles with every group.
    if first ~= 258 then step = step * 2 end
  end
  base[285] = 258 -- code 285 is special: length 258 with no extra bits

  local nextra = {}
  for code = 257, 285 do
    local j = math_max(code - 261, 0)
    if NATIVE_BITOPS then
      nextra[code] = rshift(j, 2)
    else
      nextra[code] = (j - (j % 4)) / 4
    end
  end
  nextra[285] = 0
  return base, nextra
end

-- Builds the two distance tables.
-- Returns base, nextra with the same meaning as in build_len_tables, but
-- indexed by distance code.  The base loop also writes an entry for code 30;
-- it is never used because nextra only covers the valid codes 0..29.
local function build_dist_tables()
  local base = {[0] = 1}
  local step = 1
  for first = 1, 29, 2 do
    for code = first, first + 1 do base[code] = base[code - 1] + step end
    if first ~= 1 then step = step * 2 end
  end

  local nextra = {}
  for code = 0, 29 do
    local j = math_max(code - 2, 0)
    if NATIVE_BITOPS then
      nextra[code] = rshift(j, 1)
    else
      nextra[code] = (j - (j % 2)) / 2
    end
  end
  return base, nextra
end

-- Decodes one symbol of a Huffman-compressed block and writes what it
-- produces through output(outstate, byte).
--   bs        - bit stream the block is read from
--   outstate  - output state; its `window` and `window_pos` fields are read
--               to resolve back-references
--   littable  - Huffman table for literal/length symbols
--   disttable - Huffman table for distance symbols
-- Returns true when the end-of-block symbol (256) was read, false otherwise.
-- Invalid length or distance symbols are reported with runtime_error and a
-- stream that ends inside the extra bits is reported through noeof
-- (both are assumed to raise; they are defined earlier in this file).
local function parse_compressed_item(bs, outstate, littable, disttable)
  local val = littable:read(bs)
  --debug(val, val < 256 and string_char(val))
  if val < 256 then -- literal
    output(outstate, val)
    return false
  end
  if val == 256 then -- end of block
    return true
  end

  if not tdecode_len_base then
    tdecode_len_base, tdecode_len_nextrabits = build_len_tables()
  end
  if not tdecode_dist_base then
    tdecode_dist_base, tdecode_dist_nextrabits = build_dist_tables()
  end

  -- Symbols 286 and 287 can be produced by the fixed Huffman code but are
  -- not valid lengths; they have no table entry.
  local nextrabits = tdecode_len_nextrabits[val]
  if not nextrabits then
    runtime_error 'invalid literal/length code'
  end
  local len = tdecode_len_base[val] + noeof(bs:read(nextrabits))

  -- Distance codes 30 and 31 are likewise invalid.  The check is made on the
  -- extra-bits table because the base table carries a stray entry for 30.
  local dist_val = disttable:read(bs)
  local dist_nextrabits = tdecode_dist_nextrabits[dist_val]
  if not dist_nextrabits then
    runtime_error 'invalid distance code'
  end
  local dist = tdecode_dist_base[dist_val] + noeof(bs:read(dist_nextrabits))

  --debug('BACK', len, dist)
  -- Copy byte by byte: the position is recomputed on every step from
  -- outstate.window_pos, so source and destination may overlap.
  for _ = 1, len do
    local pos = (outstate.window_pos - 1 - dist) % 32768 + 1 -- 32K
    output(outstate, assert(outstate.window[pos], 'invalid distance'))
  end
  return false
end


-- Decodes one DEFLATE block from `bs` and writes its bytes through
-- output(outstate, byte).
-- Returns true when the block header marked it as the final block.
-- Raises (via runtime_error) on the reserved block type and on a stored
-- block whose LEN/NLEN fields do not match.
local function parse_block(bs, outstate)
  local BTYPE_NO_COMPRESSION = 0
  local BTYPE_FIXED_HUFFMAN = 1
  local BTYPE_DYNAMIC_HUFFMAN = 2
  -- block type 3 is reserved and is rejected below

  local bfinal = bs:read(1)
  local btype = bs:read(2)

  if DEBUG then
    debug('bfinal=', bfinal)
    debug('btype=', btype)
  end

  if btype == BTYPE_NO_COMPRESSION then
    -- Stored data starts on a byte boundary: drop the rest of this byte.
    bs:read(bs:nbits_left_in_byte())
    local len = noeof(bs:read(16))
    local nlen = noeof(bs:read(16))
    -- NLEN is the one's complement of LEN (RFC 1951, section 3.2.4), so the
    -- two 16-bit values must add up to 0xFFFF.
    if len + nlen ~= 65535 then
      runtime_error 'invalid stored block lengths'
    end

    for _ = 1, len do
      output(outstate, noeof(bs:read(8)))
    end
  elseif btype == BTYPE_FIXED_HUFFMAN or btype == BTYPE_DYNAMIC_HUFFMAN then
    local littable, disttable
    if btype == BTYPE_DYNAMIC_HUFFMAN then
      littable, disttable = parse_huffmantables(bs)
    else
      littable = HuffmanTable {0,8, 144,9, 256,7, 280,8, 288,nil}
      disttable = HuffmanTable {0,5, 32,nil}
    end

    repeat
      local is_done = parse_compressed_item(
        bs, outstate, littable, disttable)
    until is_done
  else
    runtime_error 'unrecognized compression type'
  end

  return bfinal ~= 0
end


-- Decompresses a raw DEFLATE stream.
--   t.input  - compressed data, handed to get_bitstream
--              (test/test.lua passes a string)
--   t.output - byte sink, handed to get_obytestream
--              (test/test.lua passes a function called with each byte)
-- Blocks are decoded until one is flagged as final.
function M.inflate(t)
  local bs = get_bitstream(t.input)
  local outstate = make_outstate(get_obytestream(t.output))

  repeat
    local is_final = parse_block(bs, outstate)
  until is_final
end
local inflate = M.inflate


-- Decompresses a gzip stream: header, DEFLATE data, then a trailer holding
-- a 32-bit CRC and a 32-bit size field.
--   t.input, t.output - as for M.inflate
--   t.disable_crc     - when true the CRC of the output is neither computed
--                       nor compared (default false)
-- The size field is read but not checked.  Data following the trailer only
-- produces a warning.
function M.gunzip(t)
  local bs = get_bitstream(t.input)
  local outbs = get_obytestream(t.output)
  local disable_crc = t.disable_crc
  if disable_crc == nil then disable_crc = false end

  parse_gzip_header(bs)

  local data_crc32 = 0

  -- With CRC checking enabled every output byte passes through a wrapper
  -- that updates the running checksum before forwarding it.
  local sink = outbs
  if not disable_crc then
    sink = function(byte)
      data_crc32 = crc32(byte, data_crc32)
      outbs(byte)
    end
  end
  inflate{input=bs, output=sink}

  bs:read(bs:nbits_left_in_byte())

  local expected_crc32 = bs:read(32)
  local isize = bs:read(32) -- ignored
  if DEBUG then
    debug('crc32=', expected_crc32)
    debug('isize=', isize)
  end
  if not disable_crc and data_crc32 and data_crc32 ~= expected_crc32 then
    runtime_error('invalid compressed data--crc error')
  end
  if bs:read() then
    warn 'trailing garbage ignored'
  end
end


-- Updates a running Adler-32 checksum with one byte.
--   byte - the next data byte
--   crc  - checksum so far; the low 16 bits hold s1 and the high 16 bits s2
--          (M.inflate_zlib starts from 1)
-- Returns the new checksum.
function M.adler32(byte, crc)
  local s1 = crc % 65536
  local s2 = (crc - s1) / 65536
  s1 = (s1 + byte) % 65521
  s2 = (s2 + s1) % 65521
  return s2*65536 + s1
end -- 65521 is the largest prime smaller than 2^16


-- Decompresses a zlib stream: header, DEFLATE data, then a four-byte
-- Adler-32 checksum stored most significant byte first.
--   t.input, t.output - as for M.inflate
--   t.disable_crc     - when true the checksum of the output is neither
--                       computed nor compared (default false)
-- A stream that ends inside the checksum is reported through noeof.
-- Data following the checksum only produces a warning.
function M.inflate_zlib(t)
  local bs = get_bitstream(t.input)
  local outbs = get_obytestream(t.output)
  local disable_crc = t.disable_crc
  if disable_crc == nil then disable_crc = false end

  local window_size_ = parse_zlib_header(bs)

  local data_adler32 = 1

  local sink = outbs
  if not disable_crc then
    sink = function(byte)
      data_adler32 = M.adler32(byte, data_adler32)
      outbs(byte)
    end
  end
  inflate{input=bs, output=sink}

  bs:read(bs:nbits_left_in_byte())

  local b3 = noeof(bs:read(8))
  local b2 = noeof(bs:read(8))
  local b1 = noeof(bs:read(8))
  local b0 = noeof(bs:read(8))
  local expected_adler32 = ((b3*256 + b2)*256 + b1)*256 + b0
  if DEBUG then
    debug('adler32=', expected_adler32)
  end
  if not disable_crc and data_adler32 ~= expected_adler32 then
    runtime_error('invalid compressed data--crc error')
  end
  if bs:read() then
    warn 'trailing garbage ignored'
  end
end


return M
-------------------------------------------------------------------------------- /rockspec.in: -------------------------------------------------------------------------------- 1 | package = "lua-compress-deflatelua" 2 | version = "$(_VERSION)" 3 | source = { 4 | --url = "https://github.com/davidm/lua-compress-deflatelua/zipball/v$(_VERSION)", 5 | url = "git://github.com/davidm/lua-compress-deflatelua.git", 6 | branch='$(_VERSION)' 7 | } 8 | description = { 9 | summary = "'compress.deflatelua' DEFLATE (RFC1951)/gunzip implemented in pure Lua", 10 | detailed = [[ 11 | Note: use lzlib instead for higher performance.
12 | ]], 13 | license = "MIT/X11", 14 | homepage = "http://lua-users.org/wiki/ModuleCompressDeflateLua", 15 | -- https://github.com/davidm/lua-compress-deflatelua 16 | maintainer = "David Manura ", 17 | } 18 | dependencies = { 19 | "lua >= 5.1", -- including 5.2 20 | "lua-digest-crc32lua >= 0.3", -- somewhat optional 21 | --"lua-pythonic-optparse >= 0.3", -- optional 22 | -- one of these bitwise operator libraries: 23 | "lua-bit-numberlua >= 0.3", -- fallback 24 | --"LuaBitOp", -- included in LuaJIT 25 | --"Lua >= 5.2", -- included 'bit32' 26 | --"bit32", -- included in Lua 5.2 27 | } 28 | build = { 29 | type = "none", 30 | install = { 31 | lua = { 32 | ["compress.deflatelua"] = "lmod/compress/deflatelua.lua", 33 | ["bin.gunziplua"] = "lmod/bin/gunziplua.lua", 34 | }, 35 | bin = { 36 | ["gunziplua"] = "bin/gunziplua" 37 | } 38 | } 39 | } 40 | -- _VERSION from lmod/compress/deflatelua.lua 41 | -------------------------------------------------------------------------------- /share/compress.deflatelua/hello.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidm/lua-compress-deflatelua/6ea7c952e992502bc850e026963cd4929b8459de/share/compress.deflatelua/hello.txt.gz -------------------------------------------------------------------------------- /test/test.lua: --------------------------------------------------------------------------------
-- tests of lua-compress-deflatelua

package.path = '../lua-digest-crc32lua/lmod/?.lua;' .. package.path
package.path = '../lua-pythonic-optparse/lmod/?.lua;' .. package.path
package.path = '../lua-bit-numberlua/lmod/?.lua;' .. package.path
package.path = 'lmod/?.lua;' .. package.path

-- Kept local so that running the tests does not leave a global behind.
local deflate = require("compress.deflatelua")

-- Runs one of the module's decompressors on `input` and returns everything
-- it emitted as a single string.
--   decompress - function taking a table with `input` and `output` fields
--   input      - the compressed data as a string
local function decompress_to_string(decompress, input)
  local pieces = {}
  decompress {
    input = input,
    output = function(byte) pieces[#pieces+1] = string.char(byte) end
  }
  return table.concat(pieces)
end

-- zlib-wrapped stream
assert(decompress_to_string(deflate.inflate_zlib,
  "\120\156\203\072\205\201\201\087\040\207\047\202\073\001\000\026\011\004\093")
  == "hello world")

-- gzip-wrapped stream
assert(decompress_to_string(deflate.gunzip,
  "\031\139\008\000\217\124\100\077\000\003\203\072\205\201\201\087\040\207\047\202\073\001\000\133\017\074\013\011\000\000\000")
  == "hello world")

print 'DONE'

-------------------------------------------------------------------------------- /util.mk: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | # utility commands for package maintainers 3 | 4 | VERSIONFROM:=$(shell sed -n 's,.*_VERSION \+from \+\([^ ]\+\).*,\1,p' rockspec.in) 5 | VERSION:=$(shell sed -n "s,.*_VERSION='\([^']*\)'.*,\1,p" $(VERSIONFROM))-1 6 | NAME=$(shell lua -e 'dofile"rockspec.in"; print(package)') 7 | 8 | dist : 9 | rm -fr tmp/$(NAME)-$(VERSION) tmp/$(NAME)-$(VERSION).zip 10 | for x in `cat MANIFEST`; do install -D $$x tmp/$(NAME)-$(VERSION)/$$x || exit; done 11 | sed 's,$$(_VERSION),$(VERSION),g' tmp/$(NAME)-$(VERSION)/rockspec.in > tmp/$(NAME)-$(VERSION)/$(NAME)-$(VERSION).rockspec 12 | cd tmp && zip -r $(NAME)-$(VERSION).zip $(NAME)-$(VERSION) 13 | 14 | install : dist 15 | cd tmp/$(NAME)-$(VERSION) && luarocks make 16 | 17 | test : 18 | @if [ -e test.lua ]; then lua test.lua; fi 19 | @if [ -e test/test.lua ]; then lua test/test.lua; fi 20 | 21 | tag : 22 | git tag -f v$(VERSION) 23 | 24 | version : 25 | @echo $(NAME)-$(VERSION) 26 | 27 | .PHONY : dist install test tag version 28 | --------------------------------------------------------------------------------