├── .gitignore ├── README.md ├── zopfli ├── blocksplitter.go ├── cache.go ├── deflate.go ├── gzip_container.go ├── hash.go ├── katajainen.go ├── lz77.go ├── squeeze.go ├── tree.go ├── util.go ├── zlib_container.go ├── zopfli.go └── zopfli_lib.go └── zopfli_bin.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | go-zopfli 2 | ========= 3 | 4 | Go port of Zopfli, a zlib-compatible compression library. 5 | 6 | Zopfli compresses data more effectively than zlib does, at the expense of 7 | compression speed. The go-zopfli port is 2-3 times slower than the C version, 8 | and compresses at approximately 100 kB/s. 9 | 10 | It can be used to compress files that will not change often or if you need a 11 | lower-level interface to Deflate compression. 12 | 13 | [See the package documentation.](http://godoc.org/github.com/foobaz/go-zopfli/zopfli) 14 | -------------------------------------------------------------------------------- /zopfli/blocksplitter.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 | */ 19 | 20 | package zopfli 21 | 22 | import ( 23 | "fmt" 24 | "math" 25 | "os" 26 | ) 27 | 28 | // The "f" for the findMinimum function below. 29 | // i: the current parameter of f(i) 30 | // context: for your implementation 31 | type findMinimumFun func(i int, context interface{}) uint64 32 | 33 | // Finds minimum of function f(i) where i is of type int, f(i) is of type 34 | // float64, i is in range start-end (excluding end). 35 | func findMinimum(f findMinimumFun, context interface{}, start, end int) int { 36 | if end-start < 1024 { 37 | best := uint64(math.MaxUint64) 38 | result := start 39 | for i := start; i < end; i++ { 40 | v := f(i, context) 41 | if v < best { 42 | best = v 43 | result = i 44 | } 45 | } 46 | return result 47 | } 48 | 49 | // Try to find minimum faster by recursively checking multiple points. 50 | const NUM = 9 // Good value: 9. 
51 | var p [NUM]int 52 | var vp [NUM]uint64 53 | lastBest := uint64(math.MaxUint64) 54 | pos := start 55 | 56 | for end-start > NUM { 57 | for i := 0; i < NUM; i++ { 58 | p[i] = start + (i+1)*((end-start)/(NUM+1)) 59 | vp[i] = f(p[i], context) 60 | } 61 | var bestIndex int 62 | best := vp[0] 63 | for i := 1; i < NUM; i++ { 64 | if vp[i] < best { 65 | best = vp[i] 66 | bestIndex = i 67 | } 68 | } 69 | if best > lastBest { 70 | break 71 | } 72 | 73 | if bestIndex > 0 { 74 | start = p[bestIndex-1] 75 | } 76 | if bestIndex < NUM-1 { 77 | end = p[bestIndex+1] 78 | } 79 | 80 | pos = p[bestIndex] 81 | lastBest = best 82 | } 83 | return pos 84 | } 85 | 86 | // Returns estimated cost of a block in bits. It includes the size to encode the 87 | // tree and the size to encode all literal, length and distance symbols and their 88 | // extra bits. 89 | // 90 | // litLens: lz77 lit/lengths 91 | // dists: ll77 distances 92 | func (store LZ77Store) estimateCost() uint64 { 93 | return store.CalculateBlockSize(2) 94 | } 95 | 96 | type splitCostContext struct { 97 | store LZ77Store 98 | start, end int 99 | } 100 | 101 | // Gets the cost which is the sum of the cost of the left and the right section 102 | // of the data. 103 | // type: findMinimumFun 104 | func splitCost(i int, context interface{}) uint64 { 105 | c := context.(*splitCostContext) 106 | a := c.store[c.start:i] 107 | b := c.store[i:c.end] 108 | return a.estimateCost() + b.estimateCost() 109 | } 110 | 111 | func addSorted(splitPoints []int, value int) []int { 112 | oldSize := len(splitPoints) 113 | splitPoints = append(splitPoints, value) 114 | for i := 0; i < oldSize; i++ { 115 | if splitPoints[i] > value { 116 | copy(splitPoints[i+1:], splitPoints[i:]) 117 | splitPoints[i] = value 118 | break 119 | } 120 | } 121 | return splitPoints 122 | } 123 | 124 | // Prints the block split points as decimal and hex values in the terminal. 
125 | func (store LZ77Store) printBlockSplitPoints(lz77SplitPoints []int) { 126 | llSize := len(store) 127 | nLZ77Points := len(lz77SplitPoints) 128 | splitPoints := make([]int, 0, nLZ77Points) 129 | // The input is given as lz77 indices, but we want to see the 130 | // uncompressed index values. 131 | if nLZ77Points > 0 { 132 | var pos int 133 | for i := 0; i < llSize; i++ { 134 | var length int 135 | if store[i].dist == 0 { 136 | length = 1 137 | } else { 138 | length = int(store[i].litLen) 139 | } 140 | if lz77SplitPoints[len(splitPoints)] == i { 141 | splitPoints = append(splitPoints, pos) 142 | if len(splitPoints) >= nLZ77Points { 143 | break 144 | } 145 | } 146 | pos += length 147 | } 148 | } 149 | if len(splitPoints) != nLZ77Points { 150 | panic("number of points do not match") 151 | } 152 | 153 | fmt.Fprintf(os.Stderr, "block split points: ") 154 | for _, point := range splitPoints { 155 | fmt.Fprintf(os.Stderr, "%d ", point) 156 | } 157 | fmt.Fprintf(os.Stderr, "(hex:") 158 | for _, point := range splitPoints { 159 | fmt.Fprintf(os.Stderr, " %x", point) 160 | } 161 | fmt.Fprintf(os.Stderr, ")\n") 162 | } 163 | 164 | // Finds next block to try to split, the largest of the available ones. 165 | // The largest is chosen to make sure that if only a limited amount of blocks is 166 | // requested, their sizes are spread evenly. 167 | // llSize: the size of the LL77 data, which is the size of the done array here. 168 | // done: array indicating which blocks starting at that position are no longer 169 | // splittable (splitting them increases rather than decreases cost). 170 | // splitPoints: the splitpoints found so far. 171 | // nPoints: the amount of splitpoints found so far. 172 | // lStart: output variable, giving start of block. 173 | // lEnd: output variable, giving end of block. 174 | // returns 1 if a block was found, 0 if no block found (all are done). 
175 | func findLargestSplittableBlock(llSize int, done []bool, splitPoints []int) (lStart, lEnd int, found bool) { 176 | var longest int 177 | nPoints := len(splitPoints) 178 | for i := 0; i <= nPoints; i++ { 179 | var start, end int 180 | if i != 0 { 181 | start = splitPoints[i-1] 182 | } 183 | if i == nPoints { 184 | end = llSize - 1 185 | } else { 186 | end = splitPoints[i] 187 | } 188 | if !done[start] && end > longest+start { 189 | lStart = start 190 | lEnd = end 191 | found = true 192 | longest = end - start 193 | } 194 | } 195 | return lStart, lEnd, found 196 | } 197 | 198 | func (store LZ77Store) blockSplitLZ77(options *Options, maxBlocks int) []int { 199 | llSize := len(store) 200 | if llSize < 10 { 201 | // This code fails on tiny files. 202 | return nil 203 | } 204 | 205 | done := make([]bool, llSize) 206 | 207 | var splitPoints []int 208 | var lStart int 209 | lEnd := llSize 210 | for { 211 | if maxBlocks > 0 && len(splitPoints)+1 >= maxBlocks { 212 | break 213 | } 214 | 215 | var c splitCostContext 216 | c.store = store 217 | c.start = lStart 218 | c.end = lEnd 219 | if lStart >= lEnd { 220 | panic("overrun") 221 | } 222 | llPos := findMinimum(splitCost, &c, lStart+1, lEnd) 223 | 224 | if llPos <= lStart { 225 | panic("underrun") 226 | } 227 | if llPos >= lEnd { 228 | panic("overrun") 229 | } 230 | 231 | a := store[lStart:llPos] 232 | b := store[llPos:lEnd] 233 | splitCost := a.estimateCost() + b.estimateCost() 234 | both := store[lStart:lEnd] 235 | origCost := both.estimateCost() 236 | 237 | if splitCost > origCost || llPos == lStart+1 || llPos == lEnd { 238 | done[lStart] = true 239 | } else { 240 | splitPoints = addSorted(splitPoints, llPos) 241 | } 242 | 243 | var found bool 244 | lStart, lEnd, found = findLargestSplittableBlock(llSize, done, splitPoints) 245 | if !found { 246 | // No further split will probably reduce compression. 
247 | break 248 | } 249 | 250 | if lEnd < lStart+10 { 251 | break 252 | } 253 | } 254 | 255 | if options.Verbose { 256 | store.printBlockSplitPoints(splitPoints) 257 | } 258 | return splitPoints 259 | } 260 | 261 | // Does blocksplitting on uncompressed data. 262 | // The output splitpoints are indices in the uncompressed bytes. 263 | // 264 | // options: general program options. 265 | // in: uncompressed input data 266 | // inStart: where to start splitting 267 | // inEnd: where to end splitting (not inclusive) 268 | // maxBlocks: maximum amount of blocks to split into, or 0 for no limit 269 | // splitPoints: dynamic array to put the resulting split point coordinates into. 270 | // The coordinates are indices in the input array. 271 | func blockSplit(options *Options, in []byte, inStart, inEnd, maxBlocks int) []int { 272 | s := NewBlockState(options, in, inStart, inEnd) 273 | 274 | // Unintuitively, using a simple LZ77 method here instead of LZ77Optimal 275 | // results in better blocks. 276 | store := s.LZ77Greedy(inStart, inEnd) 277 | lz77SplitPoints := store.blockSplitLZ77(options, maxBlocks) 278 | 279 | // Convert LZ77 positions to positions in the uncompressed input. 280 | var splitPoints []int 281 | pos := inStart 282 | if len(lz77SplitPoints) > 0 { 283 | storeSize := len(store) 284 | for i := 0; i < storeSize; i++ { 285 | var length int 286 | if store[i].dist == 0 { 287 | length = 1 288 | } else { 289 | length = int(store[i].litLen) 290 | } 291 | if lz77SplitPoints[len(splitPoints)] == i { 292 | splitPoints = append(splitPoints, pos) 293 | if len(splitPoints) == len(lz77SplitPoints) { 294 | break 295 | } 296 | } 297 | pos += length 298 | } 299 | } 300 | if len(splitPoints) != len(lz77SplitPoints) { 301 | panic("number of points do not match") 302 | } 303 | return splitPoints 304 | } 305 | 306 | // Divides the input into equal blocks, does not even take LZ77 lengths into 307 | // account. 
308 | func blockSplitSimple(inStart, inEnd, blockSize int) (splitPoints []int) { 309 | i := inStart 310 | for i < inEnd { 311 | splitPoints = append(splitPoints, i) 312 | i += blockSize 313 | } 314 | return splitPoints 315 | } 316 | -------------------------------------------------------------------------------- /zopfli/cache.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 | */ 19 | 20 | package zopfli 21 | 22 | // Cache used by FindLongestMatch to remember previously found length/dist 23 | // values. 24 | // This is needed because the squeeze runs will ask these values multiple times for 25 | // the same position. 26 | // Uses large amounts of memory, since it has to remember the distance belonging 27 | // to every possible shorter-than-the-best length (the so called "sublen" array). 28 | type longestMatchCache struct { 29 | store LZ77Store 30 | sublen []uint8 31 | active bool 32 | } 33 | 34 | // Initialize a LongestMatchCache. 35 | func newCache(blockSize int) (lmc longestMatchCache) { 36 | lmc.store = make(LZ77Store, blockSize) 37 | // Rather large amount of memory. 
38 | lmc.sublen = make([]uint8, CACHE_LENGTH*3*blockSize) 39 | lmc.active = true 40 | 41 | // length > 0 and dist 0 is invalid combination, which indicates on 42 | // purpose that this cache value is not filled in yet. 43 | for i := 0; i < blockSize; i++ { 44 | lmc.store[i].litLen = 1 45 | } 46 | 47 | return lmc 48 | } 49 | 50 | // Stores sublen array in the cache 51 | func (lmc longestMatchCache) sublenToCache(sublen []uint16, 52 | pos int, length uint16) { 53 | var j, bestLength uint16 54 | 55 | if CACHE_LENGTH == 0 { 56 | return 57 | } 58 | 59 | cache := lmc.sublen[CACHE_LENGTH*pos*3:] 60 | if length < 3 { 61 | return 62 | } 63 | for i := uint16(3); i <= length; i++ { 64 | if i == length || sublen[i] != sublen[i+1] { 65 | cache[j*3] = uint8(i - 3) 66 | cache[j*3+1] = uint8(sublen[i]) 67 | cache[j*3+2] = uint8(sublen[i] >> 8) 68 | bestLength = i 69 | j++ 70 | if j >= CACHE_LENGTH { 71 | break 72 | } 73 | } 74 | } 75 | if j < CACHE_LENGTH { 76 | if bestLength != length { 77 | panic("couldn't find best length") 78 | } 79 | cache[(CACHE_LENGTH-1)*3] = uint8(bestLength - 3) 80 | } else { 81 | if bestLength > length { 82 | panic("impossible length") 83 | } 84 | } 85 | if bestLength != lmc.maxCachedSublen(pos) { 86 | panic("didn't cache sublen") 87 | } 88 | } 89 | 90 | // Extracts sublen array from the cache. 
91 | func (lmc longestMatchCache) cacheToSublen(pos int, length uint16, sublen []uint16) { 92 | if CACHE_LENGTH == 0 { 93 | return 94 | } 95 | 96 | if length < 3 { 97 | return 98 | } 99 | 100 | var prevLength uint16 101 | maxLength := lmc.maxCachedSublen(pos) 102 | cache := CACHE_LENGTH * pos * 3 103 | for j := 0; j < CACHE_LENGTH; j++ { 104 | length = uint16(lmc.sublen[cache+j*3]) + 3 105 | dist := uint16(lmc.sublen[cache+j*3+1]) + 256*uint16(lmc.sublen[cache+j*3+2]) 106 | for i := prevLength; i <= length; i++ { 107 | sublen[i] = dist 108 | } 109 | if length == maxLength { 110 | break 111 | } 112 | prevLength = length + 1 113 | } 114 | } 115 | 116 | // Returns the length up to which could be stored in the cache. 117 | func (lmc longestMatchCache) maxCachedSublen(pos int) uint16 { 118 | if CACHE_LENGTH == 0 { 119 | return 0 120 | } 121 | //cache := lmc.sublen[CACHE_LENGTH*pos*3:] 122 | cache := CACHE_LENGTH * pos * 3 123 | if lmc.sublen[cache+1] == 0 && lmc.sublen[cache+2] == 0 { 124 | //cache := lmc.sublen[CACHE_LENGTH*pos*3:] 125 | //if cache[1] == 0 && cache[2] == 0 { 126 | // No sublen cached. 127 | return 0 128 | } 129 | //return uint16(cache[(CACHE_LENGTH-1)*3]) + 3 130 | return uint16(lmc.sublen[cache+(CACHE_LENGTH-1)*3]) + 3 131 | //return uint16(lmc.sublen[(CACHE_LENGTH * (pos + 1) - 1) * 3]) + 3 132 | } 133 | -------------------------------------------------------------------------------- /zopfli/deflate.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 | */ 19 | 20 | package zopfli 21 | 22 | import ( 23 | "bufio" 24 | "fmt" 25 | "io" 26 | "os" 27 | ) 28 | 29 | type lz77Lengths struct { 30 | litLen, dist []uint 31 | } 32 | 33 | type lz77Symbols struct { 34 | litLen, dist []uint 35 | } 36 | 37 | type Deflator struct { 38 | // out: pointer to the dynamic output array to which the result 39 | // is appended. Must be freed after use. 40 | out *bufio.Writer 41 | 42 | // bp: number of bits written. This is because deflate appends 43 | // blocks as bit-based data, rather than on byte boundaries. 44 | bp uint 45 | 46 | next byte 47 | options *Options 48 | } 49 | 50 | type nullWriter struct{} 51 | 52 | func (w *nullWriter) Write(p []byte) (int, error) { 53 | return len(p), nil 54 | } 55 | 56 | func NewDeflator(wr io.Writer, options *Options) Deflator { 57 | out := bufio.NewWriter(wr) 58 | return Deflator{out, 0, 0, options} 59 | } 60 | 61 | func (z *Deflator) writeBit(bit byte) { 62 | withinByte := z.bp & 7 63 | z.next |= (bit << withinByte) 64 | if withinByte == 7 { 65 | err := z.out.WriteByte(z.next) 66 | if err != nil { 67 | panic(err) 68 | } 69 | z.next = 0 70 | } 71 | z.bp++ 72 | } 73 | 74 | func (z *Deflator) writeBits(symbol, length uint) { 75 | // TODO(lode): make more efficient (add more bits at once). 
76 | for i := uint(0); i < length; i++ { 77 | bit := byte((symbol >> i) & 1) 78 | z.writeBit(bit) 79 | } 80 | } 81 | 82 | // Adds bits, like AddBits, but the order is inverted. The deflate specification 83 | // uses both orders in one standard. 84 | func (z *Deflator) writeHuffmanBits(symbol, length uint) { 85 | // TODO(lode): make more efficient (add more bits at once). 86 | for i := uint(0); i < length; i++ { 87 | bit := byte((symbol >> (length - i - 1)) & 1) 88 | z.writeBit(bit) 89 | } 90 | } 91 | 92 | func (z *Deflator) flush() { 93 | withinByte := z.bp & 7 94 | if withinByte > 0 { 95 | z.writeByte(z.next) 96 | z.next = 0 97 | z.bp += 8 - withinByte 98 | } 99 | err := z.out.Flush() 100 | if err != nil { 101 | panic(err) 102 | } 103 | } 104 | 105 | func (z *Deflator) writeByte(c byte) { 106 | err := z.out.WriteByte(c) 107 | if err != nil { 108 | panic(err) 109 | } 110 | z.bp += 8 111 | } 112 | 113 | func (z *Deflator) write(p []byte) { 114 | _, err := z.out.Write(p) 115 | if err != nil { 116 | panic(err) 117 | } 118 | z.bp += uint(len(p)) * 8 119 | } 120 | 121 | // Ensures there are at least 2 distance codes to support buggy decoders. 122 | // Zlib 1.2.1 and below have a bug where it fails if there isn't at least 1 123 | // distance code (with length > 0), even though it's valid according to the 124 | // deflate spec to have 0 distance codes. On top of that, some mobile phones 125 | // require at least two distance codes. To support these decoders too (but 126 | // potentially at the cost of a few bytes), add dummy code lengths of 1. 127 | // References to this bug can be found in the changelog of 128 | // Zlib 1.2.2 and here: http://www.jonof.id.au/forum/index.php?topic=515.0. 129 | // 130 | // dLengths: the 32 lengths of the distance codes. 
func (lengths lz77Lengths) patchDistanceCodesForBuggyDecoders() {
	var numDistCodes uint // Number of non-zero distance code lengths seen so far.
	dLengths := lengths.dist
	for i := 0; i < 30; /* Ignore the two unused codes from the spec */ i++ {
		if dLengths[i] > 0 {
			numDistCodes++
		}
		if numDistCodes >= 2 {
			// Two or more codes is fine.
			return
		}
	}

	if numDistCodes == 0 {
		// No distance codes at all: add two dummies.
		dLengths[0] = 1
		dLengths[1] = 1
	} else if numDistCodes == 1 {
		// Exactly one distance code: add a dummy at index 0, or at index 1
		// when index 0 is the one already in use.
		var i int
		if dLengths[0] != 0 {
			i = 1
		}
		dLengths[i] = 1
	}
}

// The order in which code length code lengths are encoded as per deflate.
var clclOrder [19]uint8 = [19]uint8{
	16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
}

// writeDynamicTree writes the header of a dynamic block that describes the
// given code lengths: hLit (5 bits), hDist (5 bits), hcLen (4 bits), the code
// length code lengths in clclOrder (3 bits each), and then the run-length
// encoded literal/length and distance code lengths with their extra bits.
//
// lengths.litLen is read up to index 285 and lengths.dist up to index 29.
// Panics if any code length is 16 or more.
func (z *Deflator) writeDynamicTree(lengths lz77Lengths) {
	hLit := uint(29)  // 286 - 257
	hDist := uint(29) // 32 - 1, but gzip does not like hDist > 29.
	// Trim zeros.
	for hLit > 0 && lengths.litLen[257+hLit-1] == 0 {
		hLit--
	}
	for hDist > 0 && lengths.dist[1+hDist-1] == 0 {
		hDist--
	}

	// Size of lldLengths.
	lldTotal := hLit + 257 + hDist + 1

	// All litLen and dist lengths with ending
	// zeros trimmed together in one array.
	lldLengths := make([]uint, lldTotal)

	for i := uint(0); i < lldTotal; i++ {
		if i < 257+hLit {
			lldLengths[i] = lengths.litLen[i]
		} else {
			lldLengths[i] = lengths.dist[i-257-hLit]
		}
		if lldLengths[i] >= 16 {
			panic("length too large")
		}
	}

	// Runlength encoded version of lengths of litLen and dist trees.
	var rle []uint
	// Extra bits for rle values 16, 17 and 18.
	var rleBits []uint
	for i := uint(0); i < lldTotal; i++ {
		// count: how many times the value at i repeats from i onwards.
		var count uint
		for j := i; j < lldTotal && lldLengths[i] == lldLengths[j]; j++ {
			count++
		}
		if count >= 4 || (count >= 3 && lldLengths[i] == 0) {
			if lldLengths[i] == 0 {
				// Run of zeros: symbol 18 covers 11..138 of them,
				// symbol 17 covers 3..10.
				if count > 10 {
					if count > 138 {
						count = 138
					}
					rle = append(rle, 18)
					rleBits = append(rleBits, count-11)
				} else {
					rle = append(rle, 17)
					rleBits = append(rleBits, count-3)
				}
			} else {
				// Run of a non-zero length: the value itself, then symbol
				// 16 ("repeat previous") in chunks of 6, one more 16 for a
				// remainder of 3..5, and plain copies for what is left.
				rle = append(rle, lldLengths[i])
				rleBits = append(rleBits, 0)
				repeat := count - 1 // Since the first one is hardcoded.
				for repeat >= 6 {
					rle = append(rle, 16)
					rleBits = append(rleBits, 6-3)
					repeat -= 6
				}
				if repeat >= 3 {
					rle = append(rle, 16)
					rleBits = append(rleBits, repeat-3)
					repeat = 0
				}
				for repeat > 0 {
					rle = append(rle, lldLengths[i])
					rleBits = append(rleBits, 0)
					repeat--
				}
			}

			// Skip the rest of the run; the loop increment covers the
			// last step.
			i += count - 1
		} else {
			rle = append(rle, lldLengths[i])
			rleBits = append(rleBits, 0)
		}
		if rle[len(rle)-1] > 18 {
			panic("last rle too large")
		}
	}

	// Histogram of the code length alphabet (symbols 0..18).
	rleSize := len(rle)
	var clCounts [19]uint
	for i := 0; i < rleSize; i++ {
		clCounts[rle[i]]++
	}

	// Code length code lengths.
	clcl := lengthLimitedCodeLengths(clCounts[:], 7)
	clSymbols := lengthsToSymbols(clcl, 7)

	// Trim zeros.
	hcLen := uint(15)
	for hcLen > 0 && clCounts[clclOrder[hcLen+4-1]] == 0 {
		hcLen--
	}

	z.writeBits(hLit, 5)
	z.writeBits(hDist, 5)
	z.writeBits(hcLen, 4)

	for i := uint(0); i < hcLen+4; i++ {
		z.writeBits(clcl[clclOrder[i]], 3)
	}

	for i := 0; i < rleSize; i++ {
		symbol := clSymbols[rle[i]]
		z.writeHuffmanBits(symbol, clcl[rle[i]])
		// Extra bits.
		if rle[i] == 16 {
			z.writeBits(rleBits[i], 2)
		} else if rle[i] == 17 {
			z.writeBits(rleBits[i], 3)
		} else if rle[i] == 18 {
			z.writeBits(rleBits[i], 7)
		}
	}
}

// Gives the exact size of the tree, in bits, as it will be encoded in DEFLATE.
// The tree is written by writeDynamicTree to a Deflator whose output is
// discarded, and the number of bits it counted is returned.
func (lengths lz77Lengths) calculateTreeSize() uint {
	// w stays nil: nullWriter's Write method never touches its receiver.
	var w *nullWriter
	z := NewDeflator(w, nil)
	z.writeDynamicTree(lengths)
	return z.bp
}

// symbols converts both sets of code lengths to code values with
// lengthsToSymbols, passing maxBits through to it.
func (lengths lz77Lengths) symbols(maxBits uint) (symbols lz77Symbols) {
	symbols.litLen = lengthsToSymbols(lengths.litLen, maxBits)
	symbols.dist = lengthsToSymbols(lengths.dist, maxBits)
	return symbols
}

// Adds all lit/len and dist codes from the lists as huffman symbols. Does not
// add end code 256. expectedDataSize is the uncompressed block size, used for
// assert, but you can set it to 0 to not do the assertion.
//
// Entries with dist == 0 are written as literals; all others as a length
// symbol, length extra bits, a distance symbol and distance extra bits.
// Panics if an entry is out of range, if a symbol that is needed has code
// length zero, or if the expected size does not match.
func (z *Deflator) writeLZ77Data(store LZ77Store,
	expectedDataSize int,
	symbols lz77Symbols, lengths lz77Lengths) {
	// testLength: number of uncompressed bytes represented so far.
	var testLength int
	lEnd := len(store)
	for i := 0; i < lEnd; i++ {
		pair := store[i]
		if pair.dist == 0 {
			if pair.litLen >= 256 {
				panic("litLen too large")
			}
			if lengths.litLen[pair.litLen] <= 0 {
				panic("length is zero")
			}
			z.writeHuffmanBits(symbols.litLen[pair.litLen], lengths.litLen[pair.litLen])
			testLength++
		} else {
			lls := pair.lengthSymbol()
			ds := pair.distSymbol()
			if pair.litLen < 3 || pair.litLen > 288 {
				panic("litLen out of range")
			}
			if lengths.litLen[lls] <= 0 {
				panic("length is zero")
			}
			if lengths.dist[ds] <= 0 {
				panic("length is zero")
			}
			z.writeHuffmanBits(symbols.litLen[lls], lengths.litLen[lls])
			z.writeBits(uint(pair.lengthExtraBitsValue()), uint(pair.lengthExtraBits()))
			z.writeHuffmanBits(symbols.dist[ds], lengths.dist[ds])
			z.writeBits(uint(pair.distExtraBitsValue()), uint(pair.distExtraBits()))
			testLength += int(pair.litLen)
		}
	}
	if expectedDataSize != 0 && testLength != expectedDataSize {
		panic("actual size did not match expected size")
	}
}

// getFixedTree returns the code lengths used for fixed blocks: 288
// literal/length lengths (8 for symbols 0-143, 9 for 144-255, 7 for 256-279,
// 8 for 280-287) and 32 distance lengths of 5.
func getFixedTree() (lengths lz77Lengths) {
	lengths.litLen = make([]uint, 288)
	lengths.dist = make([]uint, 32)
	for i := 0; i < 144; i++ {
		lengths.litLen[i] = 8
	}
	for i := 144; i < 256; i++ {
		lengths.litLen[i] = 9
	}
	for i := 256; i < 280; i++ {
		lengths.litLen[i] = 7
	}
	for i := 280; i < 288; i++ {
		lengths.litLen[i] = 8
	}
	for i := 0; i < 32; i++ {
		lengths.dist[i] = 5
	}
	return lengths
}

// Calculates size of the part after the header and tree of an LZ77 block, in bits.
// That is the code length of every symbol in the store plus the extra bits of
// every length/distance pair, plus the code length of the end symbol (256).
func (store LZ77Store) calculateBlockSymbolSize(lengths lz77Lengths) uint64 {
	var result uint64
	lEnd := len(store)
	for i := 0; i < lEnd; i++ {
		if store[i].dist == 0 {
			// Literal.
			result += uint64(lengths.litLen[store[i].litLen])
		} else {
			// Length/distance pair.
			result += uint64(lengths.litLen[store[i].lengthSymbol()])
			result += uint64(lengths.dist[store[i].distSymbol()])
			result += uint64(store[i].lengthExtraBits())
			result += uint64(store[i].distExtraBits())
		}
	}
	result += uint64(lengths.litLen[256]) // end symbol
	return result
}

// Calculates block size in bits.
377 | // litLens: lz77 lit/lengths 378 | // dists: ll77 distances 379 | func (store LZ77Store) CalculateBlockSize(blockType byte) uint64 { 380 | if blockType != FIXED_BLOCK && blockType != DYNAMIC_BLOCK { 381 | panic("this is not for uncompressed blocks") 382 | } 383 | 384 | var lengths lz77Lengths 385 | result := uint64(3) // bFinal and blockType bits 386 | if blockType == FIXED_BLOCK { 387 | lengths = getFixedTree() 388 | } else { 389 | counts := store.lz77Counts() 390 | lengths.litLen = lengthLimitedCodeLengths(counts.litLen, 15) 391 | lengths.dist = lengthLimitedCodeLengths(counts.dist, 15) 392 | lengths.patchDistanceCodesForBuggyDecoders() 393 | result += uint64(lengths.calculateTreeSize()) 394 | } 395 | 396 | result += store.calculateBlockSymbolSize(lengths) 397 | return result 398 | } 399 | 400 | // Adds a deflate block with the given LZ77 data to the output. 401 | // z: the stream to write to 402 | // blockType: the block type, must be 1 or 2 403 | // final: whether to set the "final" bit on this block, must be the last block 404 | // store: literal/length/distance array of the LZ77 data 405 | // expectedDataSize: the uncompressed block size, used for panic, but you can 406 | // set it to 0 to not do the assertion. 407 | func (z *Deflator) WriteLZ77Block(blockType byte, final bool, store LZ77Store, expectedDataSize int) { 408 | var finalByte byte 409 | if final { 410 | finalByte = 1 411 | } 412 | z.writeBit(finalByte) 413 | z.writeBit(blockType & 1) 414 | z.writeBit((blockType & 2) >> 1) 415 | 416 | var lengths lz77Lengths 417 | if blockType == FIXED_BLOCK { 418 | // Fixed block. 419 | lengths = getFixedTree() 420 | } else { 421 | // Dynamic block. 
422 | if blockType != DYNAMIC_BLOCK { 423 | panic("illegal block type") 424 | } 425 | counts := store.lz77Counts() 426 | lengths.litLen = lengthLimitedCodeLengths(counts.litLen, 15) 427 | lengths.dist = lengthLimitedCodeLengths(counts.dist, 15) 428 | lengths.patchDistanceCodesForBuggyDecoders() 429 | detectTreeSize := z.bp 430 | z.writeDynamicTree(lengths) 431 | if z.options.Verbose { 432 | fmt.Fprintf(os.Stderr, "treesize: %d bits\n", z.bp-detectTreeSize) 433 | } 434 | 435 | // Assert that for every present symbol, the code length is non-zero. 436 | // TODO(lode): remove this in release version. 437 | for i := 0; i < 288; i++ { 438 | if counts.litLen[i] != 0 && lengths.litLen[i] <= 0 { 439 | panic("length is zero") 440 | } 441 | } 442 | for i := 0; i < 32; i++ { 443 | if counts.dist[i] != 0 && lengths.dist[i] <= 0 { 444 | panic("length is zero") 445 | } 446 | } 447 | } 448 | 449 | symbols := lengths.symbols(15) 450 | 451 | detectBlockSize := z.bp 452 | z.writeLZ77Data(store, expectedDataSize, symbols, lengths) 453 | // End symbol. 
454 | z.writeHuffmanBits(symbols.litLen[256], lengths.litLen[256]) 455 | if final { 456 | // write last byte 457 | z.flush() 458 | } 459 | 460 | if z.options.Verbose { 461 | var uncompressedSize uint 462 | lEnd := len(store) 463 | for i := 0; i < lEnd; i++ { 464 | if store[i].dist == 0 { 465 | uncompressedSize += 1 466 | } else { 467 | uncompressedSize += uint(store[i].litLen) 468 | } 469 | } 470 | compressedSize := z.bp - detectBlockSize 471 | var places int 472 | if compressedSize&1 != 0 { 473 | places = 3 474 | } else if compressedSize&2 != 0 { 475 | places = 2 476 | } else if compressedSize&4 != 0 { 477 | places = 1 478 | } 479 | fmt.Fprintf( 480 | os.Stderr, 481 | "compressed block size: %.*f (%dkB) (unc: %d (%dkB)\n", 482 | places, 483 | float64(compressedSize)/8, 484 | (compressedSize+4000)/8000, 485 | uncompressedSize, 486 | (uncompressedSize+500)/1000, 487 | ) 488 | } 489 | } 490 | 491 | func (z *Deflator) deflateDynamicBlock(final bool, in []byte, inStart, inEnd int) { 492 | s := NewBlockState(z.options, in, inStart, inEnd) 493 | store := s.LZ77Optimal(inStart, inEnd) 494 | 495 | // For small block, encoding with fixed tree can be smaller. For large block, 496 | // don't bother doing this expensive test, dynamic tree will be better. 
497 | blockType := byte(DYNAMIC_BLOCK) 498 | if len(store) < 1000 { 499 | fixedStore := s.LZ77OptimalFixed(inStart, inEnd) 500 | dynCost := store.CalculateBlockSize(2) 501 | fixedCost := fixedStore.CalculateBlockSize(1) 502 | if fixedCost < dynCost { 503 | blockType = FIXED_BLOCK 504 | store = fixedStore 505 | } 506 | } 507 | 508 | blockSize := inEnd - inStart 509 | z.WriteLZ77Block(blockType, final, store, blockSize) 510 | } 511 | 512 | func (z *Deflator) deflateFixedBlock(final bool, in []byte, inStart, inEnd int) { 513 | blockSize := inEnd - inStart 514 | 515 | s := NewBlockState(z.options, in, inStart, inEnd) 516 | store := s.LZ77OptimalFixed(inStart, inEnd) 517 | z.WriteLZ77Block(FIXED_BLOCK, final, store, blockSize) 518 | } 519 | 520 | func (z *Deflator) deflateNonCompressedBlock(final bool, in []byte) { 521 | blockSize := len(in) 522 | if blockSize >= 65536 { 523 | panic("Non compressed blocks are max this size.") 524 | } 525 | nLen := uint16(^blockSize) 526 | 527 | var finalByte byte 528 | if final { 529 | finalByte = 1 530 | } 531 | z.writeBit(finalByte) 532 | // blockType 00 533 | z.writeBit(0) 534 | z.writeBit(0) 535 | // Any bits of input up to the next byte boundary are ignored. 536 | z.flush() 537 | 538 | z.writeByte(byte(blockSize)) 539 | z.writeByte(byte(blockSize / 256)) 540 | z.writeByte(byte(nLen)) 541 | z.writeByte(byte(nLen / 256)) 542 | 543 | z.write(in) 544 | } 545 | 546 | func (z *Deflator) deflateBlock(final bool, in []byte, inStart, inEnd int) { 547 | switch z.options.BlockType { 548 | case UNCOMPRESSED_BLOCK: 549 | z.deflateNonCompressedBlock(final, in[inStart:inEnd]) 550 | case FIXED_BLOCK: 551 | z.deflateFixedBlock(final, in, inStart, inEnd) 552 | case DYNAMIC_BLOCK: 553 | z.deflateDynamicBlock(final, in, inStart, inEnd) 554 | default: 555 | panic("illegal block type") 556 | } 557 | } 558 | 559 | // Does squeeze strategy where first block splitting is done, then each block is 560 | // squeezed. 
561 | // Parameters: see description of the Deflate function. 562 | func (z *Deflator) deflateSplittingFirst(final bool, in []byte, inStart, inEnd int) { 563 | var splitPoints []int 564 | switch z.options.BlockType { 565 | case UNCOMPRESSED_BLOCK: 566 | splitPoints = blockSplitSimple(inStart, inEnd, 65535) 567 | case FIXED_BLOCK: 568 | // If all blocks are fixed tree, splitting into separate blocks only 569 | // increases the total size. Leave splitPoints nil, this represents 1 block. 570 | case DYNAMIC_BLOCK: 571 | splitPoints = blockSplit(z.options, in, inStart, inEnd, z.options.BlockSplittingMax) 572 | } 573 | 574 | nPoints := len(splitPoints) 575 | for i := 0; i <= nPoints; i++ { 576 | var start, end int 577 | if i == 0 { 578 | start = inStart 579 | } else { 580 | start = splitPoints[i-1] 581 | } 582 | if i == nPoints { 583 | end = inEnd 584 | } else { 585 | end = splitPoints[i] 586 | } 587 | z.deflateBlock(i == nPoints && final, in, start, end) 588 | } 589 | } 590 | 591 | // Does squeeze strategy where first the best possible lz77 is done, and then based 592 | // on that data, block splitting is done. 593 | // Parameters: see description of the Deflate function. 594 | func (z *Deflator) deflateSplittingLast(final bool, in []byte, inStart, inEnd int) { 595 | blockType := z.options.BlockType 596 | if blockType == UNCOMPRESSED_BLOCK { 597 | // This function only supports LZ77 compression. deflateSplittingFirst 598 | // supports the special case of noncompressed data. Punt it to that one. 
599 | z.deflateSplittingFirst(final, in, inStart, inEnd) 600 | return 601 | } 602 | if blockType != FIXED_BLOCK && blockType != DYNAMIC_BLOCK { 603 | panic("illegal block type") 604 | } 605 | 606 | s := NewBlockState(z.options, in, inStart, inEnd) 607 | 608 | var store LZ77Store 609 | if blockType == DYNAMIC_BLOCK { 610 | store = s.LZ77Optimal(inStart, inEnd) 611 | } else { 612 | if blockType != FIXED_BLOCK { 613 | panic("illegal block type") 614 | } 615 | store = s.LZ77OptimalFixed(inStart, inEnd) 616 | } 617 | 618 | // If all blocks are fixed tree, splitting into separate blocks only 619 | // increases the total size. Leave nPoints at 0, this represents 1 block. 620 | var splitPoints []int 621 | if blockType != FIXED_BLOCK { 622 | splitPoints = store.blockSplitLZ77(z.options, z.options.BlockSplittingMax) 623 | } 624 | 625 | storeSize := len(store) 626 | nPoints := len(splitPoints) 627 | for i := 0; i <= nPoints; i++ { 628 | var start, end int 629 | if i > 0 { 630 | start = splitPoints[i-1] 631 | } 632 | if i >= nPoints { 633 | end = storeSize 634 | } else { 635 | end = splitPoints[i] 636 | } 637 | z.WriteLZ77Block(blockType, i == nPoints && final, store[start:end], 0) 638 | } 639 | } 640 | 641 | // Deflate a part, to allow Deflate() to use multiple master blocks if 642 | // needed. 643 | // 644 | // Like Deflate, but allows to specify start and end byte with inStart and 645 | // inEnd. Only that part is compressed, but earlier bytes are still used for the 646 | // back window. 647 | // 648 | // It is possible to call this function multiple times in a row, shifting 649 | // inStart and inEnd to next bytes of the data. If inStart is larger than 0, then 650 | // previous bytes are used as the initial dictionary for LZ77. 651 | // This function will usually output multiple deflate blocks. If final is 1, then 652 | // the final bit will be set on the last block. 
653 | func (z *Deflator) DeflatePart(final bool, in []byte, inStart, inEnd int) (err error) { 654 | defer func() { 655 | problem := recover() 656 | if problem != nil { 657 | err = problem.(error) 658 | } 659 | }() 660 | 661 | if z.options.BlockSplitting { 662 | if z.options.BlockSplittingLast { 663 | z.deflateSplittingLast(final, in, inStart, inEnd) 664 | } else { 665 | z.deflateSplittingFirst(final, in, inStart, inEnd) 666 | } 667 | } else { 668 | z.deflateBlock(final, in, inStart, inEnd) 669 | } 670 | return err 671 | } 672 | 673 | // Compresses according to the deflate specification and append the compressed 674 | // result to the output. 675 | // This function will usually output multiple deflate blocks. If final is 1, then 676 | // the final bit will be set on the last block. 677 | // 678 | // final: whether this is the last section of the input, sets the final bit to the 679 | // last deflate block. 680 | // in: the input bytes 681 | func (z *Deflator) Deflate(final bool, in []byte) (err error) { 682 | defer func() { 683 | problem := recover() 684 | if problem != nil { 685 | err = problem.(error) 686 | } 687 | }() 688 | 689 | if MASTER_BLOCK_SIZE == 0 { 690 | err := z.DeflatePart(true, in, 0, len(in)) 691 | if err != nil { 692 | return err 693 | } 694 | } else { 695 | var i int 696 | inSize := len(in) 697 | for i < inSize { 698 | var size int 699 | masterFinal := i+MASTER_BLOCK_SIZE >= inSize 700 | final2 := final && masterFinal 701 | if masterFinal { 702 | size = inSize - i 703 | } else { 704 | size = MASTER_BLOCK_SIZE 705 | } 706 | err := z.DeflatePart(final2, in, i, i+size) 707 | if err != nil { 708 | return err 709 | } 710 | i += size 711 | } 712 | } 713 | if z.options.Verbose { 714 | inSize := len(in) 715 | outSize := z.bp / 8 716 | fmt.Fprintf( 717 | os.Stderr, 718 | "Original Size: %d, Deflate: %d, Compression: %f%% Removed\n", 719 | inSize, outSize, 720 | 100*(float64(inSize)-float64(outSize))/float64(inSize), 721 | ) 722 | } 723 | return nil 724 | } 
725 | -------------------------------------------------------------------------------- /zopfli/gzip_container.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Google Inc. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 | */ 19 | 20 | package zopfli 21 | 22 | import ( 23 | "fmt" 24 | "hash/crc32" 25 | "io" 26 | "os" 27 | ) 28 | 29 | // Compresses according to the gzip specification and writes the compressed 30 | // result to the output. 31 | // 32 | // options: global program options 33 | // out: writer to which the result is appended 34 | func GzipCompress(options *Options, in []byte, out io.Writer) error { 35 | var counter countingWriter 36 | if options.Verbose { 37 | counter = newCountingWriter(out) 38 | out = &counter 39 | } 40 | 41 | header := []byte{ 42 | // ID 43 | 31, 44 | 139, 45 | // CM 46 | 8, 47 | // FLG 48 | 0, 49 | // MTIME 50 | 0, 51 | 0, 52 | 0, 53 | 0, 54 | // XFL, 2 indicates best compression. 55 | 2, 56 | // OS follows Unix conventions. 
57 | 3, 58 | } 59 | _, headerErr := out.Write(header) 60 | if headerErr != nil { 61 | return headerErr 62 | } 63 | 64 | z := NewDeflator(out, options) 65 | writeErr := z.Deflate(true, in) 66 | if writeErr != nil { 67 | return writeErr 68 | } 69 | 70 | checksum := crc32.NewIEEE() 71 | checksum.Write(in) 72 | crcValue := checksum.Sum32() 73 | inSize := len(in) 74 | footer := []byte{ 75 | // CRC 76 | byte(crcValue), 77 | byte(crcValue >> 8), 78 | byte(crcValue >> 16), 79 | byte(crcValue >> 24), 80 | // ISIZE 81 | byte(inSize), 82 | byte(inSize >> 8), 83 | byte(inSize >> 16), 84 | byte(inSize >> 24), 85 | } 86 | _, footerErr := out.Write(footer) 87 | if footerErr != nil { 88 | return footerErr 89 | } 90 | 91 | if options.Verbose { 92 | inSize := len(in) 93 | outSize := counter.written 94 | fmt.Fprintf(os.Stderr, 95 | "Original Size: %d, Gzip: %d, Compression: %f%% Removed\n", 96 | inSize, outSize, 97 | 100*float64(inSize-outSize)/float64(inSize)) 98 | } 99 | return nil 100 | } 101 | -------------------------------------------------------------------------------- /zopfli/hash.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 | */ 19 | 20 | package zopfli 21 | 22 | const ( 23 | HASH_SHIFT = 5 24 | HASH_MASK = 32767 25 | ) 26 | 27 | type hash struct { 28 | head []int // Hash value to index of its most recent occurance. 29 | prev []uint16 // Index to index of prev. occurance of same hash. 30 | hashVal []int // Index to hash value at this index. 31 | val int // Current hash value. 32 | 33 | // Only used when HASH_SAME_HASH is true 34 | // Fields with similar purpose as the above hash, but for the second 35 | // hash with a value that is calculated differently. 36 | head2 []int // Hash value to index of its most recent occurance. 37 | prev2 []uint16 // Index to index of prev. occurance of same hash. 38 | hashVal2 []int // Index to hash value at this index. 39 | val2 int // Current hash value. 40 | 41 | // Only used when HASH_SAME is true 42 | same []uint16 // Amount of repetitions of same byte after this. 43 | } 44 | 45 | // Allocates and initializes all fields of Hash. 46 | func newHash(a, b byte) (h hash) { 47 | h.head = make([]int, 65536) 48 | h.prev = make([]uint16, WINDOW_SIZE) 49 | h.hashVal = make([]int, WINDOW_SIZE) 50 | for i := 0; i < 65536; i++ { 51 | h.head[i] = -1 // -1 indicates no head so far. 52 | } 53 | for i := uint16(0); i < WINDOW_SIZE; i++ { 54 | h.prev[i] = i // If prev[j] == j, then prev[j] is uninitialized. 55 | h.hashVal[i] = -1 56 | } 57 | 58 | if HASH_SAME { 59 | h.same = make([]uint16, WINDOW_SIZE) 60 | } 61 | 62 | if HASH_SAME_HASH { 63 | h.head2 = make([]int, 65536) 64 | h.prev2 = make([]uint16, WINDOW_SIZE) 65 | h.hashVal2 = make([]int, WINDOW_SIZE) 66 | for i := 0; i < 65536; i++ { 67 | h.head2[i] = -1 68 | } 69 | for i := uint16(0); i < WINDOW_SIZE; i++ { 70 | h.prev2[i] = i 71 | h.hashVal2[i] = -1 72 | } 73 | } 74 | 75 | h.warmup(a, b) 76 | return h 77 | } 78 | 79 | // Update the sliding hash value with the given byte. 
All calls to this function 80 | // must be made on consecutive input characters. Since the hash value exists out 81 | // of multiple input bytes, a few warmups with this function are needed initially. 82 | func (h *hash) updateValue(c byte) { 83 | h.val = ((h.val << HASH_SHIFT) ^ int(c)) & HASH_MASK 84 | } 85 | 86 | // Updates the hash values based on the current position in the array. All calls 87 | // to this must be made for consecutive bytes. 88 | func (h *hash) update(slice []byte, pos, end int) { 89 | hPos := pos & WINDOW_MASK 90 | 91 | var hashValue byte 92 | if pos+MIN_MATCH <= end { 93 | hashValue = slice[pos+MIN_MATCH-1] 94 | } 95 | h.updateValue(hashValue) 96 | h.hashVal[hPos] = h.val 97 | if h.head[h.val] != -1 && h.hashVal[h.head[h.val]] == h.val { 98 | h.prev[hPos] = uint16(h.head[h.val]) 99 | } else { 100 | h.prev[hPos] = uint16(hPos) 101 | } 102 | h.head[h.val] = hPos 103 | 104 | if HASH_SAME { 105 | // Update "same". 106 | var amount int 107 | if h.same[(pos-1)&WINDOW_MASK] > 1 { 108 | amount = int(h.same[(pos-1)&WINDOW_MASK]) - 1 109 | } 110 | for pos+amount+1 < end && 111 | slice[pos] == slice[pos+amount+1] && amount < 0xFFFF { 112 | amount++ 113 | } 114 | h.same[hPos] = uint16(amount) 115 | } 116 | 117 | if HASH_SAME_HASH { 118 | h.val2 = int((h.same[hPos]-MIN_MATCH)&255) ^ h.val 119 | h.hashVal2[hPos] = h.val2 120 | if h.head2[h.val2] != -1 && h.hashVal2[h.head2[h.val2]] == h.val2 { 121 | h.prev2[hPos] = uint16(h.head2[h.val2]) 122 | } else { 123 | h.prev2[hPos] = uint16(hPos) 124 | } 125 | h.head2[h.val2] = hPos 126 | } 127 | } 128 | 129 | // Prepopulates hash: 130 | // Fills in the initial values in the hash, before Update can be used 131 | // correctly. 
132 | func (h *hash) warmup(a, b byte) { 133 | h.updateValue(a) 134 | h.updateValue(b) 135 | } 136 | -------------------------------------------------------------------------------- /zopfli/katajainen.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 | */ 19 | 20 | /* 21 | Bounded package merge algorithm, based on the paper 22 | "A Fast and Space-Economical Algorithm for Length-Limited Coding 23 | Jyrki Katajainen, Alistair Moffat, Andrew Turpin". 24 | */ 25 | 26 | package zopfli 27 | 28 | import ( 29 | "sort" 30 | ) 31 | 32 | // Nodes forming chains. Also used to represent leaves. 33 | type node struct { 34 | weight uint // Total weight (symbol count) of this chain. 35 | tail *node // Previous node(s) of this chain, or nil if none. 36 | count int // Leaf symbol index, or number of leaves before this chain. 37 | } 38 | 39 | type symbolLeaves []*node 40 | 41 | // Initializes a chain node with the given values and marks it as in use. 42 | func newNode(weight uint, count int, tail *node) *node { 43 | var node node 44 | node.weight = weight 45 | node.count = count 46 | node.tail = tail 47 | return &node 48 | } 49 | 50 | // Performs a Boundary Package-Merge step. 
Puts a new chain in the given list. 51 | // The new chain is, depending on the weights, a leaf or a combination of two 52 | // chains from the previous list. 53 | // lists: The lists of chains. 54 | // maxBits: Number of lists. 55 | // leaves: The leaves, one per symbol. 56 | // numSymbols: Number of leaves. 57 | // index: The index of the list in which a new chain or leaf is required. 58 | // final: Whether this is the last time this function is called. If it is then 59 | // it is no more needed to recursively call self. 60 | func boundaryPM(lists [][2]*node, leaves symbolLeaves, index int, final bool) { 61 | lastCount := lists[index][1].count // Count of last chain of list. 62 | 63 | numSymbols := len(leaves) 64 | if index == 0 && lastCount >= numSymbols { 65 | return 66 | } 67 | 68 | lists[index][0] = lists[index][1] 69 | 70 | if index == 0 { 71 | // New leaf node in list 0. 72 | lists[index][1] = newNode(leaves[lastCount].weight, lastCount+1, nil) 73 | } else { 74 | sum := lists[index-1][0].weight + lists[index-1][1].weight 75 | if lastCount < numSymbols && sum > leaves[lastCount].weight { 76 | // New leaf inserted in list, so count is incremented. 77 | lists[index][1] = newNode(leaves[lastCount].weight, 78 | lastCount+1, lists[index][1].tail) 79 | } else { 80 | lists[index][1] = newNode(sum, lastCount, lists[index-1][1]) 81 | if !final { 82 | // Two lookahead chains of previous list used up, create new ones. 83 | boundaryPM(lists, leaves, index-1, false) 84 | boundaryPM(lists, leaves, index-1, false) 85 | } 86 | } 87 | } 88 | } 89 | 90 | // Initializes each list with as lookahead chains the two leaves with lowest 91 | // weights. 
92 | func newLists(leaves symbolLeaves, maxBits int) (lists [][2]*node) { 93 | lists = make([][2]*node, maxBits) 94 | node0 := newNode(leaves[0].weight, 1, nil) 95 | node1 := newNode(leaves[1].weight, 2, nil) 96 | for i := 0; i < maxBits; i++ { 97 | lists[i][0] = node0 98 | lists[i][1] = node1 99 | } 100 | return lists 101 | } 102 | 103 | // Converts result of boundary package-merge to the bitLengths. The result in the 104 | // last chain of the last list contains the amount of active leaves in each list. 105 | // chain: Chain to extract the bit length from (last chain from last list). 106 | func extractBitLengths(chain *node, leaves symbolLeaves, bitLengths []uint) { 107 | for node := chain; node != nil; node = node.tail { 108 | for i := 0; i < node.count; i++ { 109 | bitLengths[leaves[i].count]++ 110 | } 111 | } 112 | } 113 | 114 | func (leaves *symbolLeaves) Len() int { 115 | return len(*leaves) 116 | } 117 | 118 | // Comparator for sorting the leaves. Has the function signature for qsort. 119 | func (leaves *symbolLeaves) Less(i, j int) bool { 120 | return (*leaves)[i].weight < (*leaves)[j].weight 121 | } 122 | 123 | func (leaves *symbolLeaves) Swap(i, j int) { 124 | (*leaves)[j], (*leaves)[i] = (*leaves)[i], (*leaves)[j] 125 | } 126 | 127 | // Outputs minimum-redundancy length-limited code bitLengths for symbols with the 128 | // given counts. The bitLengths are limited by maxBits. 129 | // 130 | // The output is tailored for DEFLATE: symbols that never occur, get a bit length 131 | // of 0, and if only a single symbol occurs at least once, its bitlength will be 1, 132 | // and not 0 as would theoretically be needed for a single symbol. 133 | // 134 | // frequencies: The amount of occurances of each symbol. 135 | // n: The amount of symbols. 136 | // maxBits: Maximum bit length, inclusive. 137 | // bitLengths: Output, the bitlengths for the symbol prefix codes. 138 | // return: 0 for OK, non-0 for error. 
139 | func lengthLimitedCodeLengths(frequencies []uint, maxBits int) []uint { 140 | n := len(frequencies) 141 | // One leaf per symbol. Only numSymbols leaves will be used. 142 | leaves := make(symbolLeaves, 0, n) 143 | // Count used symbols and place them in the leaves. 144 | for i := 0; i < n; i++ { 145 | if frequencies[i] > 0 { 146 | node := newNode(frequencies[i], i, nil) 147 | leaves = append(leaves, node) 148 | } 149 | } 150 | 151 | // Amount of symbols with frequency > 0. 152 | numSymbols := len(leaves) 153 | // Check special cases and error conditions. 154 | if (1 << uint(maxBits)) < numSymbols { 155 | // Error, too few maxBits to represent symbols. 156 | panic("couldn't calculate code lengths") 157 | } 158 | 159 | // Initialize all bitlengths at 0. 160 | bitLengths := make([]uint, n) 161 | if numSymbols == 0 { 162 | // No symbols at all. OK. 163 | return bitLengths 164 | } 165 | if numSymbols == 1 { 166 | // Only one symbol, give it bitLength 1, not 0. OK. 167 | bitLengths[leaves[0].count] = 1 168 | return bitLengths 169 | } 170 | 171 | // Sort the leaves from lightest to heaviest. 172 | sort.Sort(&leaves) 173 | 174 | // Array of lists of chains. Each list requires only two lookahead 175 | // chains at a time, so each list is a array of two node*'s. 176 | lists := newLists(leaves, maxBits) 177 | 178 | // In the last list, 2 * numSymbols - 2 active chains need to be created. Two 179 | // are already created in the initialization. Each boundaryPM run creates one. 
180 | numBoundaryPMRuns := 2*numSymbols - 4 181 | for i := 0; i < numBoundaryPMRuns; i++ { 182 | final := i == numBoundaryPMRuns-1 183 | boundaryPM(lists, leaves, maxBits-1, final) 184 | } 185 | 186 | extractBitLengths(lists[maxBits-1][1], leaves, bitLengths) 187 | return bitLengths 188 | } 189 | -------------------------------------------------------------------------------- /zopfli/lz77.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 | */ 19 | 20 | package zopfli 21 | 22 | // litLen: Contains the literal symbol or length value. 23 | // dists: Indicates the distance, or 0 to indicate that there is no distance and 24 | // litLens contains a literal instead of a length. 25 | type lz77Pair struct { 26 | // Lit or len. 27 | litLen uint16 28 | 29 | // If 0: indicates literal in corresponding litLens, 30 | // if > 0: length in corresponding litLens, this is the distance. 31 | dist uint16 32 | } 33 | 34 | // Stores lit/length and dist pairs for LZ77. 35 | type LZ77Store []lz77Pair 36 | 37 | // Some state information for compressing a block. 38 | // This is currently a bit under-used (with mainly only the longest match cache), 39 | // but is kept for easy future expansion. 
40 | type BlockState struct { 41 | options *Options 42 | block []byte 43 | 44 | // The start (inclusive) and end (not inclusive) of the current block. 45 | blockStart, blockEnd int 46 | 47 | // Cache for length/distance pairs found so far. 48 | lmc longestMatchCache 49 | } 50 | 51 | type lz77Counts struct { 52 | litLen, dist []uint 53 | } 54 | 55 | // Gets a score of the length given the distance. Typically, the score of the 56 | // length is the length itself, but if the distance is very long, decrease the 57 | // score of the length a bit to make up for the fact that long distances use 58 | // large amounts of extra bits. 59 | 60 | // This is not an accurate score, it is a heuristic only for the greedy LZ77 61 | // implementation. More accurate cost models are employed later. Making this 62 | // heuristic more accurate may hurt rather than improve compression. 63 | 64 | // The two direct uses of this heuristic are: 65 | // -avoid using a length of 3 in combination with a long distance. This only has 66 | // an effect if length == 3. 67 | // -make a slightly better choice between the two options of the lazy matching. 68 | 69 | // Indirectly, this affects: 70 | // -the block split points if the default of block splitting first is used, in a 71 | // rather unpredictable way 72 | // -the first zopfli run, so it affects the chance of the first run being closer 73 | // to the optimal output 74 | func (pair lz77Pair) lengthScore() uint16 { 75 | // At 1024, the distance uses 9+ extra bits and this seems to be the 76 | // sweet spot on tested files. 77 | if pair.dist > 1024 { 78 | return pair.litLen - 1 79 | } 80 | return pair.litLen 81 | } 82 | 83 | // Verifies if length and dist are indeed valid, only used for assertion. 84 | func (pair lz77Pair) Verify(data []byte, pos int) { 85 | 86 | // TODO(lode): make this only run in a debug compile, it's for assert only. 
87 | 88 | dataSize := len(data) 89 | if pos+int(pair.litLen) > dataSize { 90 | panic("overrun") 91 | } 92 | for i := 0; i < int(pair.litLen); i++ { 93 | if data[pos-int(pair.dist)+i] != data[pos+i] { 94 | panic("mismatch") 95 | } 96 | } 97 | } 98 | 99 | // Finds how long the match of scan and match is. Can be used to find how many 100 | // bytes starting from scan, and from match, are equal. Returns the last byte 101 | // after scan, which is still equal to the corresponding byte after match. 102 | // scan is the position to compare 103 | // match is the earlier position to compare. 104 | // end is the last possible position, beyond which to stop looking. 105 | func getMatch(slice []byte, scan, match, end int) int { 106 | for scan < end && slice[scan] == slice[match] { 107 | scan++ 108 | match++ 109 | } 110 | 111 | return scan 112 | } 113 | 114 | func NewBlockState(options *Options, in []byte, inStart, inEnd int) (s BlockState) { 115 | s.options = options 116 | s.block = in 117 | s.blockStart = inStart 118 | s.blockEnd = inEnd 119 | if LONGEST_MATCH_CACHE { 120 | blockSize := inEnd - inStart 121 | s.lmc = newCache(blockSize) 122 | } 123 | return s 124 | } 125 | 126 | // Gets distance, length and sublen values from the cache if possible. 127 | // Returns 1 if it got the values from the cache, 0 if not. 128 | // Updates the limit value to a smaller one if possible with more limited 129 | // information from the cache. 130 | func (s *BlockState) tryGetFromLongestMatchCache(pos int, limit *uint16, sublen []uint16) (pair lz77Pair, ok bool) { 131 | // The LMC cache starts at the beginning of the block rather than the 132 | // beginning of the whole array. 133 | lmcPos := pos - s.blockStart 134 | 135 | // Length > 0 and dist 0 is invalid combination, which indicates on 136 | // purpose that this cache value is not filled in yet. 
137 | cacheAvailable := s.lmc.active && (s.lmc.store[lmcPos].litLen == 0 || s.lmc.store[lmcPos].dist != 0) 138 | var maxSublen uint16 139 | if cacheAvailable && sublen != nil { 140 | maxSublen = s.lmc.maxCachedSublen(lmcPos) 141 | } 142 | limitOkForCache := cacheAvailable && (*limit == MAX_MATCH || s.lmc.store[lmcPos].litLen <= *limit || (sublen != nil && maxSublen >= *limit)) 143 | if s.lmc.active && limitOkForCache && cacheAvailable { 144 | if sublen == nil || s.lmc.store[lmcPos].litLen <= s.lmc.maxCachedSublen(lmcPos) { 145 | pair.litLen = s.lmc.store[lmcPos].litLen 146 | if pair.litLen > *limit { 147 | pair.litLen = *limit 148 | } 149 | if sublen != nil { 150 | s.lmc.cacheToSublen(lmcPos, pair.litLen, sublen) 151 | pair.dist = sublen[pair.litLen] 152 | if *limit == MAX_MATCH && pair.litLen >= MIN_MATCH { 153 | if pair.dist != s.lmc.store[lmcPos].dist { 154 | panic("bad sublen") 155 | } 156 | } 157 | } else { 158 | pair.dist = s.lmc.store[lmcPos].dist 159 | } 160 | return pair, true 161 | } 162 | // Can't use much of the cache, since the "sublens" need to 163 | // be calculated, but at least we already know when to stop. 164 | *limit = s.lmc.store[lmcPos].litLen 165 | } 166 | 167 | return pair, false 168 | } 169 | 170 | // Stores the found sublen, distance and length in the longest match cache, if 171 | // possible. 172 | func (s *BlockState) storeInLongestMatchCache(pos int, limit uint16, 173 | sublen []uint16, pair lz77Pair) { 174 | if !s.lmc.active { 175 | return 176 | } 177 | 178 | // The LMC cache starts at the beginning of the block rather than the 179 | // beginning of the whole array. 180 | lmcPos := pos - s.blockStart 181 | lmcPair := s.lmc.store[lmcPos] 182 | 183 | // Length > 0 and dist 0 is invalid combination, which indicates on purpose 184 | // that this cache value is not filled in yet. 
185 | cacheAvailable := lmcPair.litLen == 0 || lmcPair.dist != 0 186 | if cacheAvailable { 187 | return 188 | } 189 | 190 | if limit != MAX_MATCH || sublen == nil { 191 | return 192 | } 193 | 194 | if lmcPair.litLen != 1 || lmcPair.dist != 0 { 195 | panic("overrun") 196 | } 197 | if pair.litLen < MIN_MATCH { 198 | lmcPair = lz77Pair{} 199 | } else { 200 | lmcPair = pair 201 | } 202 | s.lmc.store[lmcPos] = lmcPair 203 | if lmcPair.litLen == 1 && lmcPair.dist == 0 { 204 | panic("cached invalid combination") 205 | } 206 | s.lmc.sublenToCache(sublen, lmcPos, pair.litLen) 207 | } 208 | 209 | // Finds the longest match (length and corresponding distance) for LZ77 210 | // compression. 211 | // Even when not using "sublen", it can be more efficient to provide an array, 212 | // because only then the caching is used. 213 | // 214 | // slice: the data 215 | // 216 | // pos: position in the data to find the match for 217 | // 218 | // size: size of the data 219 | // 220 | // limit: limit length to maximum this value (default should be 258). This allows 221 | // finding a shorter dist for that length (= less extra bits). Must be 222 | // in the range [MIN_MATCH, MAX_MATCH]. 223 | // 224 | // sublen: output array of 259 elements, or null. Has, for each length, the 225 | // smallest distance required to reach this length. Only 256 of its 259 values 226 | // are used, the first 3 are ignored (the shortest length is 3. It is purely 227 | // for convenience that the array is made 3 longer). 228 | func (s *BlockState) findLongestMatch(h *hash, slice []byte, pos, size int, limit uint16, sublen []uint16) lz77Pair { 229 | hPos := uint16(pos & WINDOW_MASK) 230 | bestPair := lz77Pair{1, 0} 231 | chainCounter := MAX_CHAIN_HITS // For quitting early. 
232 | hPrev := h.prev 233 | 234 | if LONGEST_MATCH_CACHE { 235 | pair, ok := s.tryGetFromLongestMatchCache(pos, &limit, sublen) 236 | if ok { 237 | /* 238 | if pos+int(pair.litLen) > size { 239 | panic("overrun") 240 | } 241 | */ 242 | return pair 243 | } 244 | } 245 | 246 | if limit > MAX_MATCH { 247 | panic("limit is too large") 248 | } else if limit < MIN_MATCH { 249 | panic("limit is too small") 250 | } 251 | if pos >= size { 252 | panic("overrun") 253 | } 254 | 255 | if size < pos+MIN_MATCH { 256 | // The rest of the code assumes there are at least MIN_MATCH 257 | // bytes to try. 258 | return lz77Pair{} 259 | } 260 | 261 | if pos+int(limit) > size { 262 | limit = uint16(size - pos) 263 | } 264 | arrayEnd := pos + int(limit) 265 | 266 | if h.val >= 65536 { 267 | panic("hash value too large") 268 | } 269 | 270 | pp := uint16(h.head[h.val]) // During the whole loop, p == h.prev[pp]. 271 | p := hPrev[pp] 272 | 273 | if pp != hPos { 274 | panic("invalid pp") 275 | } 276 | 277 | var dist int // Not uint16 on purpose. 278 | if p < pp { 279 | dist = int(pp - p) 280 | } else { 281 | dist = WINDOW_SIZE + int(pp) - int(p) 282 | } 283 | 284 | // Go through all distances. 285 | same0 := h.same[hPos] 286 | scanned := slice[pos] 287 | for dist < WINDOW_SIZE { 288 | scan := pos 289 | match := pos - dist 290 | 291 | // Testing the byte at position bestLength first, goes slightly faster. 
292 | var currentLength uint16 293 | bestLitLen := bestPair.litLen 294 | bestPos := pos + int(bestLitLen) 295 | bestMatch := match + int(bestLitLen) 296 | if bestPos >= size || slice[bestPos] == slice[bestMatch] { 297 | if HASH_SAME { 298 | if same0 > 2 && scanned == slice[match] { 299 | same1 := h.same[match&WINDOW_MASK] 300 | var same uint16 301 | if same0 < same1 { 302 | same = same0 303 | } else { 304 | same = same1 305 | } 306 | if same > limit { 307 | same = limit 308 | } 309 | scan += int(same) 310 | match += int(same) 311 | } 312 | } 313 | scan = getMatch(slice, scan, match, arrayEnd) 314 | currentLength = uint16(scan - pos) // The found length. 315 | 316 | if currentLength > bestLitLen { 317 | if sublen != nil { 318 | for j := bestLitLen + 1; j <= currentLength; j++ { 319 | sublen[j] = uint16(dist) 320 | } 321 | } 322 | bestPair = lz77Pair{currentLength, uint16(dist)} 323 | if currentLength >= limit { 324 | break 325 | } 326 | } 327 | } 328 | 329 | if HASH_SAME_HASH { 330 | // Switch to the other hash once this will be more efficient. 331 | if bestPair.litLen >= same0 && h.val2 == h.hashVal2[p] { 332 | // Now use the hash that encodes the length and first byte. 333 | hPrev = h.prev2 334 | } 335 | } 336 | 337 | pp = p 338 | p = hPrev[p] 339 | if p == pp { 340 | // Uninited prev value. 341 | break 342 | } 343 | 344 | if p >= pp { 345 | dist += WINDOW_SIZE 346 | } 347 | dist += int(pp) - int(p) 348 | 349 | if MAX_CHAIN_HITS < WINDOW_SIZE { 350 | chainCounter-- 351 | if chainCounter <= 0 { 352 | break 353 | } 354 | } 355 | } 356 | 357 | if LONGEST_MATCH_CACHE { 358 | s.storeInLongestMatchCache(pos, limit, sublen, bestPair) 359 | } 360 | 361 | if bestPair.litLen > limit { 362 | panic("overrun") 363 | } 364 | 365 | if pos+int(bestPair.litLen) > size { 366 | panic("overrun") 367 | } 368 | return bestPair 369 | } 370 | 371 | // Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than 372 | // with the slow but better "squeeze" implementation. 
373 | // The result is placed in the LZ77Store. 374 | // If inStart is larger than 0, it uses values before inStart as starting 375 | // dictionary. 376 | func (s *BlockState) LZ77Greedy(inStart, inEnd int) (store LZ77Store) { 377 | var windowStart int 378 | if inStart > WINDOW_SIZE { 379 | windowStart = inStart - WINDOW_SIZE 380 | } 381 | 382 | // Lazy matching. 383 | var prevPair lz77Pair 384 | var matchAvailable bool 385 | 386 | if inStart == inEnd { 387 | return 388 | } 389 | 390 | h := newHash(s.block[windowStart], s.block[windowStart+1]) 391 | for i := windowStart; i < inStart; i++ { 392 | h.update(s.block, i, inEnd) 393 | } 394 | 395 | dummySublen := make([]uint16, 259) 396 | for i := inStart; i < inEnd; i++ { 397 | h.update(s.block, i, inEnd) 398 | 399 | pair := s.findLongestMatch(&h, s.block, i, inEnd, MAX_MATCH, dummySublen[:]) 400 | lengthScore := pair.lengthScore() 401 | 402 | if LAZY_MATCHING { 403 | // Lazy matching. 404 | prevLengthScore := prevPair.lengthScore() 405 | if matchAvailable { 406 | matchAvailable = false 407 | if lengthScore > prevLengthScore+1 { 408 | store = append(store, lz77Pair{uint16(s.block[i-1]), 0}) 409 | if lengthScore >= MIN_MATCH && pair.litLen < MAX_MATCH { 410 | matchAvailable = true 411 | prevPair = pair 412 | continue 413 | } 414 | } else { 415 | // Add previous to output. 416 | pair = prevPair 417 | lengthScore = prevLengthScore 418 | // Add to output. 419 | pair.Verify(s.block, i-1) 420 | store = append(store, pair) 421 | for j := uint16(2); j < pair.litLen; j++ { 422 | if i >= inEnd { 423 | panic("overrun") 424 | } 425 | i++ 426 | h.update(s.block, i, inEnd) 427 | } 428 | continue 429 | } 430 | } else if lengthScore >= MIN_MATCH && pair.litLen < MAX_MATCH { 431 | matchAvailable = true 432 | prevPair = pair 433 | continue 434 | } 435 | // End of lazy matching. 436 | } 437 | 438 | // Add to output. 
439 | if lengthScore >= MIN_MATCH { 440 | pair.Verify(s.block, i) 441 | store = append(store, pair) 442 | } else { 443 | pair.litLen = 1 444 | store = append(store, lz77Pair{uint16(s.block[i]), 0}) 445 | } 446 | for j := uint16(1); j < pair.litLen; j++ { 447 | if i >= inEnd { 448 | panic("overrun") 449 | } 450 | i++ 451 | h.update(s.block, i, inEnd) 452 | } 453 | } 454 | return store 455 | } 456 | 457 | // Counts the number of literal, length and distance symbols in the given lz77 458 | // arrays. 459 | // litLens: lz77 lit/lengths 460 | // dists: ll77 distances 461 | // start: where to begin counting in litLens and dists 462 | // end: where to stop counting in litLens and dists (not inclusive) 463 | // llCount: count of each lit/len symbol, must have size 288 (see deflate 464 | // standard) 465 | // dCount: count of each dist symbol, must have size 32 (see deflate standard) 466 | func (store LZ77Store) lz77Counts() (counts lz77Counts) { 467 | counts.litLen = make([]uint, 288) 468 | counts.dist = make([]uint, 32) 469 | end := len(store) 470 | for i := 0; i < end; i++ { 471 | pair := store[i] 472 | if pair.dist == 0 { 473 | counts.litLen[pair.litLen]++ 474 | } else { 475 | counts.litLen[pair.lengthSymbol()]++ 476 | counts.dist[pair.distSymbol()]++ 477 | } 478 | } 479 | 480 | counts.litLen[256] = 1 // End symbol. 481 | return counts 482 | } 483 | -------------------------------------------------------------------------------- /zopfli/squeeze.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 | */ 19 | 20 | package zopfli 21 | 22 | import ( 23 | "fmt" 24 | "math" 25 | "os" 26 | ) 27 | 28 | type symbolStats struct { 29 | // The literal and length symbols. 30 | litLens []float64 31 | // The 32 unique dist symbols, not the 32768 possible dists. 32 | dists []float64 33 | 34 | // Length of each lit/len symbol in bits. 35 | llSymbols []float64 36 | // Length of each dist symbol in bits. 37 | dSymbols []float64 38 | } 39 | 40 | // Adds the bit lengths. 41 | func addWeightedFreqs(stats1 symbolStats, w1 float64, 42 | stats2 symbolStats, w2 float64) (result symbolStats) { 43 | result.litLens = make([]float64, 288) 44 | result.dists = make([]float64, 32) 45 | for i := 0; i < 288; i++ { 46 | litlen1 := stats1.litLens[i] * w1 47 | litlen2 := stats2.litLens[i] * w2 48 | result.litLens[i] = litlen1 + litlen2 49 | } 50 | for i := 0; i < 32; i++ { 51 | dist1 := stats1.dists[i] * w1 52 | dist2 := stats2.dists[i] * w2 53 | result.dists[i] = dist1 + dist2 54 | } 55 | result.litLens[256] = 1 // End symbol. 56 | return result 57 | } 58 | 59 | type ranState struct { 60 | m_w, m_z uint32 61 | } 62 | 63 | func newRanState() (state ranState) { 64 | state.m_w = 1 65 | state.m_z = 2 66 | return state 67 | } 68 | 69 | /* Get random number: "Multiply-With-Carry" generator of G. 
Marsaglia */
// ran advances the generator by one step and returns the next 32-bit value.
//
// The receiver is a pointer so that the updated m_z and m_w are stored back
// into the generator. With a value receiver the updates would be made on a
// copy and every call would return the same number.
func (state *ranState) ran() uint32 {
	state.m_z = 36969*(state.m_z&65535) + (state.m_z >> 16)
	state.m_w = 18000*(state.m_w&65535) + (state.m_w >> 16)
	return (state.m_z << 16) + state.m_w // 32-bit result.
}

// randomizeFreqs overwrites roughly one in three entries of freqs with the
// value of another, randomly chosen entry of freqs. The slice is modified in
// place and the generator is advanced for every draw.
func (state *ranState) randomizeFreqs(freqs []float64) {
	n := uint32(len(freqs))
	for i := uint32(0); i < n; i++ {
		if (state.ran()>>4)%3 == 0 {
			freqs[i] = freqs[state.ran()%n]
		}
	}
}

// randomizeFreqs shuffles the lit/len and dist frequencies of stats in place
// and then restores the count of the end symbol to 1.
//
// state is received by value: both frequency tables draw from one continuing
// sequence inside this call, but the caller's generator is not advanced.
func (stats symbolStats) randomizeFreqs(state ranState) {
	state.randomizeFreqs(stats.litLens)
	state.randomizeFreqs(stats.dists)
	stats.litLens[256] = 1 // End symbol.
}

// Function that calculates a cost based on a model for the given LZ77 symbol.
// pair.litLen is a literal symbol if pair.dist is 0, a match length otherwise.
type costModelFun func(pair lz77Pair, context interface{}) float64

// Cost model which should exactly match fixed tree.
// type: costModelFun
//
// Literals up to 143 cost 8 bits and the remaining literals 9 bits. A match
// costs 5 bits for its dist symbol, 7 or 8 bits for its length symbol, plus
// the extra bits of both. The context argument is ignored.
func costFixed(pair lz77Pair, unused interface{}) float64 {
	if pair.dist == 0 {
		if pair.litLen <= 143 {
			return 8
		}
		return 9
	}
	dBits := pair.distExtraBits()
	lBits := pair.lengthExtraBits()
	lSym := pair.lengthSymbol()
	cost := float64(5) // Every dist symbol has length 5.
	if lSym <= 279 {
		cost += 7
	} else {
		cost += 8
	}
	return cost + float64(dBits+lBits)
}

// Cost model based on symbol statistics.
117 | // type: costModelFun 118 | func costStat(pair lz77Pair, context interface{}) float64 { 119 | stats := context.(symbolStats) 120 | if pair.dist == 0 { 121 | return stats.llSymbols[pair.litLen] 122 | } 123 | lSym := pair.lengthSymbol() 124 | lBits := pair.lengthExtraBits() 125 | dSym := pair.distSymbol() 126 | dBits := pair.distExtraBits() 127 | return stats.llSymbols[lSym] + float64(lBits) + stats.dSymbols[dSym] + float64(dBits) 128 | } 129 | 130 | var dSymbolTable [30]uint16 = [30]uint16{ 131 | 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 132 | 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 133 | } 134 | 135 | // Finds the minimum possible cost this cost model can return for valid length and 136 | // distance symbols. 137 | func (costModel costModelFun) minCost(costContext interface{}) float64 { 138 | var minCost float64 139 | 140 | // Table of distances that have a different distance symbol in the deflate 141 | // specification. Each value is the first distance that has a new symbol. Only 142 | // different symbols affect the cost model so only these need to be checked. 143 | // See RFC 1951 section 3.2.5. Compressed blocks (length and distance codes). 144 | 145 | // bestPair has lowest cost in the cost model 146 | var bestPair, pair lz77Pair 147 | pair.dist = 1 148 | minCost = math.Inf(1) 149 | for pair.litLen = uint16(3); pair.litLen < 259; pair.litLen++ { 150 | c := costModel(pair, costContext) 151 | if c < minCost { 152 | bestPair.litLen = pair.litLen 153 | minCost = c 154 | } 155 | } 156 | 157 | // TODO: try using bestPair.litlen instead of 3 158 | pair.litLen = 3 159 | minCost = math.Inf(1) 160 | for i := 0; i < 30; i++ { 161 | pair.dist = dSymbolTable[i] 162 | c := costModel(pair, costContext) 163 | if c < minCost { 164 | bestPair.dist = pair.dist 165 | minCost = c 166 | } 167 | } 168 | 169 | return costModel(bestPair, costContext) 170 | } 171 | 172 | // Performs the forward pass for "squeeze". 
Gets the most optimal length to reach 173 | // every byte from a previous byte, using cost calculations. 174 | // s: the BlockState 175 | // inStart: where to start 176 | // inEnd: where to stop (not inclusive) 177 | // costModel: function to calculate the cost of some lit/len/dist pair. 178 | // costContext: abstract context for the costmodel function 179 | // lengths: output slice of size (inEnd - instart) which will receive the best length to reach this byte from a previous byte. 180 | // returns the cost that was, according to the costmodel, needed to get to the end. 181 | func (s *BlockState) bestLengths(inStart, inEnd int, costModel costModelFun, costContext interface{}) (lengths []uint16) { 182 | // Best cost to get here so far. 183 | if inStart == inEnd { 184 | return nil 185 | } 186 | 187 | blockSize := inEnd - inStart 188 | lengths = make([]uint16, blockSize+1) 189 | costs := make([]float64, blockSize+1) 190 | 191 | var windowStart int 192 | if inStart > WINDOW_SIZE { 193 | windowStart = inStart - WINDOW_SIZE 194 | } 195 | h := newHash(s.block[windowStart], s.block[windowStart+1]) 196 | for i := windowStart; i < inStart; i++ { 197 | h.update(s.block, i, inEnd) 198 | } 199 | 200 | minCost := costModel.minCost(costContext) 201 | infinity := math.Inf(1) 202 | for i := 1; i <= blockSize; i++ { 203 | costs[i] = infinity 204 | } 205 | 206 | sublen := make([]uint16, 259) 207 | for i := inStart; i < inEnd; i++ { 208 | j := i - inStart // Index in the costs slice and lengths. 209 | h.update(s.block, i, inEnd) 210 | cost := costs[j] 211 | 212 | if SHORTCUT_LONG_REPETITIONS { 213 | // If we're in a long repetition of the same character and have 214 | // more than MAX_MATCH characters before and after our position. 
215 | if h.same[i&WINDOW_MASK] > MAX_MATCH*2 && 216 | i > inStart+MAX_MATCH+1 && 217 | i+MAX_MATCH*2+1 < inEnd && 218 | h.same[(i-MAX_MATCH)&WINDOW_MASK] > MAX_MATCH { 219 | symbolCost := costModel(lz77Pair{MAX_MATCH, 1}, costContext) 220 | // Set the length to reach each one to MAX_MATCH, and the cost 221 | // to the cost corresponding to that length. Doing this, we 222 | // skip MAX_MATCH values to avoid calling findLongestMatch. 223 | for k := 0; k < MAX_MATCH; k++ { 224 | costs[j+MAX_MATCH] = cost + symbolCost 225 | lengths[j+MAX_MATCH] = MAX_MATCH 226 | i++ 227 | j++ 228 | h.update(s.block, i, inEnd) 229 | cost = costs[j] 230 | } 231 | } 232 | } 233 | 234 | pair := s.findLongestMatch(&h, s.block, i, inEnd, MAX_MATCH, sublen) 235 | leng := pair.litLen 236 | 237 | // Literal. 238 | if i+1 <= inEnd { 239 | newCost := cost + costModel(lz77Pair{uint16(s.block[i]), 0}, costContext) 240 | if !(newCost >= 0) { 241 | panic("new cost is not positive") 242 | } 243 | if newCost < costs[j+1] { 244 | costs[j+1] = newCost 245 | lengths[j+1] = 1 246 | } 247 | } 248 | // Lengths. 249 | for k := uint16(3); k <= leng && i+int(k) <= inEnd; k++ { 250 | // Calling the cost model is expensive, avoid this if we are 251 | // already at the minimum possible cost that it can return. 
252 | nextCost := costs[j+int(k)] 253 | if nextCost <= minCost+cost { 254 | continue 255 | } 256 | 257 | newCost := cost + costModel(lz77Pair{k, sublen[k]}, costContext) 258 | if !(newCost >= 0) { 259 | panic("new cost is not positive") 260 | } 261 | if newCost < nextCost { 262 | if k > MAX_MATCH { 263 | panic("k is larger than MAX_MATCH") 264 | } 265 | costs[j+int(k)] = newCost 266 | lengths[j+int(k)] = k 267 | } 268 | } 269 | } 270 | 271 | cost := costs[blockSize] 272 | if !(cost >= 0) { 273 | panic("cost is not positive") 274 | } 275 | if math.IsNaN(cost) { 276 | panic("cost is NaN") 277 | } 278 | if math.IsInf(cost, 0) { 279 | panic("cost is infinite") 280 | } 281 | return lengths 282 | } 283 | 284 | // Calculates the optimal path of lz77 lengths to use, from the calculated 285 | // lengths. The lengths must contain the optimal length to reach that 286 | // byte. The path will be filled with the lengths to use, so its data size will be 287 | // the amount of lz77 symbols. 288 | func traceBackwards(size int, lengths []uint16) []uint16 { 289 | if size == 0 { 290 | return nil 291 | } 292 | 293 | var path []uint16 294 | index := size 295 | for { 296 | path = append(path, lengths[index]) 297 | if int(lengths[index]) > index { 298 | panic("length is greater than index") 299 | } 300 | if lengths[index] > MAX_MATCH { 301 | panic("length is greater than MAX_MATCH") 302 | } 303 | if lengths[index] == 0 { 304 | panic("length is zero") 305 | } 306 | index -= int(lengths[index]) 307 | if index == 0 { 308 | break 309 | } 310 | } 311 | 312 | // Mirror result. 
313 | pathSize := len(path) 314 | for index = 0; index < pathSize/2; index++ { 315 | path[index], path[pathSize-index-1] = path[pathSize-index-1], path[index] 316 | } 317 | 318 | return path 319 | } 320 | 321 | func (s *BlockState) followPath(inStart, inEnd int, 322 | path []uint16) LZ77Store { 323 | var store LZ77Store 324 | if inStart == inEnd { 325 | return store 326 | } 327 | 328 | var windowStart int 329 | if inStart > WINDOW_SIZE { 330 | windowStart = inStart - WINDOW_SIZE 331 | } 332 | h := newHash(s.block[windowStart], s.block[windowStart+1]) 333 | for i := windowStart; i < inStart; i++ { 334 | h.update(s.block, i, inEnd) 335 | } 336 | 337 | pos := inStart 338 | for _, length := range path { 339 | if pos >= inEnd { 340 | panic("position overrun") 341 | } 342 | 343 | h.update(s.block, pos, inEnd) 344 | 345 | // Add to output. 346 | if length >= MIN_MATCH { 347 | // Get the distance by recalculating longest match. The 348 | // found length should match the length from the path. 349 | pair := s.findLongestMatch(&h, s.block, pos, inEnd, length, nil) 350 | if pair.litLen != length && length > 2 && pair.litLen > 2 { 351 | panic("dummy length is invalid") 352 | } 353 | pair.Verify(s.block, pos) 354 | store = append(store, pair) 355 | } else { 356 | length = 1 357 | store = append(store, lz77Pair{uint16(s.block[pos]), 0}) 358 | } 359 | 360 | if pos+int(length) > inEnd { 361 | panic("position overrun") 362 | } 363 | for j := 1; j < int(length); j++ { 364 | h.update(s.block, pos+j, inEnd) 365 | } 366 | 367 | pos += int(length) 368 | } 369 | return store 370 | } 371 | 372 | // Calculates the entropy of the statistics 373 | func (stats *symbolStats) calculate() { 374 | stats.llSymbols = CalculateEntropy(stats.litLens) 375 | stats.dSymbols = CalculateEntropy(stats.dists) 376 | } 377 | 378 | // Appends the symbol statistics from the store. 
379 | func (store LZ77Store) statistics() (stats symbolStats) { 380 | stats.litLens = make([]float64, 288) 381 | stats.dists = make([]float64, 32) 382 | storeSize := len(store) 383 | for i := 0; i < storeSize; i++ { 384 | if store[i].dist == 0 { 385 | stats.litLens[store[i].litLen]++ 386 | } else { 387 | stats.litLens[store[i].lengthSymbol()]++ 388 | stats.dists[store[i].distSymbol()]++ 389 | } 390 | } 391 | stats.litLens[256] = 1 // End symbol. 392 | 393 | stats.calculate() 394 | return stats 395 | } 396 | 397 | // Does a single run for LZ77Optimal. For good compression, repeated runs 398 | // with updated statistics should be performed. 399 | // 400 | // s: the block state 401 | // inStart: where to start 402 | // inEnd: where to stop (not inclusive) 403 | // lengths: slice of size (inEnd - inStart) used to store lengths 404 | // costModel: function to use as the cost model for this squeeze run 405 | // costContext: abstract context for the costmodel function 406 | // store: place to output the LZ77 data 407 | // returns the cost that was, according to the costmodel, needed to get to the end. 408 | // This is not the actual cost. 409 | func (s *BlockState) lz77OptimalRun(inStart, inEnd int, costModel costModelFun, costContext interface{}) (store LZ77Store) { 410 | lengths := s.bestLengths(inStart, inEnd, costModel, costContext) 411 | path := traceBackwards(inEnd-inStart, lengths) 412 | store = s.followPath(inStart, inEnd, path) 413 | return store 414 | } 415 | 416 | // Calculates lit/len and dist pairs for given data. 417 | // If instart is larger than 0, it uses values before instart as starting 418 | // dictionary. 419 | func (s *BlockState) LZ77Optimal(inStart, inEnd int) LZ77Store { 420 | // Dist to get to here with smallest cost. 421 | bestCost := uint64(math.MaxUint64) 422 | var lastCost uint64 423 | // Try randomizing the costs a bit once the size stabilizes. 
424 | var randomize bool 425 | 426 | ranState := newRanState() 427 | 428 | // Do regular deflate, then loop multiple shortest path 429 | // runs, each time using the statistics of the previous run. 430 | 431 | // Initial run. 432 | bestStore := s.LZ77Greedy(inStart, inEnd) 433 | bestStats := bestStore.statistics() 434 | lastStats := bestStats 435 | 436 | // Repeat statistics with each time the cost model 437 | // from the previous stat run. 438 | for i := 0; i < s.options.NumIterations; i++ { 439 | store := s.lz77OptimalRun(inStart, inEnd, costStat, lastStats) 440 | cost := store.CalculateBlockSize(2) 441 | if s.options.VerboseMore || (s.options.Verbose && cost < bestCost) { 442 | fmt.Fprintf(os.Stderr, "Iteration %d: %d bit\n", i, int(cost)) 443 | } 444 | stats := store.statistics() 445 | if cost < bestCost { 446 | // Copy to the output store. 447 | bestStore = store 448 | bestStats = stats 449 | bestCost = cost 450 | } 451 | if i > 5 && cost == lastCost { 452 | lastStats = bestStats 453 | lastStats.randomizeFreqs(ranState) 454 | lastStats.calculate() 455 | randomize = true 456 | } else { 457 | if randomize { 458 | // This makes it converge slower but better. Do it only once the 459 | // randomness kicks in so that if the user does few iterations, it gives a 460 | // better result sooner. 461 | stats = addWeightedFreqs(stats, 1.0, lastStats, 0.5) 462 | stats.calculate() 463 | } 464 | lastStats = stats 465 | } 466 | lastCost = cost 467 | } 468 | return bestStore 469 | } 470 | 471 | // Does the same as LZ77Optimal, but optimized for the fixed tree of the 472 | // deflate standard. 473 | // The fixed tree never gives the best compression. But this gives the best 474 | // possible LZ77 encoding possible with the fixed tree. 475 | // This does not create or output any fixed tree, only LZ77 data optimized for 476 | // using with a fixed tree. 477 | // If inStart is larger than 0, it uses values before inStart as starting 478 | // dictionary. 
479 | func (s *BlockState) LZ77OptimalFixed(inStart, inEnd int) LZ77Store { 480 | // Dist to get to here with smallest cost. 481 | // Shortest path for fixed tree This one should give the shortest possible 482 | // result for fixed tree, no repeated runs are needed since the tree is known. 483 | return s.lz77OptimalRun(inStart, inEnd, costFixed, nil) 484 | } 485 | -------------------------------------------------------------------------------- /zopfli/tree.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 | */ 19 | 20 | package zopfli 21 | 22 | import ( 23 | "math" 24 | ) 25 | 26 | // Converts a series of Huffman tree bitLengths, to the bit values of the symbols. 27 | func lengthsToSymbols(lengths []uint, maxBits uint) (symbols []uint) { 28 | n := len(lengths) 29 | blCount := make([]uint, maxBits+1) 30 | nextCode := make([]uint, maxBits+1) 31 | 32 | symbols = make([]uint, n) 33 | 34 | // 1) Count the number of codes for each code length. 35 | // Let blCount[N] be the number of codes of length N, N >= 1. 
36 | for bits := uint(0); bits <= maxBits; bits++ { 37 | blCount[bits] = 0 38 | } 39 | for i := 0; i < n; i++ { 40 | if lengths[i] > maxBits { 41 | panic("length is too large") 42 | } 43 | blCount[lengths[i]]++ 44 | } 45 | // 2) Find the numerical value of the smallest code for each code length. 46 | var code uint 47 | blCount[0] = 0 48 | for bits := uint(1); bits <= maxBits; bits++ { 49 | code = (code + blCount[bits-1]) << 1 50 | nextCode[bits] = code 51 | } 52 | // 3) Assign numerical values to all codes, using consecutive values for 53 | // all codes of the same length with the base values determined at step 2. 54 | for i := 0; i < n; i++ { 55 | len := lengths[i] 56 | if len != 0 { 57 | symbols[i] = nextCode[len] 58 | nextCode[len]++ 59 | } 60 | } 61 | return symbols 62 | } 63 | 64 | // Calculates the entropy of each symbol, based on the counts of each symbol. The 65 | // result is similar to the result of CalculateBitLengths, but with the 66 | // actual theoritical bit lengths according to the entropy. Since the resulting 67 | // values are fractional, they cannot be used to encode the tree specified by 68 | // DEFLATE. 69 | func CalculateEntropy(count []float64) (bitLengths []float64) { 70 | var sum, log2sum float64 71 | n := len(count) 72 | for i := 0; i < n; i++ { 73 | sum += count[i] 74 | } 75 | if sum == 0 { 76 | log2sum = math.Log2(float64(n)) 77 | } else { 78 | log2sum = math.Log2(sum) 79 | } 80 | bitLengths = make([]float64, n) 81 | for i := 0; i < n; i++ { 82 | // When the count of the symbol is 0, but its cost is requested anyway, it 83 | // means the symbol will appear at least once anyway, so give it the cost as if 84 | // its count is 1. 
85 | if count[i] == 0 { 86 | bitLengths[i] = log2sum 87 | } else { 88 | bitLengths[i] = math.Log2(sum / count[i]) 89 | } 90 | if !(bitLengths[i] >= 0) { 91 | panic("bit length is not positive") 92 | } 93 | } 94 | return bitLengths 95 | } 96 | -------------------------------------------------------------------------------- /zopfli/util.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 | */ 19 | 20 | package zopfli 21 | 22 | const ( 23 | // Minimum and maximum length that can be encoded in deflate. 24 | MAX_MATCH = 258 25 | MIN_MATCH = 3 26 | 27 | // The window size for deflate. Must be a power of two. This should be 28 | // 32768, the maximum possible by the deflate spec. Anything less hurts 29 | // compression more than speed. 30 | WINDOW_SIZE = 32768 31 | 32 | // The window mask used to wrap indices into the window. This is why the 33 | // window size must be a power of two. 34 | WINDOW_MASK = (WINDOW_SIZE - 1) 35 | 36 | // A block structure of huge, non-smart, blocks to divide the input into, to allow 37 | // operating on huge files without exceeding memory, such as the 1GB wiki9 corpus. 
38 | // The whole compression algorithm, including the smarter block splitting, will 39 | // be executed independently on each huge block. 40 | // Dividing into huge blocks hurts compression, but not much relative to the size. 41 | // Set this to, for example, 20MB (20000000). Set it to 0 to disable master blocks. 42 | MASTER_BLOCK_SIZE = 20000000 43 | 44 | // For longest match cache. max 256. Uses huge amounts of memory but makes it 45 | // faster. Uses this many times three bytes per single byte of the input data. 46 | // This is so because longest match finding has to find the exact distance 47 | // that belongs to each length for the best lz77 strategy. 48 | // Good values: e.g. 5, 8. 49 | CACHE_LENGTH = 8 50 | 51 | // limit the max hash chain hits for this hash value. This has an effect only 52 | // on files where the hash value is the same very often. On these files, this 53 | // gives worse compression (the value should ideally be 32768, which is the 54 | // WINDOW_SIZE, while zlib uses 4096 even for best level), but makes it 55 | // faster on some specific files. 56 | // Good value: e.g. 8192. 57 | MAX_CHAIN_HITS = 8192 58 | 59 | // Whether to use the longest match cache for FindLongestMatch. This cache 60 | // consumes a lot of memory but speeds it up. No effect on compression size. 61 | LONGEST_MATCH_CACHE = true 62 | 63 | // Enable to remember amount of successive identical bytes in the hash chain for 64 | // finding longest match 65 | // required for HASH_SAME_HASH and SHORTCUT_LONG_REPETITIONS 66 | // This has no effect on the compression result, and enabling it increases speed. 67 | HASH_SAME = true 68 | 69 | // Switch to a faster hash based on the info from HASH_SAME once the 70 | // best length so far is long enough. This is way faster for files with lots of 71 | // identical bytes, on which the compressor is otherwise too slow. Regular files 72 | // are unaffected or maybe a tiny bit slower. 
73 | // This has no effect on the compression result, only on speed. 74 | HASH_SAME_HASH = true 75 | 76 | // Enable this, to avoid slowness for files which are a repetition of the same 77 | // character more than a multiple of MAX_MATCH times. This should not affect 78 | // the compression result. 79 | SHORTCUT_LONG_REPETITIONS = true 80 | 81 | // Whether to use lazy matching in the greedy LZ77 implementation. This gives a 82 | // better result of LZ77Greedy, but the effect this has on the optimal LZ77 83 | // varies from file to file. 84 | LAZY_MATCHING = true 85 | ) 86 | 87 | // Gets the amount of extra bits for the given dist, cfr. the DEFLATE spec. 88 | func (pair lz77Pair) distExtraBits() uint16 { 89 | dist := pair.dist 90 | if dist < 5 { 91 | return 0 92 | } else if dist < 9 { 93 | return 1 94 | } else if dist < 17 { 95 | return 2 96 | } else if dist < 33 { 97 | return 3 98 | } else if dist < 65 { 99 | return 4 100 | } else if dist < 129 { 101 | return 5 102 | } else if dist < 257 { 103 | return 6 104 | } else if dist < 513 { 105 | return 7 106 | } else if dist < 1025 { 107 | return 8 108 | } else if dist < 2049 { 109 | return 9 110 | } else if dist < 4097 { 111 | return 10 112 | } else if dist < 8193 { 113 | return 11 114 | } else if dist < 16385 { 115 | return 12 116 | } 117 | return 13 118 | } 119 | 120 | // Gets value of the extra bits for the given dist, cfr. the DEFLATE spec. 
121 | func (pair lz77Pair) distExtraBitsValue() uint16 { 122 | dist := pair.dist 123 | switch { 124 | case dist < 5: 125 | return 0 126 | case dist < 9: 127 | return (dist - 5) & 1 128 | case dist < 17: 129 | return (dist - 9) & 3 130 | case dist < 33: 131 | return (dist - 17) & 7 132 | case dist < 65: 133 | return (dist - 33) & 15 134 | case dist < 129: 135 | return (dist - 65) & 31 136 | case dist < 257: 137 | return (dist - 129) & 63 138 | case dist < 513: 139 | return (dist - 257) & 127 140 | case dist < 1025: 141 | return (dist - 513) & 255 142 | case dist < 2049: 143 | return (dist - 1025) & 511 144 | case dist < 4097: 145 | return (dist - 2049) & 1023 146 | case dist < 8193: 147 | return (dist - 4097) & 2047 148 | case dist < 16385: 149 | return (dist - 8193) & 4095 150 | } 151 | return dist - 16385&8191 152 | } 153 | 154 | // Gets the symbol for the given dist, cfr. the DEFLATE spec. 155 | func (pair lz77Pair) distSymbol() uint16 { 156 | dist := pair.dist 157 | if dist < 193 { 158 | if dist < 13 { 159 | // dist 0..13. 160 | if dist < 5 { 161 | return dist - 1 162 | } else if dist < 7 { 163 | return 4 164 | } else if dist < 9 { 165 | return 5 166 | } 167 | return 6 168 | } else { 169 | // dist 13..193. 170 | if dist < 17 { 171 | return 7 172 | } else if dist < 25 { 173 | return 8 174 | } else if dist < 33 { 175 | return 9 176 | } else if dist < 49 { 177 | return 10 178 | } else if dist < 65 { 179 | return 11 180 | } else if dist < 97 { 181 | return 12 182 | } else if dist < 129 { 183 | return 13 184 | } 185 | return 14 186 | } 187 | } 188 | if dist < 2049 { 189 | // dist 193..2049. 190 | if dist < 257 { 191 | return 15 192 | } else if dist < 385 { 193 | return 16 194 | } else if dist < 513 { 195 | return 17 196 | } else if dist < 769 { 197 | return 18 198 | } else if dist < 1025 { 199 | return 19 200 | } else if dist < 1537 { 201 | return 20 202 | } 203 | return 21 204 | } 205 | // dist 2049..32768. 
206 | if dist < 3073 { 207 | return 22 208 | } else if dist < 4097 { 209 | return 23 210 | } else if dist < 6145 { 211 | return 24 212 | } else if dist < 8193 { 213 | return 25 214 | } else if dist < 12289 { 215 | return 26 216 | } else if dist < 16385 { 217 | return 27 218 | } else if dist < 24577 { 219 | return 28 220 | } 221 | return 29 222 | } 223 | 224 | var lengthExtraBitsTable [259]uint16 = [259]uint16{ 225 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 226 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 227 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 228 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 229 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 230 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 231 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 232 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 233 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 234 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 235 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 236 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 237 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 238 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 239 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 240 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 241 | } 242 | 243 | // Gets the amount of extra bits for the given length, cfr. the DEFLATE spec. 
244 | func (pair lz77Pair) lengthExtraBits() uint16 { 245 | return lengthExtraBitsTable[pair.litLen] 246 | } 247 | 248 | var lengthExtraBitsValueTable [259]uint16 = [259]uint16{ 249 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 0, 250 | 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 251 | 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 252 | 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 253 | 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 254 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 256 | 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 257 | 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 258 | 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 259 | 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 260 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 261 | } 262 | 263 | // Gets value of the extra bits for the given length, cfr. the DEFLATE spec. 
264 | func (pair lz77Pair) lengthExtraBitsValue() uint16 { 265 | return lengthExtraBitsValueTable[pair.litLen] 266 | } 267 | 268 | var lengthSymbolTable [259]uint16 = [259]uint16{ 269 | 0, 0, 0, 257, 258, 259, 260, 261, 262, 263, 264, 270 | 265, 265, 266, 266, 267, 267, 268, 268, 271 | 269, 269, 269, 269, 270, 270, 270, 270, 272 | 271, 271, 271, 271, 272, 272, 272, 272, 273 | 273, 273, 273, 273, 273, 273, 273, 273, 274 | 274, 274, 274, 274, 274, 274, 274, 274, 275 | 275, 275, 275, 275, 275, 275, 275, 275, 276 | 276, 276, 276, 276, 276, 276, 276, 276, 277 | 277, 277, 277, 277, 277, 277, 277, 277, 278 | 277, 277, 277, 277, 277, 277, 277, 277, 279 | 278, 278, 278, 278, 278, 278, 278, 278, 280 | 278, 278, 278, 278, 278, 278, 278, 278, 281 | 279, 279, 279, 279, 279, 279, 279, 279, 282 | 279, 279, 279, 279, 279, 279, 279, 279, 283 | 280, 280, 280, 280, 280, 280, 280, 280, 284 | 280, 280, 280, 280, 280, 280, 280, 280, 285 | 281, 281, 281, 281, 281, 281, 281, 281, 286 | 281, 281, 281, 281, 281, 281, 281, 281, 287 | 281, 281, 281, 281, 281, 281, 281, 281, 288 | 281, 281, 281, 281, 281, 281, 281, 281, 289 | 282, 282, 282, 282, 282, 282, 282, 282, 290 | 282, 282, 282, 282, 282, 282, 282, 282, 291 | 282, 282, 282, 282, 282, 282, 282, 282, 292 | 282, 282, 282, 282, 282, 282, 282, 282, 293 | 283, 283, 283, 283, 283, 283, 283, 283, 294 | 283, 283, 283, 283, 283, 283, 283, 283, 295 | 283, 283, 283, 283, 283, 283, 283, 283, 296 | 283, 283, 283, 283, 283, 283, 283, 283, 297 | 284, 284, 284, 284, 284, 284, 284, 284, 298 | 284, 284, 284, 284, 284, 284, 284, 284, 299 | 284, 284, 284, 284, 284, 284, 284, 284, 300 | 284, 284, 284, 284, 284, 284, 284, 285, 301 | } 302 | 303 | // Gets the symbol for the given length, cfr. the DEFLATE spec. 
304 | // Returns the symbol in the range [257-285] (inclusive) 305 | func (pair lz77Pair) lengthSymbol() uint16 { 306 | return lengthSymbolTable[pair.litLen] 307 | } 308 | 309 | func DefaultOptions() (options Options) { 310 | options.Verbose = false 311 | options.VerboseMore = false 312 | options.NumIterations = 15 313 | options.BlockSplitting = true 314 | options.BlockSplittingLast = false 315 | options.BlockSplittingMax = 15 316 | options.BlockType = DYNAMIC_BLOCK 317 | return options 318 | } 319 | -------------------------------------------------------------------------------- /zopfli/zlib_container.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Google Inc. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 | */ 19 | 20 | package zopfli 21 | 22 | import ( 23 | "fmt" 24 | "hash/adler32" 25 | "io" 26 | "os" 27 | ) 28 | 29 | func ZlibCompress(options *Options, in []byte, out io.Writer) error { 30 | var counter countingWriter 31 | if options.Verbose { 32 | counter = newCountingWriter(out) 33 | out = &counter 34 | } 35 | 36 | const cmf = 120 /* CM 8, CINFO 7. 
See zlib spec.*/ 37 | const flevel = 0 38 | const fdict = 0 39 | var cmfflg uint16 = 256*cmf + fdict*32 + flevel*64 40 | fcheck := 31 - cmfflg%31 41 | cmfflg += fcheck 42 | flagBytes := []byte{ 43 | byte(cmfflg >> 8), 44 | byte(cmfflg), 45 | } 46 | _, flagErr := out.Write(flagBytes) 47 | if flagErr != nil { 48 | return flagErr 49 | } 50 | 51 | z := NewDeflator(out, options) 52 | writeErr := z.Deflate(true, in) 53 | if writeErr != nil { 54 | return writeErr 55 | } 56 | 57 | checksum := adler32.New() 58 | checksum.Write(in) 59 | final := checksum.Sum32() 60 | checksumBytes := []byte{ 61 | byte(final >> 24), 62 | byte(final >> 16), 63 | byte(final >> 8), 64 | byte(final), 65 | } 66 | _, checksumErr := out.Write(checksumBytes) 67 | if checksumErr != nil { 68 | return checksumErr 69 | } 70 | 71 | if options.Verbose { 72 | inSize := len(in) 73 | outSize := counter.written 74 | fmt.Fprintf(os.Stderr, 75 | "Original Size: %d, Zlib: %d, Compression: %f%% Removed\n", 76 | inSize, outSize, 77 | 100*float64(inSize-outSize)/float64(inSize)) 78 | } 79 | return nil 80 | } 81 | -------------------------------------------------------------------------------- /zopfli/zopfli.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
// Options holds the settings used throughout the program. DefaultOptions
// returns a value filled in with the defaults mentioned below.
type Options struct {
	// Verbose enables printing of output (for example the size summary that
	// ZlibCompress writes to stderr).
	Verbose bool

	// VerboseMore enables more detailed output.
	VerboseMore bool

	// NumIterations is the maximum number of times to rerun the forward and
	// backward pass to optimize LZ77 compression cost. Good values: 10, 15
	// for small files, 5 for files over several MB in size or it will be too
	// slow. Default: 15.
	NumIterations int

	// BlockSplitting, if true, splits the data in multiple deflate blocks
	// with optimal choice for the block boundaries. Block splitting gives
	// better compression. Default: true.
	BlockSplitting bool

	// BlockSplittingLast, if true, chooses the optimal block split points
	// only after doing the iterative LZ77 compression. If false, chooses the
	// block split points first, then does iterative LZ77 on each individual
	// block. Depending on the file, either first or last gives the best
	// compression. Default: false.
	BlockSplittingLast bool

	// BlockSplittingMax is the maximum number of blocks to split into (0 for
	// unlimited, but this can give extreme results that hurt compression on
	// some files). Default: 15.
	BlockSplittingMax int

	// BlockType is the deflate block type. Use 2 (DYNAMIC_BLOCK) for best
	// compression.
	//   0: non compressed blocks (00)  - UNCOMPRESSED_BLOCK
	//   1: blocks with fixed tree (01) - FIXED_BLOCK
	//   2: blocks with dynamic tree (10) - DYNAMIC_BLOCK
	BlockType byte
}

// Output format: the container selected by the outputType argument of
// Compress. Values are 0 (gzip), 1 (zlib) and 2 (raw deflate).
const (
	FORMAT_GZIP = iota
	FORMAT_ZLIB
	FORMAT_DEFLATE
)

// Block type: the values accepted by Options.BlockType, numbered 0, 1 and 2
// in declaration order.
const (
	UNCOMPRESSED_BLOCK = iota
	FIXED_BLOCK
	DYNAMIC_BLOCK
)
15 | 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 | */ 19 | 20 | package zopfli 21 | 22 | import ( 23 | "io" 24 | ) 25 | 26 | type countingWriter struct { 27 | w io.Writer 28 | written int 29 | } 30 | 31 | func newCountingWriter(w io.Writer) countingWriter { 32 | return countingWriter{w, 0} 33 | } 34 | 35 | func (cw *countingWriter) Write(p []byte) (int, error) { 36 | cw.written += len(p) 37 | return cw.w.Write(p) 38 | } 39 | 40 | func Compress(options *Options, outputType int, in []byte, out io.Writer) error { 41 | switch outputType { 42 | case FORMAT_GZIP: 43 | return GzipCompress(options, in, out) 44 | case FORMAT_ZLIB: 45 | return ZlibCompress(options, in, out) 46 | case FORMAT_DEFLATE: 47 | return DeflateCompress(options, in, out) 48 | } 49 | panic("Unknown output type") 50 | } 51 | 52 | func DeflateCompress(options *Options, in []byte, out io.Writer) error { 53 | z := NewDeflator(out, options) 54 | deflateErr := z.Deflate(true, in) 55 | if deflateErr != nil { 56 | return deflateErr 57 | } 58 | 59 | return nil 60 | } 61 | -------------------------------------------------------------------------------- /zopfli_bin.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011 Google Inc. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18 | */ 19 | 20 | /* 21 | Zopfli compressor program. It can output gzip-, zlib- or deflate-compatible 22 | data. By default it creates a .gz file. This tool can only compress, not 23 | decompress. Decompression can be done by any standard gzip, zlib or deflate 24 | decompressor. 25 | */ 26 | 27 | package main 28 | 29 | import ( 30 | "bytes" 31 | "flag" 32 | "fmt" 33 | "github.com/foobaz/go-zopfli/zopfli" 34 | "io" 35 | "io/ioutil" 36 | "os" 37 | "runtime" 38 | "runtime/pprof" 39 | ) 40 | 41 | var parallel bool 42 | 43 | // outfilename: filename to write output to, or 0 to write to stdout instead 44 | func compressFile(options *zopfli.Options, outputType int, 45 | inFileName, outFileName string) error { 46 | in, inErr := ioutil.ReadFile(inFileName) 47 | if inErr != nil { 48 | return inErr 49 | } 50 | 51 | var out io.WriteCloser 52 | if outFileName == "" { 53 | out = os.Stdout 54 | } else { 55 | var outErr error 56 | out, outErr = os.Create(outFileName) 57 | if outErr != nil { 58 | return outErr 59 | } 60 | defer out.Close() 61 | } 62 | 63 | nJobs := 1 64 | if parallel { 65 | nJobs = runtime.GOMAXPROCS(-1) 66 | } 67 | chunk := len(in) / nJobs 68 | type job struct { 69 | in []byte 70 | w *bytes.Buffer 71 | err error 72 | done chan struct{} 73 | } 74 | jobs := make([]job, nJobs) 75 | 76 | offset := 0 77 | for jbnum := 0; jbnum < nJobs; jbnum++ { 78 | end := offset + chunk 79 | if end > len(in) { 80 | end = len(in) 81 | } 82 | 83 | jobs[jbnum].in = in[offset:end] 84 | jobs[jbnum].w = new(bytes.Buffer) 85 | jobs[jbnum].done = make(chan struct{}) 86 | 87 | go func(j *job) { 88 | j.err = zopfli.Compress(options, outputType, j.in, j.w) 89 | close(j.done) 90 | }(&jobs[jbnum]) 91 | 92 | offset += chunk 93 | } 94 | 95 | // Collect the output, concatenate into the output io.Writer 96 | // gzip file format supports concatenation transparently: 97 | // 
https://www.gnu.org/software/gzip/manual/gzip.html#Advanced-usage 98 | for i := range jobs { 99 | // Note: It seems like the above could be "for _,j := range jobs", 100 | // but that would be a data race, because j.err would have an old value 101 | // when we wake up from sleeping on done. Instead, use an array index 102 | // so that each access to jobs[i] respects the happens-before ordering. 103 | <-jobs[i].done 104 | if jobs[i].err != nil { 105 | return jobs[i].err 106 | } 107 | _, err := io.Copy(out, jobs[i].w) 108 | if err != nil { 109 | return err 110 | } 111 | } 112 | 113 | return nil 114 | } 115 | 116 | func main() { 117 | options := zopfli.DefaultOptions() 118 | 119 | flag.BoolVar(&options.Verbose, "v", options.Verbose, "verbose mode") 120 | flag.BoolVar(&options.VerboseMore, "vv", options.VerboseMore, "more verbose mode") 121 | outputToStdout := flag.Bool("c", false, "write the result on standard output, instead of disk") 122 | deflate := flag.Bool("deflate", false, "output to deflate format instead of gzip") 123 | zlib := flag.Bool("zlib", false, "output to zlib format instead of gzip") 124 | gzip := flag.Bool("gzip", true, "output to gzip format") 125 | flag.BoolVar(&options.BlockSplittingLast, "splitlast", options.BlockSplittingLast, "do block splitting last instead of first") 126 | flag.IntVar(&options.NumIterations, "i", options.NumIterations, "perform # iterations (default 15). More gives more compression but is slower. Examples: -i=10, -i=50, -i=1000") 127 | var cpuProfile string 128 | flag.StringVar(&cpuProfile, "cpuprofile", "", "write cpu profile to file") 129 | flag.BoolVar(¶llel, "parallel", false, "compress in parallel (gzip only); use GOMAXPROCS to set the amount of parallelism. 
More parallelism = smaller independent chunks, thus worse compression ratio.") 130 | flag.Parse() 131 | 132 | if parallel && !*gzip { 133 | fmt.Fprintf(os.Stderr, "Error: parallel is only supported with gzip containers.") 134 | return 135 | } 136 | 137 | if options.VerboseMore { 138 | options.Verbose = true 139 | } 140 | var outputType int 141 | if *deflate && !*zlib && !*gzip { 142 | outputType = zopfli.FORMAT_DEFLATE 143 | } else if *zlib && !*deflate && !*gzip { 144 | outputType = zopfli.FORMAT_ZLIB 145 | } else { 146 | outputType = zopfli.FORMAT_GZIP 147 | } 148 | 149 | if options.NumIterations < 1 { 150 | fmt.Fprintf(os.Stderr, "Error: must have 1 or more iterations") 151 | return 152 | } 153 | 154 | var allFileNames []string 155 | if *outputToStdout { 156 | allFileNames = append(allFileNames, "") 157 | } else { 158 | allFileNames = flag.Args() 159 | } 160 | if len(allFileNames) <= 0 { 161 | fmt.Fprintf(os.Stderr, "Please provide filename\n") 162 | } 163 | if cpuProfile != "" { 164 | f, err := os.Create(cpuProfile) 165 | if err == nil { 166 | pprof.StartCPUProfile(f) 167 | defer f.Close() 168 | defer pprof.StopCPUProfile() 169 | } 170 | } 171 | for _, fileName := range allFileNames { 172 | var outFileName string 173 | if *outputToStdout { 174 | outFileName = "" 175 | } else { 176 | switch outputType { 177 | case zopfli.FORMAT_GZIP: 178 | outFileName = fileName + ".gz" 179 | case zopfli.FORMAT_ZLIB: 180 | outFileName = fileName + ".zlib" 181 | case zopfli.FORMAT_DEFLATE: 182 | outFileName = fileName + ".deflate" 183 | default: 184 | panic("Unknown output type") 185 | } 186 | if options.Verbose { 187 | fmt.Fprintf(os.Stderr, "Saving to: %s\n", outFileName) 188 | } 189 | } 190 | compressErr := compressFile(&options, outputType, fileName, outFileName) 191 | if compressErr != nil { 192 | fmt.Fprintf(os.Stderr, "could not compress %s: %v\n", fileName, compressErr) 193 | } 194 | } 195 | } 196 | 
--------------------------------------------------------------------------------