├── .gitignore
├── README.md
├── zopfli
    ├── blocksplitter.go
    ├── cache.go
    ├── deflate.go
    ├── gzip_container.go
    ├── hash.go
    ├── katajainen.go
    ├── lz77.go
    ├── squeeze.go
    ├── tree.go
    ├── util.go
    ├── zlib_container.go
    ├── zopfli.go
    └── zopfli_lib.go
└── zopfli_bin.go


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects)
 2 | *.o
 3 | *.a
 4 | *.so
 5 | 
 6 | # Folders
 7 | _obj
 8 | _test
 9 | 
10 | # Architecture specific extensions/prefixes
11 | *.[568vq]
12 | [568vq].out
13 | 
14 | *.cgo1.go
15 | *.cgo2.c
16 | _cgo_defun.c
17 | _cgo_gotypes.go
18 | _cgo_export.*
19 | 
20 | _testmain.go
21 | 
22 | *.exe
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | go-zopfli
 2 | =========
 3 | 
 4 | Go port of Zopfli, a zlib-compatible compression library.
 5 | 
 6 | Zopfli compresses data more effectively than zlib does, at the expense of
 7 | compression speed. The go-zopfli port is 2-3 times slower than the C version,
 8 | and compresses at approximately 100 kB/s.
 9 | 
10 | It can be used to compress files that will not change often or if you need a
11 | lower-level interface to Deflate compression.
12 | 
13 | [See the package documentation.](http://godoc.org/github.com/foobaz/go-zopfli/zopfli)
14 | 


--------------------------------------------------------------------------------
/zopfli/blocksplitter.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011 Google Inc. All Rights Reserved.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 | 		http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | 
 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
 18 | */
 19 | 
 20 | package zopfli
 21 | 
 22 | import (
 23 | 	"fmt"
 24 | 	"math"
 25 | 	"os"
 26 | )
 27 | 
// findMinimumFun is the signature of the function "f" that findMinimum
// minimizes.
// i: the current parameter of f(i)
// context: opaque value that findMinimum hands through unchanged on every
// call, for use by your implementation
// The returned uint64 is the value of f at i; findMinimum looks for the i
// where it is smallest.
type findMinimumFun func(i int, context interface{}) uint64
 32 | 
 33 | // Finds minimum of function f(i) where i is of type int, f(i) is of type
 34 | // float64, i is in range start-end (excluding end).
 35 | func findMinimum(f findMinimumFun, context interface{}, start, end int) int {
 36 | 	if end-start < 1024 {
 37 | 		best := uint64(math.MaxUint64)
 38 | 		result := start
 39 | 		for i := start; i < end; i++ {
 40 | 			v := f(i, context)
 41 | 			if v < best {
 42 | 				best = v
 43 | 				result = i
 44 | 			}
 45 | 		}
 46 | 		return result
 47 | 	}
 48 | 
 49 | 	// Try to find minimum faster by recursively checking multiple points.
 50 | 	const NUM = 9 // Good value: 9.
 51 | 	var p [NUM]int
 52 | 	var vp [NUM]uint64
 53 | 	lastBest := uint64(math.MaxUint64)
 54 | 	pos := start
 55 | 
 56 | 	for end-start > NUM {
 57 | 		for i := 0; i < NUM; i++ {
 58 | 			p[i] = start + (i+1)*((end-start)/(NUM+1))
 59 | 			vp[i] = f(p[i], context)
 60 | 		}
 61 | 		var bestIndex int
 62 | 		best := vp[0]
 63 | 		for i := 1; i < NUM; i++ {
 64 | 			if vp[i] < best {
 65 | 				best = vp[i]
 66 | 				bestIndex = i
 67 | 			}
 68 | 		}
 69 | 		if best > lastBest {
 70 | 			break
 71 | 		}
 72 | 
 73 | 		if bestIndex > 0 {
 74 | 			start = p[bestIndex-1]
 75 | 		}
 76 | 		if bestIndex < NUM-1 {
 77 | 			end = p[bestIndex+1]
 78 | 		}
 79 | 
 80 | 		pos = p[bestIndex]
 81 | 		lastBest = best
 82 | 	}
 83 | 	return pos
 84 | }
 85 | 
 86 | // Returns estimated cost of a block in bits.	It includes the size to encode the
 87 | // tree and the size to encode all literal, length and distance symbols and their
 88 | // extra bits.
 89 | //
 90 | // litLens: lz77 lit/lengths
 91 | // dists: ll77 distances
 92 | func (store LZ77Store) estimateCost() uint64 {
 93 | 	return store.CalculateBlockSize(2)
 94 | }
 95 | 
// splitCostContext is the context that splitCost expects to receive (as a
// pointer) through findMinimum.
type splitCostContext struct {
	// store is the LZ77 data in which a split point is searched.
	store      LZ77Store
	// start and end delimit the section store[start:end] that is being split.
	start, end int
}
100 | 
101 | // Gets the cost which is the sum of the cost of the left and the right section
102 | // of the data.
103 | // type: findMinimumFun
104 | func splitCost(i int, context interface{}) uint64 {
105 | 	c := context.(*splitCostContext)
106 | 	a := c.store[c.start:i]
107 | 	b := c.store[i:c.end]
108 | 	return a.estimateCost() + b.estimateCost()
109 | }
110 | 
// addSorted inserts value into splitPoints, which is expected to be sorted in
// ascending order, in front of the first element greater than value (or at
// the end if there is none) and returns the resulting slice. The slice is
// grown with append, so the backing array of the argument may be modified.
func addSorted(splitPoints []int, value int) []int {
	// Locate the insertion position among the existing elements.
	at := len(splitPoints)
	for idx, existing := range splitPoints {
		if existing > value {
			at = idx
			break
		}
	}

	splitPoints = append(splitPoints, value)
	if at < len(splitPoints)-1 {
		// Shift the tail one place to the right and drop value into the gap.
		copy(splitPoints[at+1:], splitPoints[at:])
		splitPoints[at] = value
	}
	return splitPoints
}
123 | 
124 | // Prints the block split points as decimal and hex values in the terminal.
125 | func (store LZ77Store) printBlockSplitPoints(lz77SplitPoints []int) {
126 | 	llSize := len(store)
127 | 	nLZ77Points := len(lz77SplitPoints)
128 | 	splitPoints := make([]int, 0, nLZ77Points)
129 | 	// The input is given as lz77 indices, but we want to see the
130 | 	// uncompressed index values.
131 | 	if nLZ77Points > 0 {
132 | 		var pos int
133 | 		for i := 0; i < llSize; i++ {
134 | 			var length int
135 | 			if store[i].dist == 0 {
136 | 				length = 1
137 | 			} else {
138 | 				length = int(store[i].litLen)
139 | 			}
140 | 			if lz77SplitPoints[len(splitPoints)] == i {
141 | 				splitPoints = append(splitPoints, pos)
142 | 				if len(splitPoints) >= nLZ77Points {
143 | 					break
144 | 				}
145 | 			}
146 | 			pos += length
147 | 		}
148 | 	}
149 | 	if len(splitPoints) != nLZ77Points {
150 | 		panic("number of points do not match")
151 | 	}
152 | 
153 | 	fmt.Fprintf(os.Stderr, "block split points: ")
154 | 	for _, point := range splitPoints {
155 | 		fmt.Fprintf(os.Stderr, "%d ", point)
156 | 	}
157 | 	fmt.Fprintf(os.Stderr, "(hex:")
158 | 	for _, point := range splitPoints {
159 | 		fmt.Fprintf(os.Stderr, " %x", point)
160 | 	}
161 | 	fmt.Fprintf(os.Stderr, ")\n")
162 | }
163 | 
// findLargestSplittableBlock finds the next block to try to split: the
// largest of the blocks that are not marked as done. The largest is chosen so
// that, if only a limited amount of blocks is requested, their sizes are
// spread evenly.
//
// llSize: the size of the LZ77 data, which is the size of the done slice.
// done: done[p] is true when the block starting at position p is no longer
// splittable (splitting it increases rather than decreases cost).
// splitPoints: the split points found so far, in ascending order.
//
// The blocks considered run from 0 to the first split point, between
// consecutive split points, and from the last split point to llSize-1.
// It returns the start and end of the chosen block and found == true, or
// found == false (with zero bounds) if no block qualifies.
func findLargestSplittableBlock(llSize int, done []bool, splitPoints []int) (lStart, lEnd int, found bool) {
	widest := 0
	blockStart := 0
	for i := 0; i <= len(splitPoints); i++ {
		blockEnd := llSize - 1
		if i < len(splitPoints) {
			blockEnd = splitPoints[i]
		}
		if !done[blockStart] && blockEnd-blockStart > widest {
			lStart, lEnd, found = blockStart, blockEnd, true
			widest = blockEnd - blockStart
		}
		// The next block begins where this one ended.
		blockStart = blockEnd
	}
	return lStart, lEnd, found
}
197 | 
198 | func (store LZ77Store) blockSplitLZ77(options *Options, maxBlocks int) []int {
199 | 	llSize := len(store)
200 | 	if llSize < 10 {
201 | 		// This code fails on tiny files.
202 | 		return nil
203 | 	}
204 | 
205 | 	done := make([]bool, llSize)
206 | 
207 | 	var splitPoints []int
208 | 	var lStart int
209 | 	lEnd := llSize
210 | 	for {
211 | 		if maxBlocks > 0 && len(splitPoints)+1 >= maxBlocks {
212 | 			break
213 | 		}
214 | 
215 | 		var c splitCostContext
216 | 		c.store = store
217 | 		c.start = lStart
218 | 		c.end = lEnd
219 | 		if lStart >= lEnd {
220 | 			panic("overrun")
221 | 		}
222 | 		llPos := findMinimum(splitCost, &c, lStart+1, lEnd)
223 | 
224 | 		if llPos <= lStart {
225 | 			panic("underrun")
226 | 		}
227 | 		if llPos >= lEnd {
228 | 			panic("overrun")
229 | 		}
230 | 
231 | 		a := store[lStart:llPos]
232 | 		b := store[llPos:lEnd]
233 | 		splitCost := a.estimateCost() + b.estimateCost()
234 | 		both := store[lStart:lEnd]
235 | 		origCost := both.estimateCost()
236 | 
237 | 		if splitCost > origCost || llPos == lStart+1 || llPos == lEnd {
238 | 			done[lStart] = true
239 | 		} else {
240 | 			splitPoints = addSorted(splitPoints, llPos)
241 | 		}
242 | 
243 | 		var found bool
244 | 		lStart, lEnd, found = findLargestSplittableBlock(llSize, done, splitPoints)
245 | 		if !found {
246 | 			// No further split will probably reduce compression.
247 | 			break
248 | 		}
249 | 
250 | 		if lEnd < lStart+10 {
251 | 			break
252 | 		}
253 | 	}
254 | 
255 | 	if options.Verbose {
256 | 		store.printBlockSplitPoints(splitPoints)
257 | 	}
258 | 	return splitPoints
259 | }
260 | 
261 | // Does blocksplitting on uncompressed data.
262 | // The output splitpoints are indices in the uncompressed bytes.
263 | //
264 | // options: general program options.
265 | // in: uncompressed input data
266 | // inStart: where to start splitting
267 | // inEnd: where to end splitting (not inclusive)
268 | // maxBlocks: maximum amount of blocks to split into, or 0 for no limit
269 | // splitPoints: dynamic array to put the resulting split point coordinates into.
270 | //   The coordinates are indices in the input array.
271 | func blockSplit(options *Options, in []byte, inStart, inEnd, maxBlocks int) []int {
272 | 	s := NewBlockState(options, in, inStart, inEnd)
273 | 
274 | 	// Unintuitively, using a simple LZ77 method here instead of LZ77Optimal
275 | 	// results in better blocks.
276 | 	store := s.LZ77Greedy(inStart, inEnd)
277 | 	lz77SplitPoints := store.blockSplitLZ77(options, maxBlocks)
278 | 
279 | 	// Convert LZ77 positions to positions in the uncompressed input.
280 | 	var splitPoints []int
281 | 	pos := inStart
282 | 	if len(lz77SplitPoints) > 0 {
283 | 		storeSize := len(store)
284 | 		for i := 0; i < storeSize; i++ {
285 | 			var length int
286 | 			if store[i].dist == 0 {
287 | 				length = 1
288 | 			} else {
289 | 				length = int(store[i].litLen)
290 | 			}
291 | 			if lz77SplitPoints[len(splitPoints)] == i {
292 | 				splitPoints = append(splitPoints, pos)
293 | 				if len(splitPoints) == len(lz77SplitPoints) {
294 | 					break
295 | 				}
296 | 			}
297 | 			pos += length
298 | 		}
299 | 	}
300 | 	if len(splitPoints) != len(lz77SplitPoints) {
301 | 		panic("number of points do not match")
302 | 	}
303 | 	return splitPoints
304 | }
305 | 
// blockSplitSimple divides the range inStart-inEnd (excluding inEnd) into
// blocks of blockSize and returns the start position of every block,
// beginning with inStart itself. It does not take LZ77 lengths into account.
// The result is nil when inStart >= inEnd. blockSize must be positive,
// otherwise the loop never terminates.
func blockSplitSimple(inStart, inEnd, blockSize int) (splitPoints []int) {
	for at := inStart; at < inEnd; at += blockSize {
		splitPoints = append(splitPoints, at)
	}
	return splitPoints
}
316 | 


--------------------------------------------------------------------------------
/zopfli/cache.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011 Google Inc. All Rights Reserved.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 | 		http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | 
 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
 18 | */
 19 | 
 20 | package zopfli
 21 | 
// Cache used by FindLongestMatch to remember previously found length/dist
// values.
// This is needed because the squeeze runs will ask these values multiple times for
// the same position.
// Uses large amounts of memory, since it has to remember the distance belonging
// to every possible shorter-than-the-best length (the so called "sublen" array).
type longestMatchCache struct {
	// store holds one entry per position of the block. newCache initializes
	// every entry to litLen 1 with dist 0, an invalid combination that marks
	// the entry as not filled in yet.
	store  LZ77Store
	// sublen holds CACHE_LENGTH*3 bytes per position: up to CACHE_LENGTH
	// triples of (length-3, low byte of dist, high byte of dist), as written
	// by sublenToCache.
	sublen []uint8
	// active is set to true by newCache; how it is consulted is not visible
	// in this file.
	active bool
}
 33 | 
 34 | // Initialize a LongestMatchCache.
 35 | func newCache(blockSize int) (lmc longestMatchCache) {
 36 | 	lmc.store = make(LZ77Store, blockSize)
 37 | 	// Rather large amount of memory.
 38 | 	lmc.sublen = make([]uint8, CACHE_LENGTH*3*blockSize)
 39 | 	lmc.active = true
 40 | 
 41 | 	// length > 0 and dist 0 is invalid combination, which indicates on
 42 | 	// purpose that this cache value is not filled in yet.
 43 | 	for i := 0; i < blockSize; i++ {
 44 | 		lmc.store[i].litLen = 1
 45 | 	}
 46 | 
 47 | 	return lmc
 48 | }
 49 | 
 50 | // Stores sublen array in the cache
 51 | func (lmc longestMatchCache) sublenToCache(sublen []uint16,
 52 | 	pos int, length uint16) {
 53 | 	var j, bestLength uint16
 54 | 
 55 | 	if CACHE_LENGTH == 0 {
 56 | 		return
 57 | 	}
 58 | 
 59 | 	cache := lmc.sublen[CACHE_LENGTH*pos*3:]
 60 | 	if length < 3 {
 61 | 		return
 62 | 	}
 63 | 	for i := uint16(3); i <= length; i++ {
 64 | 		if i == length || sublen[i] != sublen[i+1] {
 65 | 			cache[j*3] = uint8(i - 3)
 66 | 			cache[j*3+1] = uint8(sublen[i])
 67 | 			cache[j*3+2] = uint8(sublen[i] >> 8)
 68 | 			bestLength = i
 69 | 			j++
 70 | 			if j >= CACHE_LENGTH {
 71 | 				break
 72 | 			}
 73 | 		}
 74 | 	}
 75 | 	if j < CACHE_LENGTH {
 76 | 		if bestLength != length {
 77 | 			panic("couldn't find best length")
 78 | 		}
 79 | 		cache[(CACHE_LENGTH-1)*3] = uint8(bestLength - 3)
 80 | 	} else {
 81 | 		if bestLength > length {
 82 | 			panic("impossible length")
 83 | 		}
 84 | 	}
 85 | 	if bestLength != lmc.maxCachedSublen(pos) {
 86 | 		panic("didn't cache sublen")
 87 | 	}
 88 | }
 89 | 
 90 | // Extracts sublen array from the cache.
 91 | func (lmc longestMatchCache) cacheToSublen(pos int, length uint16, sublen []uint16) {
 92 | 	if CACHE_LENGTH == 0 {
 93 | 		return
 94 | 	}
 95 | 
 96 | 	if length < 3 {
 97 | 		return
 98 | 	}
 99 | 
100 | 	var prevLength uint16
101 | 	maxLength := lmc.maxCachedSublen(pos)
102 | 	cache := CACHE_LENGTH * pos * 3
103 | 	for j := 0; j < CACHE_LENGTH; j++ {
104 | 		length = uint16(lmc.sublen[cache+j*3]) + 3
105 | 		dist := uint16(lmc.sublen[cache+j*3+1]) + 256*uint16(lmc.sublen[cache+j*3+2])
106 | 		for i := prevLength; i <= length; i++ {
107 | 			sublen[i] = dist
108 | 		}
109 | 		if length == maxLength {
110 | 			break
111 | 		}
112 | 		prevLength = length + 1
113 | 	}
114 | }
115 | 
116 | // Returns the length up to which could be stored in the cache.
117 | func (lmc longestMatchCache) maxCachedSublen(pos int) uint16 {
118 | 	if CACHE_LENGTH == 0 {
119 | 		return 0
120 | 	}
121 | 	//cache := lmc.sublen[CACHE_LENGTH*pos*3:]
122 | 	cache := CACHE_LENGTH * pos * 3
123 | 	if lmc.sublen[cache+1] == 0 && lmc.sublen[cache+2] == 0 {
124 | 		//cache := lmc.sublen[CACHE_LENGTH*pos*3:]
125 | 		//if cache[1] == 0 && cache[2] == 0 {
126 | 		// No sublen cached.
127 | 		return 0
128 | 	}
129 | 	//return uint16(cache[(CACHE_LENGTH-1)*3]) + 3
130 | 	return uint16(lmc.sublen[cache+(CACHE_LENGTH-1)*3]) + 3
131 | 	//return uint16(lmc.sublen[(CACHE_LENGTH * (pos + 1) - 1) * 3]) + 3
132 | }
133 | 


--------------------------------------------------------------------------------
/zopfli/deflate.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011 Google Inc. All Rights Reserved.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 | 		http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | 
 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
 18 | */
 19 | 
 20 | package zopfli
 21 | 
 22 | import (
 23 | 	"bufio"
 24 | 	"fmt"
 25 | 	"io"
 26 | 	"os"
 27 | )
 28 | 
// lz77Lengths holds the code lengths, in bits, of a pair of Huffman trees:
// litLen is indexed by literal/length symbol and dist by distance symbol
// (getFixedTree builds them with 288 and 32 entries respectively).
type lz77Lengths struct {
	litLen, dist []uint
}
 32 | 
// lz77Symbols holds the Huffman code values that lengthsToSymbols derives
// from an lz77Lengths: litLen is indexed by literal/length symbol and dist by
// distance symbol. The values are what writeHuffmanBits puts on the stream.
type lz77Symbols struct {
	litLen, dist []uint
}
 36 | 
// Deflator writes deflate blocks, bit by bit, to a buffered output writer.
// Use NewDeflator to create one.
type Deflator struct {
	// out: buffered writer wrapping the destination, to which the result
	//   is appended. It is flushed by flush.
	out *bufio.Writer

	// bp: number of bits written. This is because deflate appends
	//   blocks as bit-based data, rather than on byte boundaries.
	bp uint

	// next: the byte currently being assembled by writeBit; it is written
	//   to out and reset once its eighth bit has been filled in.
	next    byte
	// options: general program options; the Verbose flag is consulted when
	//   writing blocks.
	options *Options
}
 49 | 
 50 | type nullWriter struct{}
 51 | 
 52 | func (w *nullWriter) Write(p []byte) (int, error) {
 53 | 	return len(p), nil
 54 | }
 55 | 
 56 | func NewDeflator(wr io.Writer, options *Options) Deflator {
 57 | 	out := bufio.NewWriter(wr)
 58 | 	return Deflator{out, 0, 0, options}
 59 | }
 60 | 
 61 | func (z *Deflator) writeBit(bit byte) {
 62 | 	withinByte := z.bp & 7
 63 | 	z.next |= (bit << withinByte)
 64 | 	if withinByte == 7 {
 65 | 		err := z.out.WriteByte(z.next)
 66 | 		if err != nil {
 67 | 			panic(err)
 68 | 		}
 69 | 		z.next = 0
 70 | 	}
 71 | 	z.bp++
 72 | }
 73 | 
 74 | func (z *Deflator) writeBits(symbol, length uint) {
 75 | 	// TODO(lode): make more efficient (add more bits at once).
 76 | 	for i := uint(0); i < length; i++ {
 77 | 		bit := byte((symbol >> i) & 1)
 78 | 		z.writeBit(bit)
 79 | 	}
 80 | }
 81 | 
 82 | // Adds bits, like AddBits, but the order is inverted. The deflate specification
 83 | // uses both orders in one standard.
 84 | func (z *Deflator) writeHuffmanBits(symbol, length uint) {
 85 | 	// TODO(lode): make more efficient (add more bits at once).
 86 | 	for i := uint(0); i < length; i++ {
 87 | 		bit := byte((symbol >> (length - i - 1)) & 1)
 88 | 		z.writeBit(bit)
 89 | 	}
 90 | }
 91 | 
 92 | func (z *Deflator) flush() {
 93 | 	withinByte := z.bp & 7
 94 | 	if withinByte > 0 {
 95 | 		z.writeByte(z.next)
 96 | 		z.next = 0
 97 | 		z.bp += 8 - withinByte
 98 | 	}
 99 | 	err := z.out.Flush()
100 | 	if err != nil {
101 | 		panic(err)
102 | 	}
103 | }
104 | 
105 | func (z *Deflator) writeByte(c byte) {
106 | 	err := z.out.WriteByte(c)
107 | 	if err != nil {
108 | 		panic(err)
109 | 	}
110 | 	z.bp += 8
111 | }
112 | 
113 | func (z *Deflator) write(p []byte) {
114 | 	_, err := z.out.Write(p)
115 | 	if err != nil {
116 | 		panic(err)
117 | 	}
118 | 	z.bp += uint(len(p)) * 8
119 | }
120 | 
121 | // Ensures there are at least 2 distance codes to support buggy decoders.
122 | // Zlib 1.2.1 and below have a bug where it fails if there isn't at least 1
123 | // distance code (with length > 0), even though it's valid according to the
124 | // deflate spec to have 0 distance codes. On top of that, some mobile phones
125 | // require at least two distance codes. To support these decoders too (but
126 | // potentially at the cost of a few bytes), add dummy code lengths of 1.
127 | // References to this bug can be found in the changelog of
128 | // Zlib 1.2.2 and here: http://www.jonof.id.au/forum/index.php?topic=515.0.
129 | //
130 | // dLengths: the 32 lengths of the distance codes.
131 | func (lengths lz77Lengths) patchDistanceCodesForBuggyDecoders() {
132 | 	var numDistCodes uint // Amount of non-zero distance codes
133 | 	dLengths := lengths.dist
134 | 	for i := 0; i < 30; /* Ignore the two unused codes from the spec */ i++ {
135 | 		if dLengths[i] > 0 {
136 | 			numDistCodes++
137 | 		}
138 | 		if numDistCodes >= 2 {
139 | 			// Two or more codes is fine.
140 | 			return
141 | 		}
142 | 	}
143 | 
144 | 	if numDistCodes == 0 {
145 | 		dLengths[0] = 1
146 | 		dLengths[1] = 1
147 | 	} else if numDistCodes == 1 {
148 | 		var i int
149 | 		if dLengths[0] != 0 {
150 | 			i = 1
151 | 		}
152 | 		dLengths[i] = 1
153 | 	}
154 | }
155 | 
// clclOrder lists the code length alphabet symbols in the order in which
// their code lengths are written to a dynamic block header as per deflate.
var clclOrder = [19]uint8{
	16, 17, 18,
	0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
}
160 | 
// writeDynamicTree writes the description of the Huffman trees given by
// lengths to the stream: the HLIT, HDIST and HCLEN counts, the code length
// code lengths in clclOrder, and then the run-length encoded lit/len and dist
// code lengths, Huffman coded with the code length code.
//
// lengths.litLen must have at least 286 entries and lengths.dist at least 30;
// every length must be below 16, otherwise the function panics.
func (z *Deflator) writeDynamicTree(lengths lz77Lengths) {
	hLit := uint(29)  // 286 - 257
	hDist := uint(29) // 32 - 1, but gzip does not like hDist > 29.
	// Trim zeros: trailing unused codes need not be transmitted.
	for hLit > 0 && lengths.litLen[257+hLit-1] == 0 {
		hLit--
	}
	for hDist > 0 && lengths.dist[1+hDist-1] == 0 {
		hDist--
	}

	// Size of lldLengths.
	lldTotal := hLit + 257 + hDist + 1

	// All litLen and dist lengths with ending
	// zeros trimmed together in one array.
	lldLengths := make([]uint, lldTotal)

	for i := uint(0); i < lldTotal; i++ {
		if i < 257+hLit {
			lldLengths[i] = lengths.litLen[i]
		} else {
			lldLengths[i] = lengths.dist[i-257-hLit]
		}
		if lldLengths[i] >= 16 {
			panic("length too large")
		}
	}

	// Runlength encoded version of lengths of litLen and dist trees.
	var rle []uint
	// Extra bits for rle values 16, 17 and 18.
	var rleBits []uint
	for i := uint(0); i < lldTotal; i++ {
		// Length of the run of equal values starting at i.
		var count uint
		for j := i; j < lldTotal && lldLengths[i] == lldLengths[j]; j++ {
			count++
		}
		if count >= 4 || (count >= 3 && lldLengths[i] == 0) {
			if lldLengths[i] == 0 {
				// Run of zeros: symbol 18 covers 11 to 138 zeros, symbol 17
				// covers 3 to 10.
				if count > 10 {
					if count > 138 {
						count = 138
					}
					rle = append(rle, 18)
					rleBits = append(rleBits, count-11)
				} else {
					rle = append(rle, 17)
					rleBits = append(rleBits, count-3)
				}
			} else {
				// Run of a non-zero length: the value itself followed by
				// symbol 16, each of which repeats the previous value 3 to 6
				// times.
				rle = append(rle, lldLengths[i])
				rleBits = append(rleBits, 0)
				repeat := count - 1 // Since the first one is hardcoded.
				for repeat >= 6 {
					rle = append(rle, 16)
					rleBits = append(rleBits, 6-3)
					repeat -= 6
				}
				if repeat >= 3 {
					rle = append(rle, 16)
					rleBits = append(rleBits, repeat-3)
					repeat = 0
				}
				// Fewer than 3 left over: too short for symbol 16, so write
				// them out literally.
				for repeat > 0 {
					rle = append(rle, lldLengths[i])
					rleBits = append(rleBits, 0)
					repeat--
				}
			}

			// Skip the rest of the run; the loop's i++ steps past its last
			// element.
			i += count - 1
		} else {
			rle = append(rle, lldLengths[i])
			rleBits = append(rleBits, 0)
		}
		if rle[len(rle)-1] > 18 {
			panic("last rle too large")
		}
	}

	// Frequency of every code length alphabet symbol (0-18) in rle.
	rleSize := len(rle)
	var clCounts [19]uint
	for i := 0; i < rleSize; i++ {
		clCounts[rle[i]]++
	}

	// Code length code lengths.
	clcl := lengthLimitedCodeLengths(clCounts[:], 7)
	clSymbols := lengthsToSymbols(clcl, 7)

	// Trim zeros: trailing entries, in clclOrder, of symbols that do not
	// occur.
	hcLen := uint(15)
	for hcLen > 0 && clCounts[clclOrder[hcLen+4-1]] == 0 {
		hcLen--
	}

	z.writeBits(hLit, 5)
	z.writeBits(hDist, 5)
	z.writeBits(hcLen, 4)

	for i := uint(0); i < hcLen+4; i++ {
		z.writeBits(clcl[clclOrder[i]], 3)
	}

	for i := 0; i < rleSize; i++ {
		symbol := clSymbols[rle[i]]
		z.writeHuffmanBits(symbol, clcl[rle[i]])
		// Extra bits.
		if rle[i] == 16 {
			z.writeBits(rleBits[i], 2)
		} else if rle[i] == 17 {
			z.writeBits(rleBits[i], 3)
		} else if rle[i] == 18 {
			z.writeBits(rleBits[i], 7)
		}
	}
}
279 | 
280 | // Gives the exact size of the tree, in bits, as it will be encoded in DEFLATE.
281 | func (lengths lz77Lengths) calculateTreeSize() uint {
282 | 	var w *nullWriter
283 | 	z := NewDeflator(w, nil)
284 | 	z.writeDynamicTree(lengths)
285 | 	return z.bp
286 | }
287 | 
288 | func (lengths lz77Lengths) symbols(maxBits uint) (symbols lz77Symbols) {
289 | 	symbols.litLen = lengthsToSymbols(lengths.litLen, maxBits)
290 | 	symbols.dist = lengthsToSymbols(lengths.dist, maxBits)
291 | 	return symbols
292 | }
293 | 
294 | // Adds all lit/len and dist codes from the lists as huffman symbols. Does not
295 | // add end code 256. expectedDataSize is the uncompressed block size, used for
296 | // assert, but you can set it to 0 to not do the assertion.
297 | func (z *Deflator) writeLZ77Data(store LZ77Store,
298 | 	expectedDataSize int,
299 | 	symbols lz77Symbols, lengths lz77Lengths) {
300 | 	var testLength int
301 | 	lEnd := len(store)
302 | 	for i := 0; i < lEnd; i++ {
303 | 		pair := store[i]
304 | 		if pair.dist == 0 {
305 | 			if pair.litLen >= 256 {
306 | 				panic("litLen too large")
307 | 			}
308 | 			if lengths.litLen[pair.litLen] <= 0 {
309 | 				panic("length is zero")
310 | 			}
311 | 			z.writeHuffmanBits(symbols.litLen[pair.litLen], lengths.litLen[pair.litLen])
312 | 			testLength++
313 | 		} else {
314 | 			lls := pair.lengthSymbol()
315 | 			ds := pair.distSymbol()
316 | 			if pair.litLen < 3 || pair.litLen > 288 {
317 | 				panic("litLen out of range")
318 | 			}
319 | 			if lengths.litLen[lls] <= 0 {
320 | 				panic("length is zero")
321 | 			}
322 | 			if lengths.dist[ds] <= 0 {
323 | 				panic("length is zero")
324 | 			}
325 | 			z.writeHuffmanBits(symbols.litLen[lls], lengths.litLen[lls])
326 | 			z.writeBits(uint(pair.lengthExtraBitsValue()), uint(pair.lengthExtraBits()))
327 | 			z.writeHuffmanBits(symbols.dist[ds], lengths.dist[ds])
328 | 			z.writeBits(uint(pair.distExtraBitsValue()), uint(pair.distExtraBits()))
329 | 			testLength += int(pair.litLen)
330 | 		}
331 | 	}
332 | 	if expectedDataSize != 0 && testLength != expectedDataSize {
333 | 		panic("actual size did not match expected size")
334 | 	}
335 | }
336 | 
337 | func getFixedTree() (lengths lz77Lengths) {
338 | 	lengths.litLen = make([]uint, 288)
339 | 	lengths.dist = make([]uint, 32)
340 | 	for i := 0; i < 144; i++ {
341 | 		lengths.litLen[i] = 8
342 | 	}
343 | 	for i := 144; i < 256; i++ {
344 | 		lengths.litLen[i] = 9
345 | 	}
346 | 	for i := 256; i < 280; i++ {
347 | 		lengths.litLen[i] = 7
348 | 	}
349 | 	for i := 280; i < 288; i++ {
350 | 		lengths.litLen[i] = 8
351 | 	}
352 | 	for i := 0; i < 32; i++ {
353 | 		lengths.dist[i] = 5
354 | 	}
355 | 	return lengths
356 | }
357 | 
358 | // Calculates size of the part after the header and tree of an LZ77 block, in bits.
359 | func (store LZ77Store) calculateBlockSymbolSize(lengths lz77Lengths) uint64 {
360 | 	var result uint64
361 | 	lEnd := len(store)
362 | 	for i := 0; i < lEnd; i++ {
363 | 		if store[i].dist == 0 {
364 | 			result += uint64(lengths.litLen[store[i].litLen])
365 | 		} else {
366 | 			result += uint64(lengths.litLen[store[i].lengthSymbol()])
367 | 			result += uint64(lengths.dist[store[i].distSymbol()])
368 | 			result += uint64(store[i].lengthExtraBits())
369 | 			result += uint64(store[i].distExtraBits())
370 | 		}
371 | 	}
372 | 	result += uint64(lengths.litLen[256]) // end symbol
373 | 	return result
374 | }
375 | 
376 | // Calculates block size in bits.
377 | // litLens: lz77 lit/lengths
378 | // dists: ll77 distances
379 | func (store LZ77Store) CalculateBlockSize(blockType byte) uint64 {
380 | 	if blockType != FIXED_BLOCK && blockType != DYNAMIC_BLOCK {
381 | 		panic("this is not for uncompressed blocks")
382 | 	}
383 | 
384 | 	var lengths lz77Lengths
385 | 	result := uint64(3) // bFinal and blockType bits
386 | 	if blockType == FIXED_BLOCK {
387 | 		lengths = getFixedTree()
388 | 	} else {
389 | 		counts := store.lz77Counts()
390 | 		lengths.litLen = lengthLimitedCodeLengths(counts.litLen, 15)
391 | 		lengths.dist = lengthLimitedCodeLengths(counts.dist, 15)
392 | 		lengths.patchDistanceCodesForBuggyDecoders()
393 | 		result += uint64(lengths.calculateTreeSize())
394 | 	}
395 | 
396 | 	result += store.calculateBlockSymbolSize(lengths)
397 | 	return result
398 | }
399 | 
400 | // Adds a deflate block with the given LZ77 data to the output.
401 | // z: the stream to write to
402 | // blockType: the block type, must be 1 or 2
403 | // final: whether to set the "final" bit on this block, must be the last block
404 | // store: literal/length/distance array of the LZ77 data
405 | // expectedDataSize: the uncompressed block size, used for panic, but you can
406 | //   set it to 0 to not do the assertion.
407 | func (z *Deflator) WriteLZ77Block(blockType byte, final bool, store LZ77Store, expectedDataSize int) {
408 | 	var finalByte byte
409 | 	if final {
410 | 		finalByte = 1
411 | 	}
412 | 	z.writeBit(finalByte)
413 | 	z.writeBit(blockType & 1)
414 | 	z.writeBit((blockType & 2) >> 1)
415 | 
416 | 	var lengths lz77Lengths
417 | 	if blockType == FIXED_BLOCK {
418 | 		// Fixed block.
419 | 		lengths = getFixedTree()
420 | 	} else {
421 | 		// Dynamic block.
422 | 		if blockType != DYNAMIC_BLOCK {
423 | 			panic("illegal block type")
424 | 		}
425 | 		counts := store.lz77Counts()
426 | 		lengths.litLen = lengthLimitedCodeLengths(counts.litLen, 15)
427 | 		lengths.dist = lengthLimitedCodeLengths(counts.dist, 15)
428 | 		lengths.patchDistanceCodesForBuggyDecoders()
429 | 		detectTreeSize := z.bp
430 | 		z.writeDynamicTree(lengths)
431 | 		if z.options.Verbose {
432 | 			fmt.Fprintf(os.Stderr, "treesize: %d bits\n", z.bp-detectTreeSize)
433 | 		}
434 | 
435 | 		// Assert that for every present symbol, the code length is non-zero.
436 | 		// TODO(lode): remove this in release version.
437 | 		for i := 0; i < 288; i++ {
438 | 			if counts.litLen[i] != 0 && lengths.litLen[i] <= 0 {
439 | 				panic("length is zero")
440 | 			}
441 | 		}
442 | 		for i := 0; i < 32; i++ {
443 | 			if counts.dist[i] != 0 && lengths.dist[i] <= 0 {
444 | 				panic("length is zero")
445 | 			}
446 | 		}
447 | 	}
448 | 
449 | 	symbols := lengths.symbols(15)
450 | 
451 | 	detectBlockSize := z.bp
452 | 	z.writeLZ77Data(store, expectedDataSize, symbols, lengths)
453 | 	// End symbol.
454 | 	z.writeHuffmanBits(symbols.litLen[256], lengths.litLen[256])
455 | 	if final {
456 | 		// write last byte
457 | 		z.flush()
458 | 	}
459 | 
460 | 	if z.options.Verbose {
461 | 		var uncompressedSize uint
462 | 		lEnd := len(store)
463 | 		for i := 0; i < lEnd; i++ {
464 | 			if store[i].dist == 0 {
465 | 				uncompressedSize += 1
466 | 			} else {
467 | 				uncompressedSize += uint(store[i].litLen)
468 | 			}
469 | 		}
470 | 		compressedSize := z.bp - detectBlockSize
471 | 		var places int
472 | 		if compressedSize&1 != 0 {
473 | 			places = 3
474 | 		} else if compressedSize&2 != 0 {
475 | 			places = 2
476 | 		} else if compressedSize&4 != 0 {
477 | 			places = 1
478 | 		}
479 | 		fmt.Fprintf(
480 | 			os.Stderr,
481 | 			"compressed block size: %.*f (%dkB) (unc: %d (%dkB)\n",
482 | 			places,
483 | 			float64(compressedSize)/8,
484 | 			(compressedSize+4000)/8000,
485 | 			uncompressedSize,
486 | 			(uncompressedSize+500)/1000,
487 | 		)
488 | 	}
489 | }
490 | 
491 | func (z *Deflator) deflateDynamicBlock(final bool, in []byte, inStart, inEnd int) {
492 | 	s := NewBlockState(z.options, in, inStart, inEnd)
493 | 	store := s.LZ77Optimal(inStart, inEnd)
494 | 
495 | 	// For small block, encoding with fixed tree can be smaller. For large block,
496 | 	// don't bother doing this expensive test, dynamic tree will be better.
497 | 	blockType := byte(DYNAMIC_BLOCK)
498 | 	if len(store) < 1000 {
499 | 		fixedStore := s.LZ77OptimalFixed(inStart, inEnd)
500 | 		dynCost := store.CalculateBlockSize(2)
501 | 		fixedCost := fixedStore.CalculateBlockSize(1)
502 | 		if fixedCost < dynCost {
503 | 			blockType = FIXED_BLOCK
504 | 			store = fixedStore
505 | 		}
506 | 	}
507 | 
508 | 	blockSize := inEnd - inStart
509 | 	z.WriteLZ77Block(blockType, final, store, blockSize)
510 | }
511 | 
512 | func (z *Deflator) deflateFixedBlock(final bool, in []byte, inStart, inEnd int) {
513 | 	blockSize := inEnd - inStart
514 | 
515 | 	s := NewBlockState(z.options, in, inStart, inEnd)
516 | 	store := s.LZ77OptimalFixed(inStart, inEnd)
517 | 	z.WriteLZ77Block(FIXED_BLOCK, final, store, blockSize)
518 | }
519 | 
520 | func (z *Deflator) deflateNonCompressedBlock(final bool, in []byte) {
521 | 	blockSize := len(in)
522 | 	if blockSize >= 65536 {
523 | 		panic("Non compressed blocks are max this size.")
524 | 	}
525 | 	nLen := uint16(^blockSize)
526 | 
527 | 	var finalByte byte
528 | 	if final {
529 | 		finalByte = 1
530 | 	}
531 | 	z.writeBit(finalByte)
532 | 	// blockType 00
533 | 	z.writeBit(0)
534 | 	z.writeBit(0)
535 | 	// Any bits of input up to the next byte boundary are ignored.
536 | 	z.flush()
537 | 
538 | 	z.writeByte(byte(blockSize))
539 | 	z.writeByte(byte(blockSize / 256))
540 | 	z.writeByte(byte(nLen))
541 | 	z.writeByte(byte(nLen / 256))
542 | 
543 | 	z.write(in)
544 | }
545 | 
546 | func (z *Deflator) deflateBlock(final bool, in []byte, inStart, inEnd int) {
547 | 	switch z.options.BlockType {
548 | 	case UNCOMPRESSED_BLOCK:
549 | 		z.deflateNonCompressedBlock(final, in[inStart:inEnd])
550 | 	case FIXED_BLOCK:
551 | 		z.deflateFixedBlock(final, in, inStart, inEnd)
552 | 	case DYNAMIC_BLOCK:
553 | 		z.deflateDynamicBlock(final, in, inStart, inEnd)
554 | 	default:
555 | 		panic("illegal block type")
556 | 	}
557 | }
558 | 
559 | // Does squeeze strategy where first block splitting is done, then each block is
560 | // squeezed.
561 | // Parameters: see description of the Deflate function.
562 | func (z *Deflator) deflateSplittingFirst(final bool, in []byte, inStart, inEnd int) {
563 | 	var splitPoints []int
564 | 	switch z.options.BlockType {
565 | 	case UNCOMPRESSED_BLOCK:
566 | 		splitPoints = blockSplitSimple(inStart, inEnd, 65535)
567 | 	case FIXED_BLOCK:
568 | 		// If all blocks are fixed tree, splitting into separate blocks only
569 | 		// increases the total size. Leave splitPoints nil, this represents 1 block.
570 | 	case DYNAMIC_BLOCK:
571 | 		splitPoints = blockSplit(z.options, in, inStart, inEnd, z.options.BlockSplittingMax)
572 | 	}
573 | 
574 | 	nPoints := len(splitPoints)
575 | 	for i := 0; i <= nPoints; i++ {
576 | 		var start, end int
577 | 		if i == 0 {
578 | 			start = inStart
579 | 		} else {
580 | 			start = splitPoints[i-1]
581 | 		}
582 | 		if i == nPoints {
583 | 			end = inEnd
584 | 		} else {
585 | 			end = splitPoints[i]
586 | 		}
587 | 		z.deflateBlock(i == nPoints && final, in, start, end)
588 | 	}
589 | }
590 | 
591 | // Does squeeze strategy where first the best possible lz77 is done, and then based
592 | // on that data, block splitting is done.
593 | // Parameters: see description of the Deflate function.
594 | func (z *Deflator) deflateSplittingLast(final bool, in []byte, inStart, inEnd int) {
595 | 	blockType := z.options.BlockType
596 | 	if blockType == UNCOMPRESSED_BLOCK {
597 | 		// This function only supports LZ77 compression. deflateSplittingFirst
598 | 		// supports the special case of noncompressed data. Punt it to that one.
599 | 		z.deflateSplittingFirst(final, in, inStart, inEnd)
600 | 		return
601 | 	}
602 | 	if blockType != FIXED_BLOCK && blockType != DYNAMIC_BLOCK {
603 | 		panic("illegal block type")
604 | 	}
605 | 
606 | 	s := NewBlockState(z.options, in, inStart, inEnd)
607 | 
608 | 	var store LZ77Store
609 | 	if blockType == DYNAMIC_BLOCK {
610 | 		store = s.LZ77Optimal(inStart, inEnd)
611 | 	} else {
612 | 		if blockType != FIXED_BLOCK {
613 | 			panic("illegal block type")
614 | 		}
615 | 		store = s.LZ77OptimalFixed(inStart, inEnd)
616 | 	}
617 | 
618 | 	// If all blocks are fixed tree, splitting into separate blocks only
619 | 	// increases the total size. Leave nPoints at 0, this represents 1 block.
620 | 	var splitPoints []int
621 | 	if blockType != FIXED_BLOCK {
622 | 		splitPoints = store.blockSplitLZ77(z.options, z.options.BlockSplittingMax)
623 | 	}
624 | 
625 | 	storeSize := len(store)
626 | 	nPoints := len(splitPoints)
627 | 	for i := 0; i <= nPoints; i++ {
628 | 		var start, end int
629 | 		if i > 0 {
630 | 			start = splitPoints[i-1]
631 | 		}
632 | 		if i >= nPoints {
633 | 			end = storeSize
634 | 		} else {
635 | 			end = splitPoints[i]
636 | 		}
637 | 		z.WriteLZ77Block(blockType, i == nPoints && final, store[start:end], 0)
638 | 	}
639 | }
640 | 
641 | // Deflate a part, to allow Deflate() to use multiple master blocks if
642 | // needed.
643 | //
644 | // Like Deflate, but allows to specify start and end byte with inStart and
645 | // inEnd. Only that part is compressed, but earlier bytes are still used for the
646 | // back window.
647 | //
648 | // It is possible to call this function multiple times in a row, shifting
649 | // inStart and inEnd to next bytes of the data. If inStart is larger than 0, then
650 | // previous bytes are used as the initial dictionary for LZ77.
651 | // This function will usually output multiple deflate blocks. If final is 1, then
652 | // the final bit will be set on the last block.
653 | func (z *Deflator) DeflatePart(final bool, in []byte, inStart, inEnd int) (err error) {
654 | 	defer func() {
655 | 		problem := recover()
656 | 		if problem != nil {
657 | 			err = problem.(error)
658 | 		}
659 | 	}()
660 | 
661 | 	if z.options.BlockSplitting {
662 | 		if z.options.BlockSplittingLast {
663 | 			z.deflateSplittingLast(final, in, inStart, inEnd)
664 | 		} else {
665 | 			z.deflateSplittingFirst(final, in, inStart, inEnd)
666 | 		}
667 | 	} else {
668 | 		z.deflateBlock(final, in, inStart, inEnd)
669 | 	}
670 | 	return err
671 | }
672 | 
673 | // Compresses according to the deflate specification and append the compressed
674 | // result to the output.
675 | // This function will usually output multiple deflate blocks. If final is 1, then
676 | // the final bit will be set on the last block.
677 | //
678 | // final: whether this is the last section of the input, sets the final bit to the
679 | //	 last deflate block.
680 | // in: the input bytes
681 | func (z *Deflator) Deflate(final bool, in []byte) (err error) {
682 | 	defer func() {
683 | 		problem := recover()
684 | 		if problem != nil {
685 | 			err = problem.(error)
686 | 		}
687 | 	}()
688 | 
689 | 	if MASTER_BLOCK_SIZE == 0 {
690 | 		err := z.DeflatePart(true, in, 0, len(in))
691 | 		if err != nil {
692 | 			return err
693 | 		}
694 | 	} else {
695 | 		var i int
696 | 		inSize := len(in)
697 | 		for i < inSize {
698 | 			var size int
699 | 			masterFinal := i+MASTER_BLOCK_SIZE >= inSize
700 | 			final2 := final && masterFinal
701 | 			if masterFinal {
702 | 				size = inSize - i
703 | 			} else {
704 | 				size = MASTER_BLOCK_SIZE
705 | 			}
706 | 			err := z.DeflatePart(final2, in, i, i+size)
707 | 			if err != nil {
708 | 				return err
709 | 			}
710 | 			i += size
711 | 		}
712 | 	}
713 | 	if z.options.Verbose {
714 | 		inSize := len(in)
715 | 		outSize := z.bp / 8
716 | 		fmt.Fprintf(
717 | 			os.Stderr,
718 | 			"Original Size: %d, Deflate: %d, Compression: %f%% Removed\n",
719 | 			inSize, outSize,
720 | 			100*(float64(inSize)-float64(outSize))/float64(inSize),
721 | 		)
722 | 	}
723 | 	return nil
724 | }
725 | 


--------------------------------------------------------------------------------
/zopfli/gzip_container.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2013 Google Inc. All Rights Reserved.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 | 		http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | 
 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
 18 | */
 19 | 
 20 | package zopfli
 21 | 
 22 | import (
 23 | 	"fmt"
 24 | 	"hash/crc32"
 25 | 	"io"
 26 | 	"os"
 27 | )
 28 | 
 29 | // Compresses according to the gzip specification and writes the compressed
 30 | // result to the output.
 31 | //
 32 | // options: global program options
 33 | // out: writer to which the result is appended
 34 | func GzipCompress(options *Options, in []byte, out io.Writer) error {
 35 | 	var counter countingWriter
 36 | 	if options.Verbose {
 37 | 		counter = newCountingWriter(out)
 38 | 		out = &counter
 39 | 	}
 40 | 
 41 | 	header := []byte{
 42 | 		// ID
 43 | 		31,
 44 | 		139,
 45 | 		// CM
 46 | 		8,
 47 | 		// FLG
 48 | 		0,
 49 | 		// MTIME
 50 | 		0,
 51 | 		0,
 52 | 		0,
 53 | 		0,
 54 | 		// XFL, 2 indicates best compression.
 55 | 		2,
 56 | 		// OS follows Unix conventions.
 57 | 		3,
 58 | 	}
 59 | 	_, headerErr := out.Write(header)
 60 | 	if headerErr != nil {
 61 | 		return headerErr
 62 | 	}
 63 | 
 64 | 	z := NewDeflator(out, options)
 65 | 	writeErr := z.Deflate(true, in)
 66 | 	if writeErr != nil {
 67 | 		return writeErr
 68 | 	}
 69 | 
 70 | 	checksum := crc32.NewIEEE()
 71 | 	checksum.Write(in)
 72 | 	crcValue := checksum.Sum32()
 73 | 	inSize := len(in)
 74 | 	footer := []byte{
 75 | 		// CRC
 76 | 		byte(crcValue),
 77 | 		byte(crcValue >> 8),
 78 | 		byte(crcValue >> 16),
 79 | 		byte(crcValue >> 24),
 80 | 		// ISIZE
 81 | 		byte(inSize),
 82 | 		byte(inSize >> 8),
 83 | 		byte(inSize >> 16),
 84 | 		byte(inSize >> 24),
 85 | 	}
 86 | 	_, footerErr := out.Write(footer)
 87 | 	if footerErr != nil {
 88 | 		return footerErr
 89 | 	}
 90 | 
 91 | 	if options.Verbose {
 92 | 		inSize := len(in)
 93 | 		outSize := counter.written
 94 | 		fmt.Fprintf(os.Stderr,
 95 | 			"Original Size: %d, Gzip: %d, Compression: %f%% Removed\n",
 96 | 			inSize, outSize,
 97 | 			100*float64(inSize-outSize)/float64(inSize))
 98 | 	}
 99 | 	return nil
100 | }
101 | 


--------------------------------------------------------------------------------
/zopfli/hash.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011 Google Inc. All Rights Reserved.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 | 		http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | 
 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
 18 | */
 19 | 
 20 | package zopfli
 21 | 
const (
	// HASH_SHIFT is the number of bits the running hash value is shifted left
	// by before the next input byte is mixed in (see updateValue).
	HASH_SHIFT = 5
	// HASH_MASK is applied to the running hash value after every update; it
	// keeps the low 15 bits (32767 == 1<<15 - 1).
	HASH_MASK  = 32767
)
 26 | 
// hash holds the rolling-hash tables used to locate earlier positions whose
// hash value equals that of the current position. Positions stored in the
// tables are wrapped with WINDOW_MASK (see update).
type hash struct {
	head    []int    // Hash value to index of its most recent occurrence.
	prev    []uint16 // Index to index of prev. occurrence of same hash.
	hashVal []int    // Index to hash value at this index.
	val     int      // Current hash value.

	// Only used when HASH_SAME_HASH is true
	// Fields with similar purpose as the above hash, but for the second
	// hash with a value that is calculated differently.
	head2    []int    // Hash value to index of its most recent occurrence.
	prev2    []uint16 // Index to index of prev. occurrence of same hash.
	hashVal2 []int    // Index to hash value at this index.
	val2     int      // Current hash value.

	// Only used when HASH_SAME is true
	same []uint16 // Amount of repetitions of same byte after this.
}
 44 | 
 45 | // Allocates and initializes all fields of Hash.
 46 | func newHash(a, b byte) (h hash) {
 47 | 	h.head = make([]int, 65536)
 48 | 	h.prev = make([]uint16, WINDOW_SIZE)
 49 | 	h.hashVal = make([]int, WINDOW_SIZE)
 50 | 	for i := 0; i < 65536; i++ {
 51 | 		h.head[i] = -1 // -1 indicates no head so far.
 52 | 	}
 53 | 	for i := uint16(0); i < WINDOW_SIZE; i++ {
 54 | 		h.prev[i] = i // If prev[j] == j, then prev[j] is uninitialized.
 55 | 		h.hashVal[i] = -1
 56 | 	}
 57 | 
 58 | 	if HASH_SAME {
 59 | 		h.same = make([]uint16, WINDOW_SIZE)
 60 | 	}
 61 | 
 62 | 	if HASH_SAME_HASH {
 63 | 		h.head2 = make([]int, 65536)
 64 | 		h.prev2 = make([]uint16, WINDOW_SIZE)
 65 | 		h.hashVal2 = make([]int, WINDOW_SIZE)
 66 | 		for i := 0; i < 65536; i++ {
 67 | 			h.head2[i] = -1
 68 | 		}
 69 | 		for i := uint16(0); i < WINDOW_SIZE; i++ {
 70 | 			h.prev2[i] = i
 71 | 			h.hashVal2[i] = -1
 72 | 		}
 73 | 	}
 74 | 
 75 | 	h.warmup(a, b)
 76 | 	return h
 77 | }
 78 | 
 79 | // Update the sliding hash value with the given byte. All calls to this function
 80 | // must be made on consecutive input characters. Since the hash value exists out
 81 | // of multiple input bytes, a few warmups with this function are needed initially.
 82 | func (h *hash) updateValue(c byte) {
 83 | 	h.val = ((h.val << HASH_SHIFT) ^ int(c)) & HASH_MASK
 84 | }
 85 | 
// update advances the hash tables to position pos of slice. All calls to this
// must be made for consecutive bytes.
//
// slice: the data being hashed.
// pos: the position to record; it is wrapped with WINDOW_MASK before being
// used as a table index.
// end: bytes at or beyond this index are not read.
func (h *hash) update(slice []byte, pos, end int) {
	hPos := pos & WINDOW_MASK

	// The byte that completes a MIN_MATCH-long window starting at pos is fed
	// into the rolling hash; 0 is used when that window would reach past end.
	var hashValue byte
	if pos+MIN_MATCH <= end {
		hashValue = slice[pos+MIN_MATCH-1]
	}
	h.updateValue(hashValue)
	h.hashVal[hPos] = h.val
	// Link to the previous head only if that slot still holds this hash
	// value; otherwise point at ourselves, which marks "no previous entry".
	if h.head[h.val] != -1 && h.hashVal[h.head[h.val]] == h.val {
		h.prev[hPos] = uint16(h.head[h.val])
	} else {
		h.prev[hPos] = uint16(hPos)
	}
	h.head[h.val] = hPos

	if HASH_SAME {
		// Update "same".
		// Start from the count stored for the previous position minus one,
		// then extend while the following bytes equal slice[pos]. The count is
		// capped at 0xFFFF so it fits the uint16 table.
		var amount int
		if h.same[(pos-1)&WINDOW_MASK] > 1 {
			amount = int(h.same[(pos-1)&WINDOW_MASK]) - 1
		}
		for pos+amount+1 < end &&
			slice[pos] == slice[pos+amount+1] && amount < 0xFFFF {
			amount++
		}
		h.same[hPos] = uint16(amount)
	}

	if HASH_SAME_HASH {
		// The second hash mixes the low 8 bits of (same - MIN_MATCH) into the
		// first hash value and is chained exactly like the first one.
		h.val2 = int((h.same[hPos]-MIN_MATCH)&255) ^ h.val
		h.hashVal2[hPos] = h.val2
		if h.head2[h.val2] != -1 && h.hashVal2[h.head2[h.val2]] == h.val2 {
			h.prev2[hPos] = uint16(h.head2[h.val2])
		} else {
			h.prev2[hPos] = uint16(hPos)
		}
		h.head2[h.val2] = hPos
	}
}
128 | 
129 | // Prepopulates hash:
130 | // Fills in the initial values in the hash, before Update can be used
131 | // correctly.
132 | func (h *hash) warmup(a, b byte) {
133 | 	h.updateValue(a)
134 | 	h.updateValue(b)
135 | }
136 | 


--------------------------------------------------------------------------------
/zopfli/katajainen.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011 Google Inc. All Rights Reserved.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 | 		http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | 
 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
 18 | */
 19 | 
 20 | /*
 21 | Bounded package merge algorithm, based on the paper
 22 | "A Fast and Space-Economical Algorithm for Length-Limited Coding
 23 | Jyrki Katajainen, Alistair Moffat, Andrew Turpin".
 24 | */
 25 | 
 26 | package zopfli
 27 | 
 28 | import (
 29 | 	"sort"
 30 | )
 31 | 
// node is one element of a chain in the boundary package-merge algorithm.
// The same type is also used to represent the leaves (one per used symbol).
type node struct {
	weight uint  // Total weight (symbol count) of this chain.
	tail   *node // Previous node(s) of this chain, or nil if none.
	count  int   // Leaf symbol index, or number of leaves before this chain.
}
 38 | 
// symbolLeaves is the list of leaf nodes. Its pointer type provides Len, Less
// and Swap so the leaves can be sorted by weight with sort.Sort.
type symbolLeaves []*node
 40 | 
 41 | // Initializes a chain node with the given values and marks it as in use.
 42 | func newNode(weight uint, count int, tail *node) *node {
 43 | 	var node node
 44 | 	node.weight = weight
 45 | 	node.count = count
 46 | 	node.tail = tail
 47 | 	return &node
 48 | }
 49 | 
// boundaryPM performs a Boundary Package-Merge step. It puts a new chain in
// the given list. The new chain is, depending on the weights, a leaf or a
// combination of two chains from the previous list.
//
// lists: The lists of chains; each list keeps its two lookahead chains.
// leaves: The leaves, one per symbol.
// index: The index of the list in which a new chain or leaf is required.
// final: Whether this is the last time this function is called. If it is,
// the recursive calls that refill the previous list are skipped.
func boundaryPM(lists [][2]*node, leaves symbolLeaves, index int, final bool) {
	lastCount := lists[index][1].count // Count of last chain of list.

	numSymbols := len(leaves)
	// List 0 holds only leaves; once all of them are used there is nothing
	// left to add.
	if index == 0 && lastCount >= numSymbols {
		return
	}

	// The current last chain becomes the older of the two lookahead chains.
	lists[index][0] = lists[index][1]

	if index == 0 {
		// New leaf node in list 0.
		lists[index][1] = newNode(leaves[lastCount].weight, lastCount+1, nil)
	} else {
		sum := lists[index-1][0].weight + lists[index-1][1].weight
		if lastCount < numSymbols && sum > leaves[lastCount].weight {
			// New leaf inserted in list, so count is incremented.
			// lists[index][1] still refers to the old last chain here, so the
			// new leaf inherits that chain's tail.
			lists[index][1] = newNode(leaves[lastCount].weight,
				lastCount+1, lists[index][1].tail)
		} else {
			lists[index][1] = newNode(sum, lastCount, lists[index-1][1])
			if !final {
				// Two lookahead chains of previous list used up, create new ones.
				boundaryPM(lists, leaves, index-1, false)
				boundaryPM(lists, leaves, index-1, false)
			}
		}
	}
}
 89 | 
 90 | // Initializes each list with as lookahead chains the two leaves with lowest
 91 | // weights.
 92 | func newLists(leaves symbolLeaves, maxBits int) (lists [][2]*node) {
 93 | 	lists = make([][2]*node, maxBits)
 94 | 	node0 := newNode(leaves[0].weight, 1, nil)
 95 | 	node1 := newNode(leaves[1].weight, 2, nil)
 96 | 	for i := 0; i < maxBits; i++ {
 97 | 		lists[i][0] = node0
 98 | 		lists[i][1] = node1
 99 | 	}
100 | 	return lists
101 | }
102 | 
103 | // Converts result of boundary package-merge to the bitLengths. The result in the
104 | // last chain of the last list contains the amount of active leaves in each list.
105 | // chain: Chain to extract the bit length from (last chain from last list).
106 | func extractBitLengths(chain *node, leaves symbolLeaves, bitLengths []uint) {
107 | 	for node := chain; node != nil; node = node.tail {
108 | 		for i := 0; i < node.count; i++ {
109 | 			bitLengths[leaves[i].count]++
110 | 		}
111 | 	}
112 | }
113 | 
114 | func (leaves *symbolLeaves) Len() int {
115 | 	return len(*leaves)
116 | }
117 | 
118 | // Comparator for sorting the leaves. Has the function signature for qsort.
119 | func (leaves *symbolLeaves) Less(i, j int) bool {
120 | 	return (*leaves)[i].weight < (*leaves)[j].weight
121 | }
122 | 
123 | func (leaves *symbolLeaves) Swap(i, j int) {
124 | 	(*leaves)[j], (*leaves)[i] = (*leaves)[i], (*leaves)[j]
125 | }
126 | 
127 | // Outputs minimum-redundancy length-limited code bitLengths for symbols with the
128 | // given counts. The bitLengths are limited by maxBits.
129 | //
130 | // The output is tailored for DEFLATE: symbols that never occur, get a bit length
131 | // of 0, and if only a single symbol occurs at least once, its bitlength will be 1,
132 | // and not 0 as would theoretically be needed for a single symbol.
133 | //
134 | // frequencies: The amount of occurances of each symbol.
135 | // n: The amount of symbols.
136 | // maxBits: Maximum bit length, inclusive.
137 | // bitLengths: Output, the bitlengths for the symbol prefix codes.
138 | // return: 0 for OK, non-0 for error.
139 | func lengthLimitedCodeLengths(frequencies []uint, maxBits int) []uint {
140 | 	n := len(frequencies)
141 | 	// One leaf per symbol. Only numSymbols leaves will be used.
142 | 	leaves := make(symbolLeaves, 0, n)
143 | 	// Count used symbols and place them in the leaves.
144 | 	for i := 0; i < n; i++ {
145 | 		if frequencies[i] > 0 {
146 | 			node := newNode(frequencies[i], i, nil)
147 | 			leaves = append(leaves, node)
148 | 		}
149 | 	}
150 | 
151 | 	// Amount of symbols with frequency > 0.
152 | 	numSymbols := len(leaves)
153 | 	// Check special cases and error conditions.
154 | 	if (1 << uint(maxBits)) < numSymbols {
155 | 		// Error, too few maxBits to represent symbols.
156 | 		panic("couldn't calculate code lengths")
157 | 	}
158 | 
159 | 	// Initialize all bitlengths at 0.
160 | 	bitLengths := make([]uint, n)
161 | 	if numSymbols == 0 {
162 | 		// No symbols at all. OK.
163 | 		return bitLengths
164 | 	}
165 | 	if numSymbols == 1 {
166 | 		// Only one symbol, give it bitLength 1, not 0. OK.
167 | 		bitLengths[leaves[0].count] = 1
168 | 		return bitLengths
169 | 	}
170 | 
171 | 	// Sort the leaves from lightest to heaviest.
172 | 	sort.Sort(&leaves)
173 | 
174 | 	// Array of lists of chains. Each list requires only two lookahead
175 | 	// chains at a time, so each list is a array of two node*'s.
176 | 	lists := newLists(leaves, maxBits)
177 | 
178 | 	// In the last list, 2 * numSymbols - 2 active chains need to be created. Two
179 | 	// are already created in the initialization. Each boundaryPM run creates one.
180 | 	numBoundaryPMRuns := 2*numSymbols - 4
181 | 	for i := 0; i < numBoundaryPMRuns; i++ {
182 | 		final := i == numBoundaryPMRuns-1
183 | 		boundaryPM(lists, leaves, maxBits-1, final)
184 | 	}
185 | 
186 | 	extractBitLengths(lists[maxBits-1][1], leaves, bitLengths)
187 | 	return bitLengths
188 | }
189 | 


--------------------------------------------------------------------------------
/zopfli/lz77.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011 Google Inc. All Rights Reserved.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 | 		http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | 
 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
 18 | */
 19 | 
 20 | package zopfli
 21 | 
// lz77Pair is one LZ77 symbol: either a literal byte or a length/distance
// back-reference.
//
// litLen: Contains the literal symbol or length value.
// dist: Indicates the distance, or 0 to indicate that there is no distance and
//   litLen contains a literal instead of a length.
type lz77Pair struct {
	// Lit or len.
	litLen uint16

	// If 0: litLen holds a literal,
	// if > 0: litLen holds a length and this is the distance.
	dist uint16
}
 33 | 
// LZ77Store stores the sequence of lit/length and dist pairs produced by LZ77.
type LZ77Store []lz77Pair
 36 | 
// BlockState holds some state information for compressing a block.
// This is currently a bit under-used (with mainly only the longest match cache),
// but is kept for easy future expansion.
type BlockState struct {
	// Program options this block is compressed with.
	options *Options
	// The input data the block positions refer to.
	block []byte

	// The start (inclusive) and end (not inclusive) of the current block.
	blockStart, blockEnd int

	// Cache for length/distance pairs found so far.
	lmc longestMatchCache
}
 50 | 
// lz77Counts groups two count tables, one for lit/length symbols and one for
// distance symbols.
// NOTE(review): presumably these are per-symbol occurrence counts as used by
// lengthLimitedCodeLengths; confirm against the code that fills them.
type lz77Counts struct {
	litLen, dist []uint
}
 54 | 
 55 | // Gets a score of the length given the distance. Typically, the score of the
 56 | // length is the length itself, but if the distance is very long, decrease the
 57 | // score of the length a bit to make up for the fact that long distances use
 58 | // large amounts of extra bits.
 59 | 
 60 | // This is not an accurate score, it is a heuristic only for the greedy LZ77
 61 | // implementation. More accurate cost models are employed later. Making this
 62 | // heuristic more accurate may hurt rather than improve compression.
 63 | 
 64 | // The two direct uses of this heuristic are:
 65 | // -avoid using a length of 3 in combination with a long distance. This only has
 66 | //  an effect if length == 3.
 67 | // -make a slightly better choice between the two options of the lazy matching.
 68 | 
 69 | // Indirectly, this affects:
 70 | // -the block split points if the default of block splitting first is used, in a
 71 | //  rather unpredictable way
 72 | // -the first zopfli run, so it affects the chance of the first run being closer
 73 | //  to the optimal output
 74 | func (pair lz77Pair) lengthScore() uint16 {
 75 | 	// At 1024, the distance uses 9+ extra bits and this seems to be the
 76 | 	// sweet spot on tested files.
 77 | 	if pair.dist > 1024 {
 78 | 		return pair.litLen - 1
 79 | 	}
 80 | 	return pair.litLen
 81 | }
 82 | 
 83 | // Verifies if length and dist are indeed valid, only used for assertion.
 84 | func (pair lz77Pair) Verify(data []byte, pos int) {
 85 | 
 86 | 	// TODO(lode): make this only run in a debug compile, it's for assert only.
 87 | 
 88 | 	dataSize := len(data)
 89 | 	if pos+int(pair.litLen) > dataSize {
 90 | 		panic("overrun")
 91 | 	}
 92 | 	for i := 0; i < int(pair.litLen); i++ {
 93 | 		if data[pos-int(pair.dist)+i] != data[pos+i] {
 94 | 			panic("mismatch")
 95 | 		}
 96 | 	}
 97 | }
 98 | 
 99 | // Finds how long the match of scan and match is. Can be used to find how many
100 | // bytes starting from scan, and from match, are equal. Returns the last byte
101 | // after scan, which is still equal to the corresponding byte after match.
102 | // scan is the position to compare
103 | // match is the earlier position to compare.
104 | // end is the last possible position, beyond which to stop looking.
105 | func getMatch(slice []byte, scan, match, end int) int {
106 | 	for scan < end && slice[scan] == slice[match] {
107 | 		scan++
108 | 		match++
109 | 	}
110 | 
111 | 	return scan
112 | }
113 | 
114 | func NewBlockState(options *Options, in []byte, inStart, inEnd int) (s BlockState) {
115 | 	s.options = options
116 | 	s.block = in
117 | 	s.blockStart = inStart
118 | 	s.blockEnd = inEnd
119 | 	if LONGEST_MATCH_CACHE {
120 | 		blockSize := inEnd - inStart
121 | 		s.lmc = newCache(blockSize)
122 | 	}
123 | 	return s
124 | }
125 | 
// tryGetFromLongestMatchCache gets distance, length and sublen values from the
// cache if possible.
//
// pos: position in the whole input; it is translated to a block-relative
// cache position.
// limit: maximum length allowed. When the cache cannot fully answer the query,
// *limit is lowered to the cached length so the caller knows when to stop.
// sublen: optional output array, filled from the cache when not nil.
//
// It returns the cached pair and ok == true if it got the values from the
// cache, and ok == false if not. It panics with "bad sublen" when the distance
// read back from sublen disagrees with the cached distance.
func (s *BlockState) tryGetFromLongestMatchCache(pos int, limit *uint16, sublen []uint16) (pair lz77Pair, ok bool) {
	// The LMC cache starts at the beginning of the block rather than the
	// beginning of the whole array.
	lmcPos := pos - s.blockStart

	// Length > 0 and dist 0 is invalid combination, which indicates on
	// purpose that this cache value is not filled in yet.
	cacheAvailable := s.lmc.active && (s.lmc.store[lmcPos].litLen == 0 || s.lmc.store[lmcPos].dist != 0)
	var maxSublen uint16
	if cacheAvailable && sublen != nil {
		maxSublen = s.lmc.maxCachedSublen(lmcPos)
	}
	// The cached entry can be used when no tighter limit was requested, when
	// the cached length already fits the limit, or when the cached sublens
	// reach at least as far as the limit.
	limitOkForCache := cacheAvailable && (*limit == MAX_MATCH || s.lmc.store[lmcPos].litLen <= *limit || (sublen != nil && maxSublen >= *limit))
	if s.lmc.active && limitOkForCache && cacheAvailable {
		if sublen == nil || s.lmc.store[lmcPos].litLen <= s.lmc.maxCachedSublen(lmcPos) {
			pair.litLen = s.lmc.store[lmcPos].litLen
			if pair.litLen > *limit {
				pair.litLen = *limit
			}
			if sublen != nil {
				s.lmc.cacheToSublen(lmcPos, pair.litLen, sublen)
				pair.dist = sublen[pair.litLen]
				// Consistency check: with the default limit the distance
				// rebuilt from sublen must equal the cached distance.
				if *limit == MAX_MATCH && pair.litLen >= MIN_MATCH {
					if pair.dist != s.lmc.store[lmcPos].dist {
						panic("bad sublen")
					}
				}
			} else {
				pair.dist = s.lmc.store[lmcPos].dist
			}
			return pair, true
		}
		// Can't use much of the cache, since the "sublens" need to
		// be calculated, but at least we already know when to stop.
		*limit = s.lmc.store[lmcPos].litLen
	}

	return pair, false
}
169 | 
170 | // Stores the found sublen, distance and length in the longest match cache, if
171 | // possible.
172 | func (s *BlockState) storeInLongestMatchCache(pos int, limit uint16,
173 | 	sublen []uint16, pair lz77Pair) {
174 | 	if !s.lmc.active {
175 | 		return
176 | 	}
177 | 
178 | 	// The LMC cache starts at the beginning of the block rather than the
179 | 	// beginning of the whole array.
180 | 	lmcPos := pos - s.blockStart
181 | 	lmcPair := s.lmc.store[lmcPos]
182 | 
183 | 	// Length > 0 and dist 0 is invalid combination, which indicates on purpose
184 | 	// that this cache value is not filled in yet.
185 | 	cacheAvailable := lmcPair.litLen == 0 || lmcPair.dist != 0
186 | 	if cacheAvailable {
187 | 		return
188 | 	}
189 | 
190 | 	if limit != MAX_MATCH || sublen == nil {
191 | 		return
192 | 	}
193 | 
194 | 	if lmcPair.litLen != 1 || lmcPair.dist != 0 {
195 | 		panic("overrun")
196 | 	}
197 | 	if pair.litLen < MIN_MATCH {
198 | 		lmcPair = lz77Pair{}
199 | 	} else {
200 | 		lmcPair = pair
201 | 	}
202 | 	s.lmc.store[lmcPos] = lmcPair
203 | 	if lmcPair.litLen == 1 && lmcPair.dist == 0 {
204 | 		panic("cached invalid combination")
205 | 	}
206 | 	s.lmc.sublenToCache(sublen, lmcPos, pair.litLen)
207 | }
208 | 
// findLongestMatch finds the longest match (length and corresponding
// distance) for LZ77 compression at position pos.
// Even when not using "sublen", it can be more efficient to provide a slice,
// because only then the caching is used.
//
// h: the hash chains. h.head[h.val] must refer to pos (modulo the window);
// the function panics with "invalid pp" otherwise.
//
// slice: the data
//
// pos: position in the data to find the match for
//
// size: size of the data
//
// limit: limit length to maximum this value (default should be 258). This allows
// finding a shorter dist for that length (= less extra bits). Must be
// in the range [MIN_MATCH, MAX_MATCH].
//
// sublen: output slice of 259 elements, or nil. Has, for each length, the
// smallest distance required to reach this length. Only 256 of its 259 values
// are used, the first 3 are ignored (the shortest length is 3. It is purely
// for convenience that the slice is made 3 longer).
//
// Returns the best pair found. When no match is found the result is the
// initial pair {1, 0}; when fewer than MIN_MATCH bytes remain after pos the
// zero pair is returned; on a cache hit the cached pair is returned as is.
//
// Panics when limit is outside [MIN_MATCH, MAX_MATCH], when pos >= size, when
// h.val does not fit in 16 bits, or when the result would run past size.
func (s *BlockState) findLongestMatch(h *hash, slice []byte, pos, size int, limit uint16, sublen []uint16) lz77Pair {
	// Index of pos inside the circular window.
	hPos := uint16(pos & WINDOW_MASK)
	// Length 1 with distance 0 stands for "no match found yet".
	bestPair := lz77Pair{1, 0}
	chainCounter := MAX_CHAIN_HITS // For quitting early.
	hPrev := h.prev

	if LONGEST_MATCH_CACHE {
		// limit is passed by address: the lookup may lower it even when it
		// cannot answer the query completely.
		pair, ok := s.tryGetFromLongestMatchCache(pos, &limit, sublen)
		if ok {
			/*
				if pos+int(pair.litLen) > size {
					panic("overrun")
				}
			*/
			return pair
		}
	}

	if limit > MAX_MATCH {
		panic("limit is too large")
	} else if limit < MIN_MATCH {
		panic("limit is too small")
	}
	if pos >= size {
		panic("overrun")
	}

	if size < pos+MIN_MATCH {
		// The rest of the code assumes there are at least MIN_MATCH
		// bytes to try.
		return lz77Pair{}
	}

	// Never match past the end of the data.
	if pos+int(limit) > size {
		limit = uint16(size - pos)
	}
	arrayEnd := pos + int(limit)

	if h.val >= 65536 {
		panic("hash value too large")
	}

	pp := uint16(h.head[h.val]) // During the whole loop, p == h.prev[pp].
	p := hPrev[pp]

	if pp != hPos {
		panic("invalid pp")
	}

	// Distance from pos back to the previous chain entry, accounting for
	// wrap-around of the circular window. p == pp yields WINDOW_SIZE, which
	// skips the loop below entirely.
	var dist int // Not uint16 on purpose.
	if p < pp {
		dist = int(pp - p)
	} else {
		dist = WINDOW_SIZE + int(pp) - int(p)
	}

	// Go through all distances.
	same0 := h.same[hPos]
	scanned := slice[pos]
	for dist < WINDOW_SIZE {
		scan := pos
		match := pos - dist

		// Testing the byte at position bestLength first, goes slightly faster.
		var currentLength uint16
		bestLitLen := bestPair.litLen
		bestPos := pos + int(bestLitLen)
		bestMatch := match + int(bestLitLen)
		if bestPos >= size || slice[bestPos] == slice[bestMatch] {
			if HASH_SAME {
				// Both positions start a run of identical bytes: skip the
				// part of the run they have in common, capped at limit.
				if same0 > 2 && scanned == slice[match] {
					same1 := h.same[match&WINDOW_MASK]
					var same uint16
					if same0 < same1 {
						same = same0
					} else {
						same = same1
					}
					if same > limit {
						same = limit
					}
					scan += int(same)
					match += int(same)
				}
			}
			scan = getMatch(slice, scan, match, arrayEnd)
			currentLength = uint16(scan - pos) // The found length.

			if currentLength > bestLitLen {
				if sublen != nil {
					// Every length newly reached by this candidate is
					// reachable with the current distance.
					for j := bestLitLen + 1; j <= currentLength; j++ {
						sublen[j] = uint16(dist)
					}
				}
				bestPair = lz77Pair{currentLength, uint16(dist)}
				if currentLength >= limit {
					break
				}
			}
		}

		if HASH_SAME_HASH {
			// Switch to the other hash once this will be more efficient.
			if bestPair.litLen >= same0 && h.val2 == h.hashVal2[p] {
				// Now use the hash that encodes the length and first byte.
				hPrev = h.prev2
			}
		}

		// Step to the next (older) entry of the chain.
		pp = p
		p = hPrev[p]
		if p == pp {
			// Uninited prev value.
			break
		}

		// Accumulate the distance, again accounting for window wrap-around.
		if p >= pp {
			dist += WINDOW_SIZE
		}
		dist += int(pp) - int(p)

		if MAX_CHAIN_HITS < WINDOW_SIZE {
			chainCounter--
			if chainCounter <= 0 {
				break
			}
		}
	}

	if LONGEST_MATCH_CACHE {
		s.storeInLongestMatchCache(pos, limit, sublen, bestPair)
	}

	if bestPair.litLen > limit {
		panic("overrun")
	}

	if pos+int(bestPair.litLen) > size {
		panic("overrun")
	}
	return bestPair
}
370 | 
371 | // Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than
372 | // with the slow but better "squeeze" implementation.
373 | // The result is placed in the LZ77Store.
374 | // If inStart is larger than 0, it uses values before inStart as starting
375 | // dictionary.
376 | func (s *BlockState) LZ77Greedy(inStart, inEnd int) (store LZ77Store) {
377 | 	var windowStart int
378 | 	if inStart > WINDOW_SIZE {
379 | 		windowStart = inStart - WINDOW_SIZE
380 | 	}
381 | 
382 | 	// Lazy matching.
383 | 	var prevPair lz77Pair
384 | 	var matchAvailable bool
385 | 
386 | 	if inStart == inEnd {
387 | 		return
388 | 	}
389 | 
390 | 	h := newHash(s.block[windowStart], s.block[windowStart+1])
391 | 	for i := windowStart; i < inStart; i++ {
392 | 		h.update(s.block, i, inEnd)
393 | 	}
394 | 
395 | 	dummySublen := make([]uint16, 259)
396 | 	for i := inStart; i < inEnd; i++ {
397 | 		h.update(s.block, i, inEnd)
398 | 
399 | 		pair := s.findLongestMatch(&h, s.block, i, inEnd, MAX_MATCH, dummySublen[:])
400 | 		lengthScore := pair.lengthScore()
401 | 
402 | 		if LAZY_MATCHING {
403 | 			// Lazy matching.
404 | 			prevLengthScore := prevPair.lengthScore()
405 | 			if matchAvailable {
406 | 				matchAvailable = false
407 | 				if lengthScore > prevLengthScore+1 {
408 | 					store = append(store, lz77Pair{uint16(s.block[i-1]), 0})
409 | 					if lengthScore >= MIN_MATCH && pair.litLen < MAX_MATCH {
410 | 						matchAvailable = true
411 | 						prevPair = pair
412 | 						continue
413 | 					}
414 | 				} else {
415 | 					// Add previous to output.
416 | 					pair = prevPair
417 | 					lengthScore = prevLengthScore
418 | 					// Add to output.
419 | 					pair.Verify(s.block, i-1)
420 | 					store = append(store, pair)
421 | 					for j := uint16(2); j < pair.litLen; j++ {
422 | 						if i >= inEnd {
423 | 							panic("overrun")
424 | 						}
425 | 						i++
426 | 						h.update(s.block, i, inEnd)
427 | 					}
428 | 					continue
429 | 				}
430 | 			} else if lengthScore >= MIN_MATCH && pair.litLen < MAX_MATCH {
431 | 				matchAvailable = true
432 | 				prevPair = pair
433 | 				continue
434 | 			}
435 | 			// End of lazy matching.
436 | 		}
437 | 
438 | 		// Add to output.
439 | 		if lengthScore >= MIN_MATCH {
440 | 			pair.Verify(s.block, i)
441 | 			store = append(store, pair)
442 | 		} else {
443 | 			pair.litLen = 1
444 | 			store = append(store, lz77Pair{uint16(s.block[i]), 0})
445 | 		}
446 | 		for j := uint16(1); j < pair.litLen; j++ {
447 | 			if i >= inEnd {
448 | 				panic("overrun")
449 | 			}
450 | 			i++
451 | 			h.update(s.block, i, inEnd)
452 | 		}
453 | 	}
454 | 	return store
455 | }
456 | 
457 | // Counts the number of literal, length and distance symbols in the given lz77
458 | // arrays.
459 | // litLens: lz77 lit/lengths
460 | // dists: ll77 distances
461 | // start: where to begin counting in litLens and dists
462 | // end: where to stop counting in litLens and dists (not inclusive)
463 | // llCount: count of each lit/len symbol, must have size 288 (see deflate
464 | //     standard)
465 | // dCount: count of each dist symbol, must have size 32 (see deflate standard)
466 | func (store LZ77Store) lz77Counts() (counts lz77Counts) {
467 | 	counts.litLen = make([]uint, 288)
468 | 	counts.dist = make([]uint, 32)
469 | 	end := len(store)
470 | 	for i := 0; i < end; i++ {
471 | 		pair := store[i]
472 | 		if pair.dist == 0 {
473 | 			counts.litLen[pair.litLen]++
474 | 		} else {
475 | 			counts.litLen[pair.lengthSymbol()]++
476 | 			counts.dist[pair.distSymbol()]++
477 | 		}
478 | 	}
479 | 
480 | 	counts.litLen[256] = 1 // End symbol.
481 | 	return counts
482 | }
483 | 


--------------------------------------------------------------------------------
/zopfli/squeeze.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011 Google Inc. All Rights Reserved.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 | 		http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | 
 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
 18 | */
 19 | 
 20 | package zopfli
 21 | 
 22 | import (
 23 | 	"fmt"
 24 | 	"math"
 25 | 	"os"
 26 | )
 27 | 
// symbolStats holds symbol frequencies gathered from an LZ77 store together
// with the bit costs derived from them by calculate.
type symbolStats struct {
	// Frequency of each literal/length symbol. Stored as floats because the
	// values may be weighted sums of several runs.
	litLens []float64
	// Frequency of each of the 32 unique dist symbols, not the 32768
	// possible dists.
	dists []float64

	// Length of each lit/len symbol in bits, as computed by CalculateEntropy
	// from litLens (fractional).
	llSymbols []float64
	// Length of each dist symbol in bits, as computed by CalculateEntropy
	// from dists (fractional).
	dSymbols []float64
}
 39 | 
 40 | // Adds the bit lengths.
 41 | func addWeightedFreqs(stats1 symbolStats, w1 float64,
 42 | 	stats2 symbolStats, w2 float64) (result symbolStats) {
 43 | 	result.litLens = make([]float64, 288)
 44 | 	result.dists = make([]float64, 32)
 45 | 	for i := 0; i < 288; i++ {
 46 | 		litlen1 := stats1.litLens[i] * w1
 47 | 		litlen2 := stats2.litLens[i] * w2
 48 | 		result.litLens[i] = litlen1 + litlen2
 49 | 	}
 50 | 	for i := 0; i < 32; i++ {
 51 | 		dist1 := stats1.dists[i] * w1
 52 | 		dist2 := stats2.dists[i] * w2
 53 | 		result.dists[i] = dist1 + dist2
 54 | 	}
 55 | 	result.litLens[256] = 1 // End symbol.
 56 | 	return result
 57 | }
 58 | 
 59 | type ranState struct {
 60 | 	m_w, m_z uint32
 61 | }
 62 | 
 63 | func newRanState() (state ranState) {
 64 | 	state.m_w = 1
 65 | 	state.m_z = 2
 66 | 	return state
 67 | }
 68 | 
 69 | /* Get random number: "Multiply-With-Carry" generator of G. Marsaglia */
 70 | func (state ranState) ran() uint32 {
 71 | 	state.m_z = 36969*(state.m_z&65535) + (state.m_z >> 16)
 72 | 	state.m_w = 18000*(state.m_w&65535) + (state.m_w >> 16)
 73 | 	return (state.m_z << 16) + state.m_w // 32-bit result.
 74 | }
 75 | 
 76 | func (state ranState) randomizeFreqs(freqs []float64) {
 77 | 	n := uint32(len(freqs))
 78 | 	for i := uint32(0); i < n; i++ {
 79 | 		if (state.ran()>>4)%3 == 0 {
 80 | 			freqs[i] = freqs[state.ran()%n]
 81 | 		}
 82 | 	}
 83 | }
 84 | 
 85 | func (stats symbolStats) randomizeFreqs(state ranState) {
 86 | 	state.randomizeFreqs(stats.litLens)
 87 | 	state.randomizeFreqs(stats.dists)
 88 | 	stats.litLens[256] = 1 // End symbol.
 89 | }
 90 | 
// costModelFun is a function that calculates a cost, in bits, based on a
// model for the given LZ77 symbol.
// pair: pair.litLen means a literal symbol if pair.dist is 0, and a match
// length otherwise.
// context: data private to the model; costStat expects a symbolStats value,
// costFixed ignores it.
type costModelFun func(pair lz77Pair, context interface{}) float64
 94 | 
 95 | // Cost model which should exactly match fixed tree.
 96 | // type: costModelFun
 97 | func costFixed(pair lz77Pair, unused interface{}) float64 {
 98 | 	if pair.dist == 0 {
 99 | 		if pair.litLen <= 143 {
100 | 			return 8
101 | 		}
102 | 		return 9
103 | 	}
104 | 	dBits := pair.distExtraBits()
105 | 	lBits := pair.lengthExtraBits()
106 | 	lSym := pair.lengthSymbol()
107 | 	cost := float64(5) // Every dist symbol has length 5.
108 | 	if lSym <= 279 {
109 | 		cost += 7
110 | 	} else {
111 | 		cost += 8
112 | 	}
113 | 	return cost + float64(dBits+lBits)
114 | }
115 | 
116 | // Cost model based on symbol statistics.
117 | // type: costModelFun
118 | func costStat(pair lz77Pair, context interface{}) float64 {
119 | 	stats := context.(symbolStats)
120 | 	if pair.dist == 0 {
121 | 		return stats.llSymbols[pair.litLen]
122 | 	}
123 | 	lSym := pair.lengthSymbol()
124 | 	lBits := pair.lengthExtraBits()
125 | 	dSym := pair.distSymbol()
126 | 	dBits := pair.distExtraBits()
127 | 	return stats.llSymbols[lSym] + float64(lBits) + stats.dSymbols[dSym] + float64(dBits)
128 | }
129 | 
// dSymbolTable is the table of distances that have a different distance
// symbol in the deflate specification. Each value is the first distance that
// has a new symbol.
// See RFC 1951 section 3.2.5. Compressed blocks (length and distance codes).
var dSymbolTable = [30]uint16{
	1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
	33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
	1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577,
}
134 | 
135 | // Finds the minimum possible cost this cost model can return for valid length and
136 | // distance symbols.
137 | func (costModel costModelFun) minCost(costContext interface{}) float64 {
138 | 	var minCost float64
139 | 
140 | 	// Table of distances that have a different distance symbol in the deflate
141 | 	// specification. Each value is the first distance that has a new symbol. Only
142 | 	// different symbols affect the cost model so only these need to be checked.
143 | 	// See RFC 1951 section 3.2.5. Compressed blocks (length and distance codes).
144 | 
145 | 	// bestPair has lowest cost in the cost model
146 | 	var bestPair, pair lz77Pair
147 | 	pair.dist = 1
148 | 	minCost = math.Inf(1)
149 | 	for pair.litLen = uint16(3); pair.litLen < 259; pair.litLen++ {
150 | 		c := costModel(pair, costContext)
151 | 		if c < minCost {
152 | 			bestPair.litLen = pair.litLen
153 | 			minCost = c
154 | 		}
155 | 	}
156 | 
157 | 	// TODO: try using bestPair.litlen instead of 3
158 | 	pair.litLen = 3
159 | 	minCost = math.Inf(1)
160 | 	for i := 0; i < 30; i++ {
161 | 		pair.dist = dSymbolTable[i]
162 | 		c := costModel(pair, costContext)
163 | 		if c < minCost {
164 | 			bestPair.dist = pair.dist
165 | 			minCost = c
166 | 		}
167 | 	}
168 | 
169 | 	return costModel(bestPair, costContext)
170 | }
171 | 
172 | // Performs the forward pass for "squeeze". Gets the most optimal length to reach
173 | // every byte from a previous byte, using cost calculations.
174 | // s: the BlockState
175 | // inStart: where to start
176 | // inEnd: where to stop (not inclusive)
177 | // costModel: function to calculate the cost of some lit/len/dist pair.
178 | // costContext: abstract context for the costmodel function
179 | // lengths: output slice of size (inEnd - instart) which will receive the best length to reach this byte from a previous byte.
180 | // returns the cost that was, according to the costmodel, needed to get to the end.
181 | func (s *BlockState) bestLengths(inStart, inEnd int, costModel costModelFun, costContext interface{}) (lengths []uint16) {
182 | 	// Best cost to get here so far.
183 | 	if inStart == inEnd {
184 | 		return nil
185 | 	}
186 | 
187 | 	blockSize := inEnd - inStart
188 | 	lengths = make([]uint16, blockSize+1)
189 | 	costs := make([]float64, blockSize+1)
190 | 
191 | 	var windowStart int
192 | 	if inStart > WINDOW_SIZE {
193 | 		windowStart = inStart - WINDOW_SIZE
194 | 	}
195 | 	h := newHash(s.block[windowStart], s.block[windowStart+1])
196 | 	for i := windowStart; i < inStart; i++ {
197 | 		h.update(s.block, i, inEnd)
198 | 	}
199 | 
200 | 	minCost := costModel.minCost(costContext)
201 | 	infinity := math.Inf(1)
202 | 	for i := 1; i <= blockSize; i++ {
203 | 		costs[i] = infinity
204 | 	}
205 | 
206 | 	sublen := make([]uint16, 259)
207 | 	for i := inStart; i < inEnd; i++ {
208 | 		j := i - inStart // Index in the costs slice and lengths.
209 | 		h.update(s.block, i, inEnd)
210 | 		cost := costs[j]
211 | 
212 | 		if SHORTCUT_LONG_REPETITIONS {
213 | 			// If we're in a long repetition of the same character and have
214 | 			// more than MAX_MATCH characters before and after our position.
215 | 			if h.same[i&WINDOW_MASK] > MAX_MATCH*2 &&
216 | 				i > inStart+MAX_MATCH+1 &&
217 | 				i+MAX_MATCH*2+1 < inEnd &&
218 | 				h.same[(i-MAX_MATCH)&WINDOW_MASK] > MAX_MATCH {
219 | 				symbolCost := costModel(lz77Pair{MAX_MATCH, 1}, costContext)
220 | 				// Set the length to reach each one to MAX_MATCH, and the cost
221 | 				// to the cost corresponding to that length. Doing this, we
222 | 				// skip MAX_MATCH values to avoid calling findLongestMatch.
223 | 				for k := 0; k < MAX_MATCH; k++ {
224 | 					costs[j+MAX_MATCH] = cost + symbolCost
225 | 					lengths[j+MAX_MATCH] = MAX_MATCH
226 | 					i++
227 | 					j++
228 | 					h.update(s.block, i, inEnd)
229 | 					cost = costs[j]
230 | 				}
231 | 			}
232 | 		}
233 | 
234 | 		pair := s.findLongestMatch(&h, s.block, i, inEnd, MAX_MATCH, sublen)
235 | 		leng := pair.litLen
236 | 
237 | 		// Literal.
238 | 		if i+1 <= inEnd {
239 | 			newCost := cost + costModel(lz77Pair{uint16(s.block[i]), 0}, costContext)
240 | 			if !(newCost >= 0) {
241 | 				panic("new cost is not positive")
242 | 			}
243 | 			if newCost < costs[j+1] {
244 | 				costs[j+1] = newCost
245 | 				lengths[j+1] = 1
246 | 			}
247 | 		}
248 | 		// Lengths.
249 | 		for k := uint16(3); k <= leng && i+int(k) <= inEnd; k++ {
250 | 			// Calling the cost model is expensive, avoid this if we are
251 | 			// already at the minimum possible cost that it can return.
252 | 			nextCost := costs[j+int(k)]
253 | 			if nextCost <= minCost+cost {
254 | 				continue
255 | 			}
256 | 
257 | 			newCost := cost + costModel(lz77Pair{k, sublen[k]}, costContext)
258 | 			if !(newCost >= 0) {
259 | 				panic("new cost is not positive")
260 | 			}
261 | 			if newCost < nextCost {
262 | 				if k > MAX_MATCH {
263 | 					panic("k is larger than MAX_MATCH")
264 | 				}
265 | 				costs[j+int(k)] = newCost
266 | 				lengths[j+int(k)] = k
267 | 			}
268 | 		}
269 | 	}
270 | 
271 | 	cost := costs[blockSize]
272 | 	if !(cost >= 0) {
273 | 		panic("cost is not positive")
274 | 	}
275 | 	if math.IsNaN(cost) {
276 | 		panic("cost is NaN")
277 | 	}
278 | 	if math.IsInf(cost, 0) {
279 | 		panic("cost is infinite")
280 | 	}
281 | 	return lengths
282 | }
283 | 
284 | // Calculates the optimal path of lz77 lengths to use, from the calculated
285 | // lengths. The lengths must contain the optimal length to reach that
286 | // byte. The path will be filled with the lengths to use, so its data size will be
287 | // the amount of lz77 symbols.
288 | func traceBackwards(size int, lengths []uint16) []uint16 {
289 | 	if size == 0 {
290 | 		return nil
291 | 	}
292 | 
293 | 	var path []uint16
294 | 	index := size
295 | 	for {
296 | 		path = append(path, lengths[index])
297 | 		if int(lengths[index]) > index {
298 | 			panic("length is greater than index")
299 | 		}
300 | 		if lengths[index] > MAX_MATCH {
301 | 			panic("length is greater than MAX_MATCH")
302 | 		}
303 | 		if lengths[index] == 0 {
304 | 			panic("length is zero")
305 | 		}
306 | 		index -= int(lengths[index])
307 | 		if index == 0 {
308 | 			break
309 | 		}
310 | 	}
311 | 
312 | 	// Mirror result.
313 | 	pathSize := len(path)
314 | 	for index = 0; index < pathSize/2; index++ {
315 | 		path[index], path[pathSize-index-1] = path[pathSize-index-1], path[index]
316 | 	}
317 | 
318 | 	return path
319 | }
320 | 
321 | func (s *BlockState) followPath(inStart, inEnd int,
322 | 	path []uint16) LZ77Store {
323 | 	var store LZ77Store
324 | 	if inStart == inEnd {
325 | 		return store
326 | 	}
327 | 
328 | 	var windowStart int
329 | 	if inStart > WINDOW_SIZE {
330 | 		windowStart = inStart - WINDOW_SIZE
331 | 	}
332 | 	h := newHash(s.block[windowStart], s.block[windowStart+1])
333 | 	for i := windowStart; i < inStart; i++ {
334 | 		h.update(s.block, i, inEnd)
335 | 	}
336 | 
337 | 	pos := inStart
338 | 	for _, length := range path {
339 | 		if pos >= inEnd {
340 | 			panic("position overrun")
341 | 		}
342 | 
343 | 		h.update(s.block, pos, inEnd)
344 | 
345 | 		// Add to output.
346 | 		if length >= MIN_MATCH {
347 | 			// Get the distance by recalculating longest match. The
348 | 			// found length should match the length from the path.
349 | 			pair := s.findLongestMatch(&h, s.block, pos, inEnd, length, nil)
350 | 			if pair.litLen != length && length > 2 && pair.litLen > 2 {
351 | 				panic("dummy length is invalid")
352 | 			}
353 | 			pair.Verify(s.block, pos)
354 | 			store = append(store, pair)
355 | 		} else {
356 | 			length = 1
357 | 			store = append(store, lz77Pair{uint16(s.block[pos]), 0})
358 | 		}
359 | 
360 | 		if pos+int(length) > inEnd {
361 | 			panic("position overrun")
362 | 		}
363 | 		for j := 1; j < int(length); j++ {
364 | 			h.update(s.block, pos+j, inEnd)
365 | 		}
366 | 
367 | 		pos += int(length)
368 | 	}
369 | 	return store
370 | }
371 | 
372 | // Calculates the entropy of the statistics
373 | func (stats *symbolStats) calculate() {
374 | 	stats.llSymbols = CalculateEntropy(stats.litLens)
375 | 	stats.dSymbols = CalculateEntropy(stats.dists)
376 | }
377 | 
378 | // Appends the symbol statistics from the store.
379 | func (store LZ77Store) statistics() (stats symbolStats) {
380 | 	stats.litLens = make([]float64, 288)
381 | 	stats.dists = make([]float64, 32)
382 | 	storeSize := len(store)
383 | 	for i := 0; i < storeSize; i++ {
384 | 		if store[i].dist == 0 {
385 | 			stats.litLens[store[i].litLen]++
386 | 		} else {
387 | 			stats.litLens[store[i].lengthSymbol()]++
388 | 			stats.dists[store[i].distSymbol()]++
389 | 		}
390 | 	}
391 | 	stats.litLens[256] = 1 // End symbol.
392 | 
393 | 	stats.calculate()
394 | 	return stats
395 | }
396 | 
397 | // Does a single run for LZ77Optimal. For good compression, repeated runs
398 | // with updated statistics should be performed.
399 | //
400 | // s: the block state
401 | // inStart: where to start
402 | // inEnd: where to stop (not inclusive)
403 | // lengths: slice of size (inEnd - inStart) used to store lengths
404 | // costModel: function to use as the cost model for this squeeze run
405 | // costContext: abstract context for the costmodel function
406 | // store: place to output the LZ77 data
407 | // returns the cost that was, according to the costmodel, needed to get to the end.
408 | // This is not the actual cost.
409 | func (s *BlockState) lz77OptimalRun(inStart, inEnd int, costModel costModelFun, costContext interface{}) (store LZ77Store) {
410 | 	lengths := s.bestLengths(inStart, inEnd, costModel, costContext)
411 | 	path := traceBackwards(inEnd-inStart, lengths)
412 | 	store = s.followPath(inStart, inEnd, path)
413 | 	return store
414 | }
415 | 
416 | // Calculates lit/len and dist pairs for given data.
417 | // If instart is larger than 0, it uses values before instart as starting
418 | // dictionary.
419 | func (s *BlockState) LZ77Optimal(inStart, inEnd int) LZ77Store {
420 | 	// Dist to get to here with smallest cost.
421 | 	bestCost := uint64(math.MaxUint64)
422 | 	var lastCost uint64
423 | 	// Try randomizing the costs a bit once the size stabilizes.
424 | 	var randomize bool
425 | 
426 | 	ranState := newRanState()
427 | 
428 | 	// Do regular deflate, then loop multiple shortest path
429 | 	// runs, each time using the statistics of the previous run.
430 | 
431 | 	// Initial run.
432 | 	bestStore := s.LZ77Greedy(inStart, inEnd)
433 | 	bestStats := bestStore.statistics()
434 | 	lastStats := bestStats
435 | 
436 | 	// Repeat statistics with each time the cost model
437 | 	// from the previous stat run.
438 | 	for i := 0; i < s.options.NumIterations; i++ {
439 | 		store := s.lz77OptimalRun(inStart, inEnd, costStat, lastStats)
440 | 		cost := store.CalculateBlockSize(2)
441 | 		if s.options.VerboseMore || (s.options.Verbose && cost < bestCost) {
442 | 			fmt.Fprintf(os.Stderr, "Iteration %d: %d bit\n", i, int(cost))
443 | 		}
444 | 		stats := store.statistics()
445 | 		if cost < bestCost {
446 | 			// Copy to the output store.
447 | 			bestStore = store
448 | 			bestStats = stats
449 | 			bestCost = cost
450 | 		}
451 | 		if i > 5 && cost == lastCost {
452 | 			lastStats = bestStats
453 | 			lastStats.randomizeFreqs(ranState)
454 | 			lastStats.calculate()
455 | 			randomize = true
456 | 		} else {
457 | 			if randomize {
458 | 				// This makes it converge slower but better. Do it only once the
459 | 				// randomness kicks in so that if the user does few iterations, it gives a
460 | 				// better result sooner.
461 | 				stats = addWeightedFreqs(stats, 1.0, lastStats, 0.5)
462 | 				stats.calculate()
463 | 			}
464 | 			lastStats = stats
465 | 		}
466 | 		lastCost = cost
467 | 	}
468 | 	return bestStore
469 | }
470 | 
471 | // Does the same as LZ77Optimal, but optimized for the fixed tree of the
472 | // deflate standard.
473 | // The fixed tree never gives the best compression. But this gives the best
474 | // possible LZ77 encoding possible with the fixed tree.
475 | // This does not create or output any fixed tree, only LZ77 data optimized for
476 | // using with a fixed tree.
477 | // If inStart is larger than 0, it uses values before inStart as starting
478 | // dictionary.
479 | func (s *BlockState) LZ77OptimalFixed(inStart, inEnd int) LZ77Store {
480 | 	// Dist to get to here with smallest cost.
481 | 	// Shortest path for fixed tree This one should give the shortest possible
482 | 	// result for fixed tree, no repeated runs are needed since the tree is known.
483 | 	return s.lz77OptimalRun(inStart, inEnd, costFixed, nil)
484 | }
485 | 


--------------------------------------------------------------------------------
/zopfli/tree.go:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011 Google Inc. All Rights Reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 | 		http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | 
16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 | */
19 | 
20 | package zopfli
21 | 
22 | import (
23 | 	"math"
24 | )
25 | 
// lengthsToSymbols converts a series of Huffman tree bit lengths to the bit
// values of the symbols.
//
// lengths: the bit length of every symbol; 0 means the symbol is unused.
// maxBits: the largest bit length that may occur in lengths.
//
// Returns one code per symbol; unused symbols get 0. Panics with
// "length is too large" when a length exceeds maxBits.
func lengthsToSymbols(lengths []uint, maxBits uint) (symbols []uint) {
	// make returns zeroed slices, so the counters need no explicit reset.
	blCount := make([]uint, maxBits+1)
	nextCode := make([]uint, maxBits+1)

	symbols = make([]uint, len(lengths))

	// 1) Count the number of codes for each code length.
	// Let blCount[N] be the number of codes of length N, N >= 1.
	for _, bitLen := range lengths {
		if bitLen > maxBits {
			panic("length is too large")
		}
		blCount[bitLen]++
	}
	// Unused symbols take no code space.
	blCount[0] = 0

	// 2) Find the numerical value of the smallest code for each code length.
	var code uint
	for bits := uint(1); bits <= maxBits; bits++ {
		code = (code + blCount[bits-1]) << 1
		nextCode[bits] = code
	}

	// 3) Assign numerical values to all codes, using consecutive values for
	// all codes of the same length with the base values determined at step 2.
	for i, bitLen := range lengths {
		if bitLen == 0 {
			continue
		}
		symbols[i] = nextCode[bitLen]
		nextCode[bitLen]++
	}
	return symbols
}
63 | 
// CalculateEntropy calculates the entropy of each symbol, based on the counts
// of each symbol. The result is similar to the result of CalculateBitLengths,
// but with the actual theoretical bit lengths according to the entropy. Since
// the resulting values are fractional, they cannot be used to encode the tree
// specified by DEFLATE.
//
// The result has one entry per entry of count. Panics with
// "bit length is not positive" when an entry comes out negative or NaN.
func CalculateEntropy(count []float64) (bitLengths []float64) {
	total := 0.0
	for _, c := range count {
		total += c
	}

	// When the count of the symbol is 0, but its cost is requested anyway, it
	// means the symbol will appear at least once anyway, so give it the cost
	// as if its count is 1. If every count is 0 the number of symbols stands
	// in for the total.
	zeroCountBits := math.Log2(total)
	if total == 0 {
		zeroCountBits = math.Log2(float64(len(count)))
	}

	bitLengths = make([]float64, len(count))
	for i, c := range count {
		bits := zeroCountBits
		if c != 0 {
			bits = math.Log2(total / c)
		}
		// Written as a negated comparison so that NaN is caught too.
		if !(bits >= 0) {
			panic("bit length is not positive")
		}
		bitLengths[i] = bits
	}
	return bitLengths
}
96 | 


--------------------------------------------------------------------------------
/zopfli/util.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011 Google Inc. All Rights Reserved.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 | 		http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | 
 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
 18 | */
 19 | 
 20 | package zopfli
 21 | 
// Compile-time configuration of the compressor.
const (
	// Minimum and maximum length that can be encoded in deflate.
	MAX_MATCH = 258
	MIN_MATCH = 3

	// The window size for deflate. Must be a power of two. This should be
	// 32768, the maximum possible by the deflate spec. Anything less hurts
	// compression more than speed.
	WINDOW_SIZE = 32768

	// The window mask used to wrap indices into the window. This is why the
	// window size must be a power of two.
	WINDOW_MASK = (WINDOW_SIZE - 1)

	// A block structure of huge, non-smart, blocks to divide the input into, to allow
	// operating on huge files without exceeding memory, such as the 1GB wiki9 corpus.
	// The whole compression algorithm, including the smarter block splitting, will
	// be executed independently on each huge block.
	// Dividing into huge blocks hurts compression, but not much relative to the size.
	// Set this to, for example, 20MB (20000000). Set it to 0 to disable master blocks.
	MASTER_BLOCK_SIZE = 20000000

	// For longest match cache. Maximum 256. Uses huge amounts of memory but
	// makes it faster. Uses this many times three bytes per single byte of the
	// input data. This is so because longest match finding has to find the
	// exact distance that belongs to each length for the best lz77 strategy.
	// Good values: e.g. 5, 8.
	CACHE_LENGTH = 8

	// Limit the maximum number of hash chain hits for this hash value. This
	// has an effect only on files where the hash value is the same very often.
	// On these files, this gives worse compression (the value should ideally
	// be 32768, which is the WINDOW_SIZE, while zlib uses 4096 even for best
	// level), but makes it faster on some specific files.
	// Good value: e.g. 8192.
	MAX_CHAIN_HITS = 8192

	// Whether to use the longest match cache for findLongestMatch. This cache
	// consumes a lot of memory but speeds it up. No effect on compression size.
	LONGEST_MATCH_CACHE = true

	// Enable to remember the amount of successive identical bytes in the hash
	// chain for finding the longest match.
	// Required for HASH_SAME_HASH and SHORTCUT_LONG_REPETITIONS.
	// This has no effect on the compression result, and enabling it increases speed.
	HASH_SAME = true

	// Switch to a faster hash based on the info from HASH_SAME once the
	// best length so far is long enough. This is way faster for files with lots of
	// identical bytes, on which the compressor is otherwise too slow. Regular files
	// are unaffected or maybe a tiny bit slower.
	// This has no effect on the compression result, only on speed.
	HASH_SAME_HASH = true

	// Enable this, to avoid slowness for files which are a repetition of the same
	// character more than a multiple of MAX_MATCH times. This should not affect
	// the compression result.
	SHORTCUT_LONG_REPETITIONS = true

	// Whether to use lazy matching in the greedy LZ77 implementation. This gives a
	// better result of LZ77Greedy, but the effect this has on the optimal LZ77
	// varies from file to file.
	LAZY_MATCHING = true
)
 86 | 
 87 | // Gets the amount of extra bits for the given dist, cfr. the DEFLATE spec.
 88 | func (pair lz77Pair) distExtraBits() uint16 {
 89 | 	dist := pair.dist
 90 | 	if dist < 5 {
 91 | 		return 0
 92 | 	} else if dist < 9 {
 93 | 		return 1
 94 | 	} else if dist < 17 {
 95 | 		return 2
 96 | 	} else if dist < 33 {
 97 | 		return 3
 98 | 	} else if dist < 65 {
 99 | 		return 4
100 | 	} else if dist < 129 {
101 | 		return 5
102 | 	} else if dist < 257 {
103 | 		return 6
104 | 	} else if dist < 513 {
105 | 		return 7
106 | 	} else if dist < 1025 {
107 | 		return 8
108 | 	} else if dist < 2049 {
109 | 		return 9
110 | 	} else if dist < 4097 {
111 | 		return 10
112 | 	} else if dist < 8193 {
113 | 		return 11
114 | 	} else if dist < 16385 {
115 | 		return 12
116 | 	}
117 | 	return 13
118 | }
119 | 
120 | // Gets value of the extra bits for the given dist, cfr. the DEFLATE spec.
121 | func (pair lz77Pair) distExtraBitsValue() uint16 {
122 | 	dist := pair.dist
123 | 	switch {
124 | 	case dist < 5:
125 | 		return 0
126 | 	case dist < 9:
127 | 		return (dist - 5) & 1
128 | 	case dist < 17:
129 | 		return (dist - 9) & 3
130 | 	case dist < 33:
131 | 		return (dist - 17) & 7
132 | 	case dist < 65:
133 | 		return (dist - 33) & 15
134 | 	case dist < 129:
135 | 		return (dist - 65) & 31
136 | 	case dist < 257:
137 | 		return (dist - 129) & 63
138 | 	case dist < 513:
139 | 		return (dist - 257) & 127
140 | 	case dist < 1025:
141 | 		return (dist - 513) & 255
142 | 	case dist < 2049:
143 | 		return (dist - 1025) & 511
144 | 	case dist < 4097:
145 | 		return (dist - 2049) & 1023
146 | 	case dist < 8193:
147 | 		return (dist - 4097) & 2047
148 | 	case dist < 16385:
149 | 		return (dist - 8193) & 4095
150 | 	}
151 | 	return dist - 16385&8191
152 | }
153 | 
154 | // Gets the symbol for the given dist, cfr. the DEFLATE spec.
155 | func (pair lz77Pair) distSymbol() uint16 {
156 | 	dist := pair.dist
157 | 	if dist < 193 {
158 | 		if dist < 13 {
159 | 			// dist 0..13.
160 | 			if dist < 5 {
161 | 				return dist - 1
162 | 			} else if dist < 7 {
163 | 				return 4
164 | 			} else if dist < 9 {
165 | 				return 5
166 | 			}
167 | 			return 6
168 | 		} else {
169 | 			// dist 13..193.
170 | 			if dist < 17 {
171 | 				return 7
172 | 			} else if dist < 25 {
173 | 				return 8
174 | 			} else if dist < 33 {
175 | 				return 9
176 | 			} else if dist < 49 {
177 | 				return 10
178 | 			} else if dist < 65 {
179 | 				return 11
180 | 			} else if dist < 97 {
181 | 				return 12
182 | 			} else if dist < 129 {
183 | 				return 13
184 | 			}
185 | 			return 14
186 | 		}
187 | 	}
188 | 	if dist < 2049 {
189 | 		// dist 193..2049.
190 | 		if dist < 257 {
191 | 			return 15
192 | 		} else if dist < 385 {
193 | 			return 16
194 | 		} else if dist < 513 {
195 | 			return 17
196 | 		} else if dist < 769 {
197 | 			return 18
198 | 		} else if dist < 1025 {
199 | 			return 19
200 | 		} else if dist < 1537 {
201 | 			return 20
202 | 		}
203 | 		return 21
204 | 	}
205 | 	// dist 2049..32768.
206 | 	if dist < 3073 {
207 | 		return 22
208 | 	} else if dist < 4097 {
209 | 		return 23
210 | 	} else if dist < 6145 {
211 | 		return 24
212 | 	} else if dist < 8193 {
213 | 		return 25
214 | 	} else if dist < 12289 {
215 | 		return 26
216 | 	} else if dist < 16385 {
217 | 		return 27
218 | 	} else if dist < 24577 {
219 | 		return 28
220 | 	}
221 | 	return 29
222 | }
223 | 
224 | var lengthExtraBitsTable [259]uint16 = [259]uint16{
225 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
226 | 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
227 | 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
228 | 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
229 | 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
230 | 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
231 | 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
232 | 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
233 | 	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
234 | 	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
235 | 	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
236 | 	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
237 | 	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
238 | 	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
239 | 	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
240 | 	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0,
241 | }
242 | 
243 | // Gets the amount of extra bits for the given length, cfr. the DEFLATE spec.
244 | func (pair lz77Pair) lengthExtraBits() uint16 {
245 | 	return lengthExtraBitsTable[pair.litLen]
246 | }
247 | 
248 | var lengthExtraBitsValueTable [259]uint16 = [259]uint16{
249 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 0,
250 | 	1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
251 | 	6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6,
252 | 	7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
253 | 	13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,
254 | 	3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
255 | 	10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
256 | 	29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
257 | 	18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6,
258 | 	7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
259 | 	27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
260 | 	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0,
261 | }
262 | 
263 | // Gets value of the extra bits for the given length, cfr. the DEFLATE spec.
264 | func (pair lz77Pair) lengthExtraBitsValue() uint16 {
265 | 	return lengthExtraBitsValueTable[pair.litLen]
266 | }
267 | 
268 | var lengthSymbolTable [259]uint16 = [259]uint16{
269 | 	0, 0, 0, 257, 258, 259, 260, 261, 262, 263, 264,
270 | 	265, 265, 266, 266, 267, 267, 268, 268,
271 | 	269, 269, 269, 269, 270, 270, 270, 270,
272 | 	271, 271, 271, 271, 272, 272, 272, 272,
273 | 	273, 273, 273, 273, 273, 273, 273, 273,
274 | 	274, 274, 274, 274, 274, 274, 274, 274,
275 | 	275, 275, 275, 275, 275, 275, 275, 275,
276 | 	276, 276, 276, 276, 276, 276, 276, 276,
277 | 	277, 277, 277, 277, 277, 277, 277, 277,
278 | 	277, 277, 277, 277, 277, 277, 277, 277,
279 | 	278, 278, 278, 278, 278, 278, 278, 278,
280 | 	278, 278, 278, 278, 278, 278, 278, 278,
281 | 	279, 279, 279, 279, 279, 279, 279, 279,
282 | 	279, 279, 279, 279, 279, 279, 279, 279,
283 | 	280, 280, 280, 280, 280, 280, 280, 280,
284 | 	280, 280, 280, 280, 280, 280, 280, 280,
285 | 	281, 281, 281, 281, 281, 281, 281, 281,
286 | 	281, 281, 281, 281, 281, 281, 281, 281,
287 | 	281, 281, 281, 281, 281, 281, 281, 281,
288 | 	281, 281, 281, 281, 281, 281, 281, 281,
289 | 	282, 282, 282, 282, 282, 282, 282, 282,
290 | 	282, 282, 282, 282, 282, 282, 282, 282,
291 | 	282, 282, 282, 282, 282, 282, 282, 282,
292 | 	282, 282, 282, 282, 282, 282, 282, 282,
293 | 	283, 283, 283, 283, 283, 283, 283, 283,
294 | 	283, 283, 283, 283, 283, 283, 283, 283,
295 | 	283, 283, 283, 283, 283, 283, 283, 283,
296 | 	283, 283, 283, 283, 283, 283, 283, 283,
297 | 	284, 284, 284, 284, 284, 284, 284, 284,
298 | 	284, 284, 284, 284, 284, 284, 284, 284,
299 | 	284, 284, 284, 284, 284, 284, 284, 284,
300 | 	284, 284, 284, 284, 284, 284, 284, 285,
301 | }
302 | 
303 | // Gets the symbol for the given length, cfr. the DEFLATE spec.
304 | // Returns the symbol in the range [257-285] (inclusive)
305 | func (pair lz77Pair) lengthSymbol() uint16 {
306 | 	return lengthSymbolTable[pair.litLen]
307 | }
308 | 
309 | func DefaultOptions() (options Options) {
310 | 	options.Verbose = false
311 | 	options.VerboseMore = false
312 | 	options.NumIterations = 15
313 | 	options.BlockSplitting = true
314 | 	options.BlockSplittingLast = false
315 | 	options.BlockSplittingMax = 15
316 | 	options.BlockType = DYNAMIC_BLOCK
317 | 	return options
318 | }
319 | 


--------------------------------------------------------------------------------
/zopfli/zlib_container.go:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2013 Google Inc. All Rights Reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | 
16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 | */
19 | 
20 | package zopfli
21 | 
22 | import (
23 | 	"fmt"
24 | 	"hash/adler32"
25 | 	"io"
26 | 	"os"
27 | )
28 | 
29 | func ZlibCompress(options *Options, in []byte, out io.Writer) error {
30 | 	var counter countingWriter
31 | 	if options.Verbose {
32 | 		counter = newCountingWriter(out)
33 | 		out = &counter
34 | 	}
35 | 
36 | 	const cmf = 120 /* CM 8, CINFO 7. See zlib spec.*/
37 | 	const flevel = 0
38 | 	const fdict = 0
39 | 	var cmfflg uint16 = 256*cmf + fdict*32 + flevel*64
40 | 	fcheck := 31 - cmfflg%31
41 | 	cmfflg += fcheck
42 | 	flagBytes := []byte{
43 | 		byte(cmfflg >> 8),
44 | 		byte(cmfflg),
45 | 	}
46 | 	_, flagErr := out.Write(flagBytes)
47 | 	if flagErr != nil {
48 | 		return flagErr
49 | 	}
50 | 
51 | 	z := NewDeflator(out, options)
52 | 	writeErr := z.Deflate(true, in)
53 | 	if writeErr != nil {
54 | 		return writeErr
55 | 	}
56 | 
57 | 	checksum := adler32.New()
58 | 	checksum.Write(in)
59 | 	final := checksum.Sum32()
60 | 	checksumBytes := []byte{
61 | 		byte(final >> 24),
62 | 		byte(final >> 16),
63 | 		byte(final >> 8),
64 | 		byte(final),
65 | 	}
66 | 	_, checksumErr := out.Write(checksumBytes)
67 | 	if checksumErr != nil {
68 | 		return checksumErr
69 | 	}
70 | 
71 | 	if options.Verbose {
72 | 		inSize := len(in)
73 | 		outSize := counter.written
74 | 		fmt.Fprintf(os.Stderr,
75 | 			"Original Size: %d, Zlib: %d, Compression: %f%% Removed\n",
76 | 			inSize, outSize,
77 | 			100*float64(inSize-outSize)/float64(inSize))
78 | 	}
79 | 	return nil
80 | }
81 | 


--------------------------------------------------------------------------------
/zopfli/zopfli.go:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011 Google Inc. All Rights Reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | 
16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 | */
19 | 
20 | package zopfli
21 | 
// Options holds the settings used throughout the program. DefaultOptions
// returns a value populated with the defaults.
type Options struct {
	// Verbose controls whether to print output.
	Verbose bool

	// VerboseMore controls whether to print more detailed output.
	VerboseMore bool

	// NumIterations is the maximum amount of times to rerun the forward and
	// backward pass to optimize LZ77 compression cost. Good values: 10, 15 for
	// small files, 5 for files over several MB in size or it will be too slow.
	NumIterations int

	// BlockSplitting, if true, splits the data in multiple deflate blocks with
	// optimal choice for the block boundaries. Block splitting gives better
	// compression. Default: true.
	BlockSplitting bool

	// BlockSplittingLast, if true, chooses the optimal block split points only
	// after doing the iterative LZ77 compression. If false, chooses the block
	// split points first, then does iterative LZ77 on each individual block.
	// Depending on the file, either first or last gives the best compression.
	// Default: false.
	BlockSplittingLast bool

	// BlockSplittingMax is the maximum amount of blocks to split into (0 for
	// unlimited, but this can give extreme results that hurt compression on
	// some files). Default value: 15.
	BlockSplittingMax int

	// BlockType is the deflate block type. Use 2 for best compression.
	//	 0: non compressed blocks (00), UNCOMPRESSED_BLOCK
	//	 1: blocks with fixed tree (01), FIXED_BLOCK
	//	 2: blocks with dynamic tree (10), DYNAMIC_BLOCK
	BlockType byte
}
56 | 
// Output format: the container that Compress wraps the deflate data in.
const (
	// FORMAT_GZIP selects GzipCompress.
	FORMAT_GZIP = iota
	// FORMAT_ZLIB selects ZlibCompress.
	FORMAT_ZLIB
	// FORMAT_DEFLATE selects DeflateCompress.
	FORMAT_DEFLATE
)
63 | 
// Block type: the values accepted by Options.BlockType.
const (
	// UNCOMPRESSED_BLOCK (0) is a non compressed block.
	UNCOMPRESSED_BLOCK = iota
	// FIXED_BLOCK (1) is a block with a fixed tree.
	FIXED_BLOCK
	// DYNAMIC_BLOCK (2) is a block with a dynamic tree; DefaultOptions uses it.
	DYNAMIC_BLOCK
)
70 | 


--------------------------------------------------------------------------------
/zopfli/zopfli_lib.go:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2011 Google Inc. All Rights Reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | 
16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 | */
19 | 
20 | package zopfli
21 | 
22 | import (
23 | 	"io"
24 | )
25 | 
// countingWriter wraps an io.Writer and keeps a running total of the bytes
// the wrapped writer has accepted.
type countingWriter struct {
	w       io.Writer // destination of every Write call
	written int       // total bytes reported as written by w so far
}

// newCountingWriter returns a countingWriter that forwards to w and starts
// counting at zero.
func newCountingWriter(w io.Writer) countingWriter {
	return countingWriter{w: w}
}

// Write forwards p to the wrapped writer and returns its result unchanged.
// The counter grows by the number of bytes the wrapped writer reports, not by
// len(p), so a short or failed write is not over-counted.
func (cw *countingWriter) Write(p []byte) (int, error) {
	n, err := cw.w.Write(p)
	cw.written += n
	return n, err
}
39 | 
40 | func Compress(options *Options, outputType int, in []byte, out io.Writer) error {
41 | 	switch outputType {
42 | 	case FORMAT_GZIP:
43 | 		return GzipCompress(options, in, out)
44 | 	case FORMAT_ZLIB:
45 | 		return ZlibCompress(options, in, out)
46 | 	case FORMAT_DEFLATE:
47 | 		return DeflateCompress(options, in, out)
48 | 	}
49 | 	panic("Unknown output type")
50 | }
51 | 
52 | func DeflateCompress(options *Options, in []byte, out io.Writer) error {
53 | 	z := NewDeflator(out, options)
54 | 	deflateErr := z.Deflate(true, in)
55 | 	if deflateErr != nil {
56 | 		return deflateErr
57 | 	}
58 | 
59 | 	return nil
60 | }
61 | 


--------------------------------------------------------------------------------
/zopfli_bin.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2011 Google Inc. All Rights Reserved.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 | 		http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | 
 16 | Author: lode.vandevenne@gmail.com (Lode Vandevenne)
 17 | Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
 18 | */
 19 | 
 20 | /*
 21 | Zopfli compressor program. It can output gzip-, zlib- or deflate-compatible
 22 | data. By default it creates a .gz file. This tool can only compress, not
 23 | decompress. Decompression can be done by any standard gzip, zlib or deflate
 24 | decompressor.
 25 | */
 26 | 
 27 | package main
 28 | 
 29 | import (
 30 | 	"bytes"
 31 | 	"flag"
 32 | 	"fmt"
 33 | 	"github.com/foobaz/go-zopfli/zopfli"
 34 | 	"io"
 35 | 	"io/ioutil"
 36 | 	"os"
 37 | 	"runtime"
 38 | 	"runtime/pprof"
 39 | )
 40 | 
 41 | var parallel bool
 42 | 
 43 | // outfilename: filename to write output to, or 0 to write to stdout instead
 44 | func compressFile(options *zopfli.Options, outputType int,
 45 | 	inFileName, outFileName string) error {
 46 | 	in, inErr := ioutil.ReadFile(inFileName)
 47 | 	if inErr != nil {
 48 | 		return inErr
 49 | 	}
 50 | 
 51 | 	var out io.WriteCloser
 52 | 	if outFileName == "" {
 53 | 		out = os.Stdout
 54 | 	} else {
 55 | 		var outErr error
 56 | 		out, outErr = os.Create(outFileName)
 57 | 		if outErr != nil {
 58 | 			return outErr
 59 | 		}
 60 | 		defer out.Close()
 61 | 	}
 62 | 
 63 | 	nJobs := 1
 64 | 	if parallel {
 65 | 		nJobs = runtime.GOMAXPROCS(-1)
 66 | 	}
 67 | 	chunk := len(in) / nJobs
 68 | 	type job struct {
 69 | 		in	[]byte
 70 | 		w    *bytes.Buffer
 71 | 		err  error
 72 | 		done chan struct{}
 73 | 	}
 74 | 	jobs := make([]job, nJobs)
 75 | 
 76 | 	offset := 0
 77 | 	for jbnum := 0; jbnum < nJobs; jbnum++ {
 78 | 		end := offset + chunk
 79 | 		if end > len(in) {
 80 | 			end = len(in)
 81 | 		}
 82 | 
 83 | 		jobs[jbnum].in = in[offset:end]
 84 | 		jobs[jbnum].w = new(bytes.Buffer)
 85 | 		jobs[jbnum].done = make(chan struct{})
 86 | 
 87 | 		go func(j *job) {
 88 | 			j.err = zopfli.Compress(options, outputType, j.in, j.w)
 89 | 			close(j.done)
 90 | 		}(&jobs[jbnum])
 91 | 
 92 | 		offset += chunk
 93 | 	}
 94 | 
 95 | 	// Collect the output, concatenate into the output io.Writer
 96 | 	// gzip file format supports concatenation transparently:
 97 | 	// https://www.gnu.org/software/gzip/manual/gzip.html#Advanced-usage
 98 | 	for i := range jobs {
 99 | 		// Note: It seems like the above could be "for _,j := range jobs",
100 | 		// but that would be a data race, because j.err would have an old value
101 | 		// when we wake up from sleeping on done. Instead, use an array index
102 | 		// so that each access to jobs[i] respects the happens-before ordering.
103 | 		<-jobs[i].done
104 | 		if jobs[i].err != nil {
105 | 			return jobs[i].err
106 | 		}
107 | 		_, err := io.Copy(out, jobs[i].w)
108 | 		if err != nil {
109 | 			return err
110 | 		}
111 | 	}
112 | 
113 | 	return nil
114 | }
115 | 
116 | func main() {
117 | 	options := zopfli.DefaultOptions()
118 | 
119 | 	flag.BoolVar(&options.Verbose, "v", options.Verbose, "verbose mode")
120 | 	flag.BoolVar(&options.VerboseMore, "vv", options.VerboseMore, "more verbose mode")
121 | 	outputToStdout := flag.Bool("c", false, "write the result on standard output, instead of disk")
122 | 	deflate := flag.Bool("deflate", false, "output to deflate format instead of gzip")
123 | 	zlib := flag.Bool("zlib", false, "output to zlib format instead of gzip")
124 | 	gzip := flag.Bool("gzip", true, "output to gzip format")
125 | 	flag.BoolVar(&options.BlockSplittingLast, "splitlast", options.BlockSplittingLast, "do block splitting last instead of first")
126 | 	flag.IntVar(&options.NumIterations, "i", options.NumIterations, "perform # iterations (default 15). More gives more compression but is slower. Examples: -i=10, -i=50, -i=1000")
127 | 	var cpuProfile string
128 | 	flag.StringVar(&cpuProfile, "cpuprofile", "", "write cpu profile to file")
129 | 	flag.BoolVar(&parallel, "parallel", false, "compress in parallel (gzip only); use GOMAXPROCS to set the amount of parallelism. More parallelism = smaller independent chunks, thus worse compression ratio.")
130 | 	flag.Parse()
131 | 
132 | 	if parallel && !*gzip {
133 | 		fmt.Fprintf(os.Stderr, "Error: parallel is only supported with gzip containers.")
134 | 		return
135 | 	}
136 | 
137 | 	if options.VerboseMore {
138 | 		options.Verbose = true
139 | 	}
140 | 	var outputType int
141 | 	if *deflate && !*zlib && !*gzip {
142 | 		outputType = zopfli.FORMAT_DEFLATE
143 | 	} else if *zlib && !*deflate && !*gzip {
144 | 		outputType = zopfli.FORMAT_ZLIB
145 | 	} else {
146 | 		outputType = zopfli.FORMAT_GZIP
147 | 	}
148 | 
149 | 	if options.NumIterations < 1 {
150 | 		fmt.Fprintf(os.Stderr, "Error: must have 1 or more iterations")
151 | 		return
152 | 	}
153 | 
154 | 	var allFileNames []string
155 | 	if *outputToStdout {
156 | 		allFileNames = append(allFileNames, "")
157 | 	} else {
158 | 		allFileNames = flag.Args()
159 | 	}
160 | 	if len(allFileNames) <= 0 {
161 | 		fmt.Fprintf(os.Stderr, "Please provide filename\n")
162 | 	}
163 | 	if cpuProfile != "" {
164 | 		f, err := os.Create(cpuProfile)
165 | 		if err == nil {
166 | 			pprof.StartCPUProfile(f)
167 | 			defer f.Close()
168 | 			defer pprof.StopCPUProfile()
169 | 		}
170 | 	}
171 | 	for _, fileName := range allFileNames {
172 | 		var outFileName string
173 | 		if *outputToStdout {
174 | 			outFileName = ""
175 | 		} else {
176 | 			switch outputType {
177 | 			case zopfli.FORMAT_GZIP:
178 | 				outFileName = fileName + ".gz"
179 | 			case zopfli.FORMAT_ZLIB:
180 | 				outFileName = fileName + ".zlib"
181 | 			case zopfli.FORMAT_DEFLATE:
182 | 				outFileName = fileName + ".deflate"
183 | 			default:
184 | 				panic("Unknown output type")
185 | 			}
186 | 			if options.Verbose {
187 | 				fmt.Fprintf(os.Stderr, "Saving to: %s\n", outFileName)
188 | 			}
189 | 		}
190 | 		compressErr := compressFile(&options, outputType, fileName, outFileName)
191 | 		if compressErr != nil {
192 | 			fmt.Fprintf(os.Stderr, "could not compress %s: %v\n", fileName, compressErr)
193 | 		}
194 | 	}
195 | }
196 | 


--------------------------------------------------------------------------------