├── .github └── workflows │ └── go.yml ├── .gitignore ├── LICENSE ├── README.md ├── bench_test.go ├── clz.go ├── clz_amd64.s ├── clz_asm.go ├── clz_test.go ├── cmd └── fpc │ └── main.go ├── decode.go ├── decode_test.go ├── doc.go ├── encode.go ├── encode_test.go ├── go.mod ├── golden ├── README ├── test.trace.fpc └── test_decompressed.data ├── golden_test.go ├── predictor.go ├── reader.go ├── reader_test.go ├── reference_test.go ├── utils.go ├── writer.go └── writer_test.go /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | 11 | build: 12 | name: Build 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | go_version: [1.12, 1.13, 1.14] 17 | steps: 18 | - name: Set up Go 1.x 19 | uses: actions/setup-go@v2 20 | with: 21 | go-version: ${{ matrix.go_version }} 22 | id: go 23 | 24 | - name: Check out code 25 | uses: actions/checkout@v2 26 | 27 | - name: Get dependencies 28 | run: | 29 | go get -v -t -d ./... 30 | 31 | - name: Build 32 | run: go build -v . 33 | 34 | - name: Test 35 | run: go test -v . 
36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.test 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Spencer Nelson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fpc # 2 | 3 | [![GoDoc](http://godoc.org/github.com/spenczar/fpc?status.svg)](http://godoc.org/github.com/spenczar/fpc) 4 | 5 | fpc is a Go implementation of Burtscher and Ratanaworabhan's ['FPC' algorithm](http://cs.txstate.edu/~burtscher/research/FPC/) for compressing a stream of floating point data. 6 | 7 | ## Why? ## 8 | 9 | The FPC algorithm can losslessly encode and decode huge amounts of 10 | floating-point data very quickly. It scales well to 11 | gigabyte-per-second streams. Compression ratios are better than just 12 | about any generic compressor like gzip or bzip, and compression and 13 | decompression throughput are much better (like, 8x to 300x faster) 14 | than other algorithms. For more on this, 15 | [the paper introducing FPC](http://cs.txstate.edu/~burtscher/papers/dcc07a.pdf) 16 | is really readable - I highly recommend it! 17 | 18 | ## Usage ## 19 | 20 | fpc provides a `Writer` and a `Reader`, following the pattern set by 21 | the Go standard library's compression packages. The Writer wraps an 22 | io.Writer that you want to write compressed data into, and the Reader 23 | wraps an io.Reader that you want to read compressed data out of. 24 | 25 | Since FPC encodes streams of float64s, they impose some additional 26 | expectations on callers: when calling `Reader.Read(p []byte)` or 27 | `Writer.Write(p []byte)`, the length of `p` must be a multiple of 8, 28 | to match the expectation that the bytes represent a stream of 8-byte 29 | float64s. 30 | 31 | In addition, utility methods are provided: `Reader` has a 32 | `ReadFloats(fs []float64) (int, error)` method which will read bytes 33 | from its underlying source, parse them as float64s, put them in `fs`, 34 | and return the number of float64s it placed in fs. 
When it reaches the 35 | end of the compressed stream, it will return `0, io.EOF`. 36 | 37 | Similarly, `Writer` has a `WriteFloat(f float64) error` method which 38 | writes a single float64 to the compressed stream. 39 | 40 | ## Performance ## 41 | 42 | In benchmarks on a fairly vanilla laptop, reading or writing from an 43 | in-memory stream, `fpc` is able to encode at about 1.2 gigabytes per 44 | second, and it can decode at about 0.9 gigabytes per 45 | second. Benchmarks can be run on your own hardware with `go test 46 | -bench "Read|Write" .`. 47 | -------------------------------------------------------------------------------- /bench_test.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | import ( 4 | "bytes" 5 | "io/ioutil" 6 | "math" 7 | "math/rand" 8 | "testing" 9 | ) 10 | 11 | func generateValues(n int) []uint64 { 12 | vals := make([]uint64, n) 13 | // generate up to 1M random values 14 | for i := range vals { 15 | vals[i] = math.Float64bits(rand.ExpFloat64()) 16 | } 17 | return vals 18 | } 19 | 20 | func min(x, y int) int { 21 | if x > y { 22 | return y 23 | } 24 | return x 25 | } 26 | 27 | func BenchmarkBlockEncode(b *testing.B) { 28 | w := ioutil.Discard 29 | e := newBlockEncoder(w, DefaultCompression) 30 | e.enc.fcm = &mockPredictor{0xFABF} 31 | e.enc.dfcm = &mockPredictor{0xFABF} 32 | b.SetBytes(8) 33 | for i := 0; i < b.N; i++ { 34 | e.encodeFloat(0xFAFF * float64(i)) 35 | } 36 | } 37 | 38 | func BenchmarkLeadingZeroBytes(b *testing.B) { 39 | b.SetBytes(8) 40 | for i := 0; i < b.N; i++ { 41 | clzBytes(uint64(i * 0xDEADBEEF)) 42 | } 43 | } 44 | 45 | func BenchmarkPairEncode(b *testing.B) { 46 | e := newEncoder(DefaultCompression) 47 | e.fcm = &mockPredictor{0xFABF} 48 | e.dfcm = &mockPredictor{0xFABF} 49 | b.SetBytes(16) 50 | b.ResetTimer() 51 | for i := 0; i < b.N; i++ { 52 | e.encode(0xFAFF*uint64(i), 0x1234*uint64(i)) 53 | } 54 | } 55 | 56 | func BenchmarkEncodeNonzero(b *testing.B) { 57 | 
e := newEncoder(DefaultCompression) 58 | buf := make([]byte, 8) 59 | b.SetBytes(8) 60 | b.ResetTimer() 61 | for i := 0; i < b.N; i++ { 62 | e.encodeNonzero(uint64(i), uint8(i)%8, buf) 63 | } 64 | } 65 | 66 | func BenchmarkComputeDiff(b *testing.B) { 67 | e := newEncoder(DefaultCompression) 68 | b.SetBytes(8) 69 | b.ResetTimer() 70 | 71 | for i := 0; i < b.N; i++ { 72 | e.computeDiff(uint64(i)) 73 | } 74 | } 75 | 76 | func BenchmarkFCM(b *testing.B) { 77 | fcm := newFCM(1 << DefaultCompression) 78 | b.SetBytes(8) 79 | b.ResetTimer() 80 | for i := 0; i < b.N; i++ { 81 | fcm.predict() 82 | fcm.update(uint64(i)) 83 | } 84 | } 85 | 86 | func BenchmarkDFCM(b *testing.B) { 87 | dfcm := newDFCM(1 << DefaultCompression) 88 | b.SetBytes(8) 89 | b.ResetTimer() 90 | for i := 0; i < b.N; i++ { 91 | dfcm.predict() 92 | dfcm.update(uint64(i)) 93 | } 94 | } 95 | 96 | var benchcase = reftestcase{ 97 | comp: 3, 98 | uncompressed: []float64{1e-05, 0.0001, 0.001, 0.01, 0.1, 1, 100, 1000, 10000, 100000}, 99 | compressed: []byte{ 100 | 0x03, 0x0a, 0x00, 0x00, 0x53, 0x00, 0x00, 0x77, 101 | 0xee, 0xee, 0xee, 0xee, 0xf1, 0x68, 0xe3, 0x88, 102 | 0xb5, 0xf8, 0xe4, 0x3e, 0x2d, 0x43, 0x1c, 0xeb, 103 | 0xe2, 0x36, 0x1a, 0x3f, 0xd1, 0xea, 0xed, 0x39, 104 | 0xaf, 0x54, 0x4a, 0x87, 0xbd, 0x5f, 0x95, 0xac, 105 | 0x18, 0xd4, 0xe1, 0x8d, 0x37, 0xde, 0x78, 0xe3, 106 | 0x3d, 0x69, 0x00, 0x6f, 0x81, 0x04, 0xc5, 0x1f, 107 | 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x7f, 0x3c, 108 | 0xda, 0x38, 0x62, 0x2d, 0x7e, 0x01, 0x00, 0x00, 109 | 0x00, 0x00, 0x00, 0x08, 0x06, 0x00, 0x00, 0x00, 110 | 0x00, 0x00, 0xba, 0x0f}, 111 | } 112 | 113 | func BenchmarkReadFloat(b *testing.B) { 114 | b.SetBytes(int64(len(benchcase.compressed))) 115 | in := bytes.NewBuffer(benchcase.compressed) 116 | 117 | r := NewReader(in) 118 | b.ResetTimer() 119 | for i := 0; i < b.N; i++ { 120 | r.ReadFloat() 121 | } 122 | } 123 | 124 | func BenchmarkReadFloats(b *testing.B) { 125 | b.SetBytes(int64(len(benchcase.compressed))) 126 | in := 
bytes.NewBuffer(benchcase.compressed) 127 | out := make([]float64, len(benchcase.uncompressed)) 128 | 129 | b.ResetTimer() 130 | for i := 0; i < b.N; i++ { 131 | r := NewReader(in) 132 | r.ReadFloats(out) 133 | in.Reset() 134 | } 135 | } 136 | 137 | func BenchmarkWriter(b *testing.B) { 138 | b.SetBytes(int64(len(benchcase.uncompressed) * 8)) 139 | w, _ := NewWriterLevel(ioutil.Discard, int(benchcase.comp)) 140 | b.ResetTimer() 141 | for i := 0; i < b.N; i++ { 142 | w.WriteFloat(benchcase.uncompressed[i%len(benchcase.uncompressed)]) 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /clz.go: -------------------------------------------------------------------------------- 1 | // +build !amd64 2 | 3 | package fpc 4 | 5 | func clzBytes(val uint64) uint64 { 6 | if val == 0 { 7 | return 8 8 | } 9 | var i uint64 10 | // 'while top byte is zero' 11 | for i = 0; val&0xFF00000000000000 == 0; i++ { 12 | val <<= 8 13 | } 14 | return i 15 | } 16 | -------------------------------------------------------------------------------- /clz_amd64.s: -------------------------------------------------------------------------------- 1 | // +build amd64 2 | 3 | // func clzBytes(val uint64) uint64 4 | TEXT ·clzBytes(SB),$0 5 | MOVQ val+0(FP), AX 6 | BSWAPQ AX // Reverse order of val 7 | BSFQ AX, AX // Get index of highest set bit in val 8 | JZ zero // BSFQ returns 0 if no bits are set to 1. In that case, return 8. 
9 | SHRQ $3, AX // Divide by 8 to get bytes 10 | MOVQ AX, ret+8(FP) 11 | RET 12 | zero: 13 | MOVQ $8, ret+8(FP) 14 | RET 15 | 16 | 17 | -------------------------------------------------------------------------------- /clz_asm.go: -------------------------------------------------------------------------------- 1 | // +build amd64 2 | 3 | package fpc 4 | 5 | func clzBytes(val uint64) uint64 6 | -------------------------------------------------------------------------------- /clz_test.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | import "testing" 4 | 5 | func TestCountLeadingZeroes(t *testing.T) { 6 | testcases := []struct { 7 | in string 8 | want uint64 9 | }{ 10 | { 11 | in: "11111111 00000000 00000000 00000000 00000000 00000000 00000000 00000000", 12 | want: 0, 13 | }, 14 | { 15 | in: "00000000 00000000 00000000 00000000 00000000 00000000 00000000 11111111", 16 | want: 7, 17 | }, 18 | { 19 | in: "00000000 00000000 00000000 00000000 00000000 00000000 00000000 10000000", 20 | want: 7, 21 | }, 22 | { 23 | in: "00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000000", 24 | want: 0, 25 | }, 26 | { 27 | in: "00000000 00000000 00000000 11111111 00000000 00000000 00000000 00000000", 28 | want: 3, 29 | }, 30 | { 31 | in: "11111111 00000000 00000000 00000000 00000000 00000000 00000000 10000000", 32 | want: 0, 33 | }, 34 | { 35 | in: "00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000", 36 | want: 8, 37 | }, 38 | } 39 | for i, tc := range testcases { 40 | have := clzBytes(binstr2u64(tc.in)) 41 | if have != tc.want { 42 | t.Errorf("clzBytes test=%d have=%d want=%d", i, have, tc.want) 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /cmd/fpc/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "io" 7 | "os" 8 | 9 | 
"github.com/spenczar/fpc" 10 | ) 11 | 12 | const bufferSize = 1024 13 | 14 | func main() { 15 | decompress := flag.Bool("d", false, "Decompress input data and write output to stdout.") 16 | level := flag.Int("l", fpc.DefaultCompression, "Compression level to use when compressing. Ignored when decompressing.") 17 | help := flag.Bool("h", false, "Print this help text") 18 | flag.Parse() 19 | 20 | if *help { 21 | flag.Usage() 22 | os.Exit(0) 23 | } 24 | 25 | if *decompress { 26 | decompressStream(os.Stdin, os.Stdout) 27 | } else { 28 | compressStream(os.Stdin, os.Stdout, *level) 29 | } 30 | } 31 | 32 | func fatal(err error) { 33 | fmt.Fprintf(os.Stderr, "fatal: %s\n", err.Error()) 34 | os.Exit(1) 35 | } 36 | 37 | func compressStream(in io.Reader, out io.Writer, level int) { 38 | w, err := fpc.NewWriterLevel(out, level) 39 | if err != nil { 40 | fatal(err) 41 | } 42 | 43 | buf := make([]byte, bufferSize) 44 | for { 45 | n, err := in.Read(buf) 46 | //log.Printf("bytes off wire: %#v", buf[:n]) 47 | if err == io.EOF { 48 | w.Write(buf[:n]) 49 | err = w.Close() 50 | if err != nil { 51 | fatal(err) 52 | } 53 | return 54 | } else if err != nil { 55 | fatal(err) 56 | } 57 | w.Write(buf[:n]) 58 | } 59 | } 60 | 61 | func decompressStream(in io.Reader, out io.Writer) { 62 | r := fpc.NewReader(os.Stdin) 63 | buf := make([]byte, bufferSize) 64 | for { 65 | n, err := r.Read(buf) 66 | if err == io.EOF { 67 | out.Write(buf[:n]) 68 | return 69 | } else if err != nil { 70 | fatal(err) 71 | } 72 | out.Write(buf[:n]) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /decode.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | func decodeBlockHeader(b []byte) (nRecords, nBytes int) { 4 | // First three bytes encode the number of records 5 | nRecordsUint := uint32(b[2]) 6 | nRecordsUint = (nRecordsUint << 8) | uint32(b[1]) 7 | nRecordsUint = (nRecordsUint << 8) | uint32(b[0]) 8 | 9 | 
// remaining 3 encode the number of bytes in the block 10 | nBytesUint := uint32(b[5]) 11 | nBytesUint = (nBytesUint << 8) | uint32(b[4]) 12 | nBytesUint = (nBytesUint << 8) | uint32(b[3]) 13 | 14 | return int(nRecordsUint), int(nBytesUint) 15 | } 16 | 17 | func decodeHeaders(b byte) (h1, h2 header) { 18 | h1 = header{ 19 | len: (b & 0x70) >> 4, 20 | pType: predictorClass((b & 0x80) >> 7), 21 | } 22 | h2 = header{ 23 | len: (b & 0x07), 24 | pType: predictorClass((b & 0x08) >> 3), 25 | } 26 | if h1.len >= 4 { 27 | h1.len += 1 28 | } 29 | if h2.len >= 4 { 30 | h2.len += 1 31 | } 32 | return h1, h2 33 | } 34 | 35 | func decodeData(b []byte) (v uint64) { 36 | // Decode b as a partial little-endian uint64 37 | switch len(b) { 38 | case 8: 39 | v = (uint64(b[0]) | 40 | uint64(b[1])<<8 | 41 | uint64(b[2])<<16 | 42 | uint64(b[3])<<24 | 43 | uint64(b[4])<<32 | 44 | uint64(b[5])<<40 | 45 | uint64(b[6])<<48 | 46 | uint64(b[7])<<56) 47 | case 7: 48 | v = (uint64(b[0]) | 49 | uint64(b[1])<<8 | 50 | uint64(b[2])<<16 | 51 | uint64(b[3])<<24 | 52 | uint64(b[4])<<32 | 53 | uint64(b[5])<<40 | 54 | uint64(b[6])<<48) 55 | case 6: 56 | v = (uint64(b[0]) | 57 | uint64(b[1])<<8 | 58 | uint64(b[2])<<16 | 59 | uint64(b[3])<<24 | 60 | uint64(b[4])<<32 | 61 | uint64(b[5])<<40) 62 | case 5: 63 | v = (uint64(b[0]) | 64 | uint64(b[1])<<8 | 65 | uint64(b[2])<<16 | 66 | uint64(b[3])<<24 | 67 | uint64(b[4])<<32) 68 | case 4: 69 | v = (uint64(b[0]) | 70 | uint64(b[1])<<8 | 71 | uint64(b[2])<<16 | 72 | uint64(b[3])<<24) 73 | case 3: 74 | v = (uint64(b[0]) | 75 | uint64(b[1])<<8 | 76 | uint64(b[2])<<16) 77 | case 2: 78 | v = (uint64(b[0]) | 79 | uint64(b[1])<<8) 80 | case 1: 81 | v = uint64(b[0]) 82 | // case 0: leave v as 0 83 | } 84 | return v 85 | } 86 | -------------------------------------------------------------------------------- /decode_test.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 
| 8 | func TestDecodeHeader(t *testing.T) { 9 | type output struct { 10 | n1, n2 uint8 11 | p1, p2 predictorClass 12 | } 13 | testcases := []struct { 14 | in byte 15 | want pairHeader 16 | }{ 17 | { 18 | in: binstr2byte("01110111"), 19 | want: pairHeader{ 20 | h1: header{ 21 | len: 8, 22 | pType: 0, 23 | }, 24 | h2: header{ 25 | len: 8, 26 | pType: 0, 27 | }, 28 | }, 29 | }, 30 | { 31 | in: binstr2byte("11110111"), 32 | want: pairHeader{ 33 | h1: header{ 34 | len: 8, 35 | pType: 1, 36 | }, 37 | h2: header{ 38 | len: 8, 39 | pType: 0, 40 | }, 41 | }, 42 | }, 43 | { 44 | in: binstr2byte("00101111"), 45 | want: pairHeader{ 46 | h1: header{ 47 | len: 2, 48 | pType: 0, 49 | }, 50 | h2: header{ 51 | len: 8, 52 | pType: 1, 53 | }, 54 | }, 55 | }, 56 | } 57 | for i, tc := range testcases { 58 | var have pairHeader 59 | have.h1, have.h2 = decodeHeaders(tc.in) 60 | if !reflect.DeepEqual(have, tc.want) { 61 | t.Errorf("decodePrefix test=%d have=%+v want=%+v", i, have, tc.want) 62 | } 63 | } 64 | } 65 | 66 | func TestDecodeBlockHeader(t *testing.T) { 67 | 68 | type result struct { 69 | nRec int 70 | nBytes int 71 | } 72 | testcases := []struct { 73 | in []byte 74 | want result 75 | }{ 76 | { 77 | in: []byte{0x00, 0x80, 0x00, 0xb6, 0x35, 0x02}, 78 | want: result{ 79 | nRec: 32768, 80 | nBytes: 144822, 81 | }, 82 | }, 83 | { 84 | in: []byte{0x00, 0x80, 0x00, 0xc2, 0x43, 0x00}, 85 | want: result{ 86 | nRec: 32768, 87 | nBytes: 17346, 88 | }, 89 | }, 90 | } 91 | for i, tc := range testcases { 92 | var have result 93 | have.nRec, have.nBytes = decodeBlockHeader(tc.in) 94 | if !reflect.DeepEqual(have, tc.want) { 95 | t.Errorf("decodeBlockHeader test=%d have=%+v want=%+v", i, have, tc.want) 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Package fpc implements reading and writing of FPC-format compressed 2 | // streams. 
FPC is a fast, lossless format for compressing IEEE 754 3 | // floating point values. 4 | 5 | package fpc 6 | -------------------------------------------------------------------------------- /encode.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | import ( 4 | "encoding/binary" 5 | "io" 6 | "math" 7 | ) 8 | 9 | const ( 10 | maxRecordsPerBlock = 32768 11 | blockHeaderSize = 6 // in bytes 12 | ) 13 | 14 | var byteOrder = binary.LittleEndian 15 | 16 | // pairHeader combines the headers for two values into a single byte 17 | type pairHeader struct { 18 | h1 header 19 | h2 header 20 | } 21 | 22 | func (ph pairHeader) encode() byte { 23 | return (ph.h1.encode()<<4 | ph.h2.encode()) 24 | } 25 | 26 | // header is a container for the count of the number of non-zero bytes in an 27 | // encoded value, and the type of predictor used to generate the encoded value 28 | type header struct { 29 | len uint8 30 | pType predictorClass 31 | } 32 | 33 | // the top bit is the predictor type bit. Bottom 3 bits encode the number of 34 | // leading zero bytes for the value.
35 | func (h header) encode() byte { 36 | if h.len > 4 { 37 | return byte(h.pType)<<3 | byte(h.len-1) 38 | } else { 39 | return byte(h.pType)<<3 | byte(h.len) 40 | } 41 | } 42 | 43 | type blockEncoder struct { 44 | blockSize int // size of blocks in bytes 45 | 46 | headers []byte 47 | values []byte 48 | 49 | w io.Writer // Destination for encoded bytes 50 | enc *encoder // Underlying machinery for encoding pairs of floats 51 | 52 | // Mutable state below 53 | last uint64 // last value received to encode 54 | nRecords int // Count of float64s received in this block 55 | nBytes int // Count of bytes in this block 56 | } 57 | 58 | // type block struct { 59 | // header []byte 60 | // } 61 | 62 | func newBlockEncoder(w io.Writer, compression uint) *blockEncoder { 63 | return &blockEncoder{ 64 | headers: make([]byte, 0, maxRecordsPerBlock), 65 | values: make([]byte, 0, maxRecordsPerBlock*8), 66 | w: w, 67 | enc: newEncoder(compression), 68 | last: 0, 69 | nRecords: 0, 70 | } 71 | } 72 | 73 | func (b *blockEncoder) encode(v uint64) error { 74 | // Encode values in pairs 75 | if b.nRecords%2 == 0 { 76 | b.last = v 77 | b.nRecords += 1 78 | return nil 79 | } 80 | header, data := b.enc.encode(b.last, v) 81 | nBytes := 1 + len(data) // 1 for header 82 | 83 | // Append data to the block 84 | b.headers = append(b.headers, header.encode()) 85 | b.values = append(b.values, data...) 86 | b.nRecords += 1 87 | b.nBytes += nBytes 88 | 89 | // Flush if we need to 90 | if b.nRecords == maxRecordsPerBlock { 91 | if err := b.flush(); err != nil { 92 | return err 93 | } 94 | } 95 | return nil 96 | 97 | } 98 | 99 | func (b *blockEncoder) encodeFloat(f float64) error { 100 | return b.encode(math.Float64bits(f)) 101 | } 102 | 103 | func (b *blockEncoder) flush() error { 104 | if b.nRecords == 0 { 105 | return nil 106 | } 107 | if b.nRecords%2 == 1 { 108 | // There's an extra record waiting for a partner. Add a dummy value by 109 | // encoding a zero and adding it to data. 
110 | h, data := b.enc.encode(b.last, 0) 111 | // Truncate out the dummy value's data. The header remains, but it 112 | // won't do any harm. 113 | data = data[:h.h1.len] 114 | b.headers = append(b.headers, h.encode()) 115 | b.values = append(b.values, data...) 116 | } 117 | 118 | block := b.encodeBlock() 119 | // Write data out 120 | n, err := b.w.Write(block) 121 | if err != nil { 122 | return err 123 | } 124 | if n < len(block) { 125 | return io.ErrShortWrite 126 | } 127 | 128 | // Reset buffer and counters 129 | b.headers = make([]byte, 0, maxRecordsPerBlock) 130 | b.values = make([]byte, 0, maxRecordsPerBlock) 131 | b.nRecords = 0 132 | b.nBytes = 0 133 | return nil 134 | } 135 | 136 | func (b *blockEncoder) encodeBlock() []byte { 137 | // The block header is laid out as two little-endian 24-bit unsigned 138 | // integers. The first integer is the number of records in the block, and 139 | // the second is the number of bytes. 140 | nByte := len(b.headers) + len(b.values) + blockHeaderSize 141 | block := make([]byte, 6, nByte) 142 | 143 | //First three bytes are the number of records in the block. 144 | block[0] = byte(b.nRecords) 145 | block[1] = byte(b.nRecords >> 8) 146 | block[2] = byte(b.nRecords >> 16) 147 | 148 | // Next three bytes are the number of bytes in the block. 149 | block[3] = byte(nByte) 150 | block[4] = byte(nByte >> 8) 151 | block[5] = byte(nByte >> 16) 152 | 153 | // Record headers follow the block header 154 | block = append(block, b.headers...) 155 | 156 | // After the header is all the rest of the data. 157 | block = append(block, b.values...)
158 | return block 159 | } 160 | 161 | type encoder struct { 162 | buf []byte 163 | 164 | // predictors 165 | fcm predictor 166 | dfcm predictor 167 | } 168 | 169 | func newEncoder(compression uint) *encoder { 170 | tableSize := uint(1 << compression) 171 | return &encoder{ 172 | buf: make([]byte, 17), 173 | fcm: newFCM(tableSize), 174 | dfcm: newDFCM(tableSize), 175 | } 176 | } 177 | 178 | // compute the difference between v and the best predicted value; return that 179 | // difference and which predictor was the most effective. Updates predictors as 180 | // a side effect. 181 | func (e *encoder) computeDiff(v uint64) (d uint64, h header) { 182 | fcmDelta := e.fcm.predict() ^ v 183 | e.fcm.update(v) 184 | 185 | dfcmDelta := e.dfcm.predict() ^ v 186 | e.dfcm.update(v) 187 | 188 | if fcmDelta <= dfcmDelta { 189 | d = fcmDelta 190 | h.pType = fcmPredictor 191 | } else { 192 | d = dfcmDelta 193 | h.pType = dfcmPredictor 194 | } 195 | h.len = uint8(8 - clzBytes(d)) 196 | 197 | // "Since there can be between zero and eight leading zero bytes, i.e., 198 | // nine possibilities, not all of them can be encoded with a three-bit 199 | // value. We decided not to support a leading zero count of four because 200 | // it occurs only rarely (cf. Section 5.4). Consequently, all xor results 201 | // with four leading zero bytes are treated like values with only three 202 | // leading zero bytes and the fourth zero byte is emitted as part of the 203 | // residual." 204 | // 205 | // Here we add 1, to include one of the leading 0s in the residual. 
206 | if h.len == 4 { 207 | h.len += 1 208 | } 209 | return d, h 210 | } 211 | 212 | // encode a pair of values 213 | func (e *encoder) encode(v1, v2 uint64) (h pairHeader, data []byte) { 214 | d1, h1 := e.computeDiff(v1) 215 | d2, h2 := e.computeDiff(v2) 216 | 217 | h = pairHeader{h1, h2} 218 | 219 | e.encodeNonzero(d1, h1.len, e.buf[:h1.len]) 220 | e.encodeNonzero(d2, h2.len, e.buf[h1.len:h1.len+h2.len]) 221 | return h, e.buf[:h1.len+h2.len] 222 | } 223 | 224 | func (e *encoder) encodeNonzero(v uint64, n uint8, into []byte) { 225 | // Starting with the first nonzero byte, copy v's data into the byte slice. 226 | // 227 | // Unrolling this loop into a switch speeds up the computation dramatically. 228 | switch n { 229 | case 8: 230 | into[0] = byte(v & 0xFF) 231 | into[1] = byte((v >> 8) & 0xFF) 232 | into[2] = byte((v >> 16) & 0xFF) 233 | into[3] = byte((v >> 24) & 0xFF) 234 | into[4] = byte((v >> 32) & 0xFF) 235 | into[5] = byte((v >> 40) & 0xFF) 236 | into[6] = byte((v >> 48) & 0xFF) 237 | into[7] = byte((v >> 56) & 0xFF) 238 | case 7: 239 | into[0] = byte(v & 0xFF) 240 | into[1] = byte((v >> 8) & 0xFF) 241 | into[2] = byte((v >> 16) & 0xFF) 242 | into[3] = byte((v >> 24) & 0xFF) 243 | into[4] = byte((v >> 32) & 0xFF) 244 | into[5] = byte((v >> 40) & 0xFF) 245 | into[6] = byte((v >> 48) & 0xFF) 246 | case 6: 247 | into[0] = byte(v & 0xFF) 248 | into[1] = byte((v >> 8) & 0xFF) 249 | into[2] = byte((v >> 16) & 0xFF) 250 | into[3] = byte((v >> 24) & 0xFF) 251 | into[4] = byte((v >> 32) & 0xFF) 252 | into[5] = byte((v >> 40) & 0xFF) 253 | case 5: 254 | into[0] = byte(v & 0xFF) 255 | into[1] = byte((v >> 8) & 0xFF) 256 | into[2] = byte((v >> 16) & 0xFF) 257 | into[3] = byte((v >> 24) & 0xFF) 258 | into[4] = byte((v >> 32) & 0xFF) 259 | case 3: 260 | into[0] = byte(v & 0xFF) 261 | into[1] = byte((v >> 8) & 0xFF) 262 | into[2] = byte((v >> 16) & 0xFF) 263 | case 2: 264 | into[0] = byte(v & 0xFF) 265 | into[1] = byte((v >> 8) & 0xFF) 266 | case 1: 267 | into[0] = 
byte(v & 0xFF) 268 | } 269 | } 270 | -------------------------------------------------------------------------------- /encode_test.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | ) 7 | 8 | func TestBlockEncoder(t *testing.T) { 9 | for i, tc := range refTests { 10 | buf := new(bytes.Buffer) 11 | e := newBlockEncoder(buf, tc.comp) 12 | for _, v := range tc.uncompressed { 13 | if err := e.encodeFloat(v); err != nil { 14 | t.Fatalf("encode err=%q", err) 15 | } 16 | } 17 | if err := e.flush(); err != nil { 18 | t.Fatalf("flush err=%q", err) 19 | } 20 | want := tc.compressed[1:] // strip leading byte which describes compression to use 21 | if have := buf.Bytes(); !bytes.Equal(have, want) { 22 | t.Errorf("block encode test=%d", i) 23 | t.Logf("in.comp=%v", tc.comp) 24 | t.Logf("in.values=%#v", tc.compressed) 25 | t.Logf("have=%#v", bytes2binstr(have)) 26 | t.Logf("want=%#v", bytes2binstr(want)) 27 | } 28 | } 29 | } 30 | 31 | func TestEncodeHeader(t *testing.T) { 32 | testcases := []struct { 33 | h pairHeader 34 | want byte 35 | }{ 36 | { 37 | h: pairHeader{ 38 | h1: header{ 39 | len: 0, 40 | pType: 0, 41 | }, 42 | h2: header{ 43 | len: 0, 44 | pType: 0, 45 | }, 46 | }, 47 | want: 0, 48 | }, 49 | { 50 | h: pairHeader{ 51 | h1: header{ 52 | len: 0, 53 | pType: 0, 54 | }, 55 | h2: header{ 56 | len: 1, 57 | pType: 0, 58 | }, 59 | }, 60 | want: 1, 61 | }, 62 | { 63 | h: pairHeader{ 64 | h1: header{ 65 | len: 1, 66 | pType: 0, 67 | }, 68 | h2: header{ 69 | len: 0, 70 | pType: 0, 71 | }, 72 | }, 73 | want: 0x10, 74 | }, 75 | { 76 | h: pairHeader{ 77 | h1: header{ 78 | len: 1, 79 | pType: 1, 80 | }, 81 | h2: header{ 82 | len: 1, 83 | pType: 1, 84 | }, 85 | }, 86 | want: 0x99, 87 | }, 88 | { 89 | h: pairHeader{ 90 | h1: header{ 91 | len: 3, 92 | pType: 1, 93 | }, 94 | h2: header{ 95 | len: 7, 96 | pType: 1, 97 | }, 98 | }, 99 | want: 0xBE, 100 | }, 101 | } 102 | for i, tc := range 
testcases { 103 | have := tc.h.encode() 104 | if have != tc.want { 105 | t.Errorf("header encoding err test=%d", i) 106 | t.Logf("have=%#v", have) 107 | t.Logf("want=%#v", tc.want) 108 | } 109 | } 110 | } 111 | 112 | func TestEncodeNonzero(t *testing.T) { 113 | type input struct { 114 | val []byte 115 | len uint8 116 | } 117 | testcases := []struct { 118 | in input 119 | want []byte 120 | }{ 121 | { 122 | in: input{ 123 | val: []byte{0xFF, 0, 0, 0, 0, 0, 0, 0XFF}, 124 | len: 8, 125 | }, 126 | want: []byte{0xFF, 0, 0, 0, 0, 0, 0, 0XFF}, 127 | }, 128 | { 129 | in: input{ 130 | val: []byte{0xFF, 0, 0, 0, 0, 0, 0xFF, 0}, 131 | len: 7, 132 | }, 133 | want: []byte{0xFF, 0, 0, 0, 0, 0, 0xFF}, 134 | }, 135 | { 136 | in: input{ 137 | val: []byte{0xAA, 0, 0, 0, 0, 0, 0, 0}, 138 | len: 1, 139 | }, 140 | want: []byte{0xAA}, 141 | }, 142 | { 143 | in: input{ 144 | val: []byte{0xAA, 0, 0, 0, 0xAA, 0, 0, 0}, 145 | len: 5, 146 | }, 147 | want: []byte{0xAA, 0, 0, 0, 0xAA}, 148 | }, 149 | } 150 | 151 | for i, tc := range testcases { 152 | e := newEncoder(DefaultCompression) 153 | have := make([]byte, tc.in.len) 154 | e.encodeNonzero(byteOrder.Uint64(tc.in.val), tc.in.len, have) 155 | if !bytes.Equal(have, tc.want) { 156 | t.Errorf("encodeNonzero test=%d", i) 157 | t.Logf("have=%s", bytes2binstr(have)) 158 | t.Logf("want=%s", tc.want) 159 | } 160 | } 161 | } 162 | 163 | func TestPairEncode(t *testing.T) { 164 | testcases := []struct { 165 | v1, v2 []byte 166 | want string 167 | }{ 168 | // { 169 | // v1: []byte{0, 0, 0, 0, 0, 0, 0, 0}, 170 | // v2: []byte{0, 0, 0, 0, 0, 0, 0, 0}, 171 | // want: "01110111", 172 | // }, 173 | // { 174 | // v1: []byte{1, 0, 0, 0, 0, 0, 0, 0}, 175 | // v2: []byte{1, 0, 0, 0, 0, 0, 0, 0}, 176 | // want: "01100110 00000001 00000001", 177 | // }, 178 | { 179 | v1: []byte{0, 0, 0, 0, 0, 0, 0, 0}, 180 | v2: []byte{1, 0, 0, 0, 0, 0, 0, 0}, 181 | want: "00000001 00000001", 182 | }, 183 | { 184 | v1: []byte{0, 0, 0, 0xFF, 0xFF, 0, 0, 0}, 185 | v2: []byte{1, 0, 
0, 0, 0, 0, 0, 0}, 186 | want: "01000001 00000000 00000000 00000000 11111111 11111111 00000001", 187 | }, 188 | { 189 | v1: []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, 190 | v2: []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, 191 | want: "01110111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 " + 192 | "11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 " + 193 | "11111111", 194 | }, 195 | } 196 | for i, tc := range testcases { 197 | e := newEncoder(DefaultCompression) 198 | e.fcm = &mockPredictor{0} 199 | e.dfcm = &mockPredictor{0} 200 | haveHeader, haveData := e.encode(byteOrder.Uint64(tc.v1), byteOrder.Uint64(tc.v2)) 201 | have := append([]byte{haveHeader.encode()}, haveData...) 202 | if !bytes.Equal(have, binstr2bytes(tc.want)) { 203 | t.Errorf("encode test=%d", i) 204 | t.Logf("have=%s", bytes2binstr(have)) 205 | t.Logf("want=%s", tc.want) 206 | } 207 | } 208 | } 209 | 210 | // mockPredictor always predicts the same value 211 | type mockPredictor struct { 212 | val uint64 213 | } 214 | 215 | func (p *mockPredictor) predict() uint64 { return p.val } 216 | func (p *mockPredictor) update(uint64) {} 217 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/spenczar/fpc 2 | 3 | go 1.14 4 | -------------------------------------------------------------------------------- /golden/README: -------------------------------------------------------------------------------- 1 | This file contains a golden test set of data downloaded from 2 | https://userweb.cs.txstate.edu/~burtscher/research/datasets/FPdouble/, 3 | specifically the test dataset at 4 | http://www.cs.txstate.edu/~burtscher/research/datasets/FPdouble/test.trace.fpc. 5 | This file contains floating point data which has been FPC-compressed at level 6 | 20. 
7 | 8 | 9 | The file has been decompressed into test_decompressed.data too. 10 | -------------------------------------------------------------------------------- /golden/test.trace.fpc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spenczar/fpc/3ae59fb43b23edd54e06637e2903076e9e6ff6e3/golden/test.trace.fpc -------------------------------------------------------------------------------- /golden/test_decompressed.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spenczar/fpc/3ae59fb43b23edd54e06637e2903076e9e6ff6e3/golden/test_decompressed.data -------------------------------------------------------------------------------- /golden_test.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "io/ioutil" 7 | "os" 8 | "testing" 9 | ) 10 | 11 | const ( 12 | goldenCompressedFilepath = "golden/test.trace.fpc" 13 | goldenDecompressedFilepath = "golden/test_decompressed.data" 14 | ) 15 | 16 | func TestGoldenCompress(t *testing.T) { 17 | // Test that compressing data reproduces the reference implementation's 18 | // compression. 
19 | input, err := os.Open(goldenDecompressedFilepath) 20 | if err != nil { 21 | t.Fatalf("unable to load decompressed bytes: %v", err) 22 | } 23 | defer input.Close() 24 | 25 | buf := bytes.NewBuffer(nil) 26 | w, err := NewWriterLevel(buf, 20) 27 | if err != nil { 28 | t.Fatalf("unable to create writer: %v", err) 29 | } 30 | 31 | _, err = io.Copy(w, input) 32 | if err != nil { 33 | t.Fatalf("unable to write: %v", err) 34 | } 35 | err = w.Close() 36 | if err != nil { 37 | t.Fatalf("unable to close: %v", err) 38 | } 39 | 40 | want, err := ioutil.ReadFile(goldenCompressedFilepath) 41 | if err != nil { 42 | t.Fatalf("unable to load golden compressed bytes: %v", err) 43 | } 44 | 45 | have := buf.Bytes() 46 | if !bytes.Equal(have, want) { 47 | t.Error("compressed data golden mismatch") 48 | t.Logf("len(have) = %d", len(have)) 49 | t.Logf("len(want) = %d", len(want)) 50 | } 51 | } 52 | 53 | func TestGoldenDecompress(t *testing.T) { 54 | // Test that decompressing data reproduces the reference implementation's 55 | // decompression. 
56 | input, err := os.Open(goldenCompressedFilepath) 57 | if err != nil { 58 | t.Fatalf("unable to load compressed bytes: %v", err) 59 | } 60 | defer input.Close() 61 | 62 | r := NewReader(input) 63 | readBuf := make([]byte, 1024) 64 | haveBuf := bytes.NewBuffer(nil) 65 | for { 66 | n, err := r.Read(readBuf) 67 | if err == io.EOF { 68 | haveBuf.Write(readBuf[:n]) 69 | break 70 | } else if err != nil { 71 | t.Fatalf("read error: %v", err) 72 | } 73 | haveBuf.Write(readBuf[:n]) 74 | } 75 | 76 | want, err := ioutil.ReadFile(goldenDecompressedFilepath) 77 | if err != nil { 78 | t.Fatalf("unable to load golden decompressed bytes: %v", err) 79 | } 80 | 81 | have := haveBuf.Bytes() 82 | if !bytes.Equal(have, want) { 83 | t.Error("decompressed data golden mismatch") 84 | t.Logf("len(have) = %d", len(have)) 85 | t.Logf("len(want) = %d", len(want)) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /predictor.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | type predictorClass uint8 4 | 5 | const ( 6 | fcmPredictor predictorClass = iota 7 | dfcmPredictor 8 | ) 9 | 10 | type predictor interface { 11 | predict() (predicted uint64) 12 | update(actual uint64) 13 | } 14 | 15 | type fcm struct { 16 | table []uint64 17 | size uint64 18 | lastHash uint64 19 | } 20 | 21 | func newFCM(size uint) *fcm { 22 | // size must be a power of two 23 | return &fcm{ 24 | table: make([]uint64, size, size), 25 | size: uint64(size), 26 | } 27 | } 28 | 29 | func (f *fcm) hash(actual uint64) uint64 { 30 | return ((f.lastHash << 6) ^ (actual >> 48)) & (f.size - 1) 31 | } 32 | 33 | func (f *fcm) predict() uint64 { 34 | return f.table[f.lastHash] 35 | } 36 | 37 | func (f *fcm) update(actual uint64) { 38 | f.table[f.lastHash] = actual 39 | f.lastHash = f.hash(actual) 40 | } 41 | 42 | type dfcm struct { 43 | table []uint64 44 | size uint64 45 | lastHash uint64 46 | lastValue uint64 47 | } 48 | 49 | 
func newDFCM(size uint) *dfcm { 50 | // size must be a power of two 51 | return &dfcm{ 52 | table: make([]uint64, size, size), 53 | size: uint64(size), 54 | } 55 | } 56 | 57 | func (d *dfcm) hash(actual uint64) uint64 { 58 | return ((d.lastHash << 2) ^ ((actual - d.lastValue) >> 40)) & (d.size - 1) 59 | } 60 | 61 | func (d *dfcm) predict() uint64 { 62 | return d.table[d.lastHash] + d.lastValue 63 | } 64 | 65 | func (d *dfcm) update(actual uint64) { 66 | d.table[d.lastHash] = actual - d.lastValue 67 | d.lastHash = d.hash(actual) 68 | d.lastValue = actual 69 | } 70 | -------------------------------------------------------------------------------- /reader.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "math" 9 | ) 10 | 11 | // A DataError is returned when the FPC data is found to be syntactically 12 | // invalid. 13 | type DataError string 14 | 15 | func (e DataError) Error() string { 16 | return "fpc data invalid: " + string(e) 17 | } 18 | 19 | // A Reader provides io.Reader-style access to a stream of FPC 20 | // compressed data. 21 | type Reader struct { 22 | r io.Reader 23 | 24 | fcm predictor 25 | dfcm predictor 26 | 27 | initialized bool 28 | eof bool 29 | 30 | block block // Current block being read 31 | } 32 | 33 | // NewReader creates a new Reader which reads and decompresses FPC data from 34 | // the given io.Reader. 35 | func NewReader(r io.Reader) *Reader { 36 | return &Reader{ 37 | r: r, 38 | } 39 | } 40 | 41 | func (r *Reader) initialize() (err error) { 42 | comp, err := r.readGlobalHeader() 43 | if err != nil { 44 | return err 45 | } 46 | tableSize := uint(1 << comp) 47 | r.fcm = newFCM(tableSize) 48 | r.dfcm = newDFCM(tableSize) 49 | r.initialized = true 50 | return nil 51 | } 52 | 53 | // readGlobalHeader reads one byte and parses it as the compression level. 
54 | func (r *Reader) readGlobalHeader() (comp uint, err error) { 55 | var b []byte = make([]byte, 1) 56 | n, err := r.r.Read(b) 57 | if err != nil { 58 | return 0, err 59 | } 60 | if n != 1 { 61 | return 0, DataError("missing first byte compression header") 62 | } 63 | return uint(b[0]), nil 64 | } 65 | 66 | // Read reads from up to (len(buf) / 8) IEEE 754 64-bit floating point 67 | // values into buf. It is an error to provide a buf whose length is 68 | // not a multiple of 8, because that would prevent encoding of the 69 | // read float64s. 70 | // 71 | // If more values might be available, Read will return len(buf), 72 | // nil. If no more values are available, Read will return with 73 | // err==io.EOF 74 | func (r *Reader) Read(buf []byte) (int, error) { 75 | if len(buf)%8 != 0 { 76 | return 0, errors.New("fpc: []byte passed to Reader.Read must have length which is a multiple of 8") 77 | } 78 | 79 | if !r.initialized { 80 | err := r.initialize() 81 | if err != nil { 82 | return 0, err 83 | } 84 | } 85 | 86 | nRead := 0 87 | for { 88 | // If available, read data from the block. 89 | n, err := r.readFromBlock(buf) 90 | if err != nil { 91 | return n, err 92 | } 93 | nRead += n 94 | // We've read everything we need to. 95 | if nRead == len(buf) { 96 | return nRead, nil 97 | } 98 | 99 | // End of block. 100 | if n == 0 { 101 | // Check whether counts match up. 102 | if r.block.nRecRead != r.block.nRec { 103 | return nRead, DataError("block record length too short") 104 | } 105 | if r.block.nByteRead != r.block.nByte { 106 | return nRead, DataError(fmt.Sprintf("block byte length too short, have=%d want=%d", r.block.nByteRead, r.block.nByte)) 107 | } 108 | 109 | // Find a new block 110 | r.block, err = r.readBlockHeader() 111 | if err != nil { 112 | return nRead, err 113 | } 114 | } 115 | } 116 | } 117 | 118 | // ReadFloats will read data from the underlying io.Reader, parsing 119 | // the data it gets back as float64s and putting them into fs. 
If no 120 | // more values are available, ReadFloats will returns with an 121 | // err==io.EOF. 122 | func (r *Reader) ReadFloats(fs []float64) (int, error) { 123 | buf := make([]byte, 8) 124 | var val uint64 125 | for i := range fs { 126 | _, err := r.Read(buf) 127 | if err != nil { 128 | return i, err 129 | } 130 | val = binary.LittleEndian.Uint64(buf) 131 | fs[i] = math.Float64frombits(val) 132 | } 133 | return len(fs), nil 134 | } 135 | 136 | // ReadFloat will read data from the underlying io.Reader until it has 137 | // read enough data to provide a float64, decodes that data, and 138 | // returns the decoded float64. If an error is encountered while 139 | // reading, it returns 0 and that error. If no more values are 140 | // available, ReadFloat will return with err==io.EOF. 141 | func (r *Reader) ReadFloat() (float64, error) { 142 | buf := make([]byte, 8) 143 | _, err := r.Read(buf) 144 | if err != nil { 145 | return 0, err 146 | } 147 | val := binary.LittleEndian.Uint64(buf) 148 | return math.Float64frombits(val), nil 149 | } 150 | 151 | // readBlockHeader reads the block header and record headers that start a data 152 | // block. It returns the slice of record headers, the number of bytes remaining 153 | // in the block, and any errors encountered while reading. 154 | func (r *Reader) readBlockHeader() (b block, err error) { 155 | // The first 6 bytes of the block describe the number of records and bytes 156 | // in the block. 157 | buf := make([]byte, 6) 158 | n, err := r.r.Read(buf) 159 | if n == 0 && err == io.EOF { 160 | // No data available: This is a genuine EOF. We have no blocks left. 161 | return b, io.EOF 162 | } else if n < len(buf) || err == io.EOF { 163 | // Partial data available: This is a corrupted header, we expected 6 bytes. 
164 | return b, DataError("block header too short") 165 | } else if err != nil { 166 | // Some other unexpected error 167 | return b, err 168 | } 169 | b.nRec, b.nByte = decodeBlockHeader(buf) 170 | b.nByteRead += 6 // the first 6 bytes are included in the header's count 171 | 172 | // Each record has a 4-bit header value. These headers have 1 bit to 173 | // describe which predictor hash table to use, and 3 bits to describe how 174 | // many zero bits prefix their associated value. 175 | // 176 | // The 4-bit records are packed as pairs into bytes. If there are an odd 177 | // number of records in the block, then the last 4-bit header is 178 | // meaningless and can be discarded. 179 | b.headers = make([]header, b.nRec) 180 | 181 | // Read out the appropriate number of bytes. 182 | buf = make([]byte, b.nRec/2) 183 | n, err = io.ReadFull(r.r, buf) 184 | if err != nil { 185 | return b, err 186 | } 187 | for i, byte := range buf { 188 | b.headers[2*i], b.headers[2*i+1] = decodeHeaders(byte) 189 | } 190 | b.nByteRead += b.nRec / 2 191 | 192 | // If there are an odd number of records, then read just the first 4 bits 193 | // of the next byte. 194 | if b.nRec%2 == 1 { 195 | // Read one byte. 196 | buf = buf[:1] 197 | _, err = io.ReadFull(r.r, buf) 198 | if err != nil { 199 | return b, err 200 | } 201 | b.headers[b.nRec-1], _ = decodeHeaders(buf[0]) 202 | b.nByteRead += 1 203 | } 204 | 205 | return b, nil 206 | } 207 | 208 | func (r *Reader) readFromBlock(p []byte) (int, error) { 209 | var ( 210 | b []byte // workspace for decoding 211 | val uint64 212 | pred uint64 213 | h header 214 | 215 | bytesDecoded int 216 | ) 217 | 218 | b = make([]byte, 8) // records can be at most 8 bytes 219 | for r.block.nRecRead < r.block.nRec && len(p) > 0 { 220 | // Get as many bytes off the reader as the header says we should take. 
221 | h = r.block.headers[r.block.nRecRead] 222 | n, err := r.r.Read(b[:h.len]) 223 | if n < int(h.len) || err == io.EOF { 224 | return bytesDecoded, DataError("missing records") 225 | } 226 | if err != nil { 227 | return bytesDecoded, err 228 | } 229 | 230 | // Parse the bytes. 231 | val = decodeData(b[:h.len]) 232 | 233 | // XOR with the predictions to get back the true values. 234 | if h.pType == fcmPredictor { 235 | pred = r.fcm.predict() 236 | } else { 237 | pred = r.dfcm.predict() 238 | } 239 | val = pred ^ val 240 | r.fcm.update(val) 241 | r.dfcm.update(val) 242 | 243 | // Write the value to p. 244 | binary.LittleEndian.PutUint64(p[:8], val) 245 | p = p[8:] 246 | 247 | // increment counters 248 | bytesDecoded += 8 249 | r.block.nByteRead += int(h.len) 250 | r.block.nRecRead += 1 251 | } 252 | return bytesDecoded, nil 253 | } 254 | 255 | type block struct { 256 | headers []header 257 | 258 | // Counters for current position within the block 259 | nRecRead int 260 | nByteRead int 261 | 262 | // Total counts for the block 263 | nRec int 264 | nByte int 265 | } 266 | -------------------------------------------------------------------------------- /reader_test.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | ) 7 | 8 | func TestReader(t *testing.T) { 9 | for _, tc := range refTests { 10 | comp := bytes.NewBuffer(tc.compressed) 11 | 12 | r := NewReader(comp) 13 | 14 | var err error 15 | have := make([]float64, len(tc.uncompressed)) 16 | for i := range have { 17 | have[i], err = r.ReadFloat() 18 | tc.AssertNoError(t, err, "ReadFloats") 19 | } 20 | tc.AssertEqual(t, have, tc.uncompressed, "Reader") 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /reference_test.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | 
type refinput struct { 9 | comp uint 10 | in []float64 11 | } 12 | 13 | type reftestcase struct { 14 | comp uint 15 | uncompressed []float64 16 | compressed []byte 17 | } 18 | 19 | func (tc reftestcase) idx() int { 20 | for i, rtc := range refTests { 21 | if reflect.DeepEqual(rtc, tc) { 22 | return i 23 | } 24 | } 25 | return -1 26 | } 27 | 28 | func (tc reftestcase) AssertNoError(t *testing.T, err error, msg string) { 29 | if err != nil { 30 | tc.Error(t, msg, err) 31 | } 32 | } 33 | 34 | func (tc reftestcase) AssertEqual(t *testing.T, have, want interface{}, msg string) { 35 | if !reflect.DeepEqual(have, want) { 36 | t.Errorf("%s test=%d", msg, tc.idx()) 37 | t.Logf(" comp=%#v", tc.compressed) 38 | t.Logf("uncomp=%#v", tc.uncompressed) 39 | t.Logf(" have=%#v", have) 40 | t.Logf(" want=%#v", want) 41 | } 42 | } 43 | 44 | func (tc reftestcase) Fail(t *testing.T, msg string, have interface{}) { 45 | } 46 | 47 | func (tc reftestcase) Error(t *testing.T, msg string, err error) { 48 | t.Errorf("%s test=%d err=%q", msg, tc.idx(), err) 49 | t.Logf(" comp=%#v", tc.compressed) 50 | t.Logf("uncomp=%#v", tc.uncompressed) 51 | } 52 | 53 | // Reference tests which have been generated by running fpc.c 54 | var refTests = []reftestcase{ 55 | { 56 | comp: 1, 57 | uncompressed: []float64{}, 58 | compressed: []byte{ 59 | 0x01}, 60 | }, 61 | { 62 | comp: 3, 63 | uncompressed: []float64{}, 64 | compressed: []byte{ 65 | 0x03}, 66 | }, 67 | { 68 | comp: 10, 69 | uncompressed: []float64{}, 70 | compressed: []byte{ 71 | 0x0a}, 72 | }, 73 | { 74 | comp: 1, 75 | uncompressed: []float64{1, 1, 0.9, 0.9}, 76 | compressed: []byte{ 77 | 0x01, 0x04, 0x00, 0x00, 0x17, 0x00, 0x00, 0x70, 78 | 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 79 | 0x3f, 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x1c, 80 | }, 81 | }, 82 | { 83 | comp: 3, 84 | uncompressed: []float64{1, 1, 0.9, 0.9}, 85 | compressed: []byte{ 86 | 0x03, 0x04, 0x00, 0x00, 0x17, 0x00, 0x00, 0x70, 87 | 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0xf0, 88 | 0x3f, 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x1c, 89 | }, 90 | }, 91 | { 92 | comp: 10, 93 | uncompressed: []float64{1, 1, 0.9, 0.9}, 94 | compressed: []byte{ 95 | 0x0a, 0x04, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x77, 96 | 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 97 | 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 98 | 0x3f, 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x1c, 99 | }, 100 | }, 101 | { 102 | comp: 1, 103 | uncompressed: []float64{0, 0, 0, 0, 0}, 104 | compressed: []byte{ 105 | 0x01, 0x05, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 106 | 0x00, 0x00}, 107 | }, 108 | { 109 | comp: 3, 110 | uncompressed: []float64{0, 0, 0, 0, 0}, 111 | compressed: []byte{ 112 | 0x03, 0x05, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 113 | 0x00, 0x00}, 114 | }, 115 | { 116 | comp: 10, 117 | uncompressed: []float64{0, 0, 0, 0, 0}, 118 | compressed: []byte{ 119 | 0x0a, 0x05, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 120 | 0x00, 0x00}, 121 | }, 122 | { 123 | comp: 1, 124 | uncompressed: []float64{-1, 1, -2, 2, -3, 3, -4}, 125 | compressed: []byte{ 126 | 0x01, 0x07, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x7f, 127 | 0xfe, 0xee, 0xef, 0x00, 0x00, 0x00, 0x00, 0x00, 128 | 0x00, 0xf0, 0xbf, 0x00, 0x00, 0x00, 0x00, 0x00, 129 | 0x00, 0x10, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 130 | 0x00, 0xf0, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 131 | 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 132 | 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 133 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18}, 134 | }, 135 | { 136 | comp: 3, 137 | uncompressed: []float64{-1, 1, -2, 2, -3, 3, -4}, 138 | compressed: []byte{ 139 | 0x03, 0x07, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x7f, 140 | 0xfe, 0xee, 0xef, 0x00, 0x00, 0x00, 0x00, 0x00, 141 | 0x00, 0xf0, 0xbf, 0x00, 0x00, 0x00, 0x00, 0x00, 142 | 0x00, 0x10, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 143 | 0x00, 0xf0, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 144 | 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 145 | 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 146 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x18}, 147 | }, 148 | { 149 | comp: 10, 150 | uncompressed: []float64{-1, 1, -2, 2, -3, 3, -4}, 151 | compressed: []byte{ 152 | 0x0a, 153 | 0x07, 0x00, 0x00, 154 | 0x3e, 0x00, 0x00, 155 | 0x77, 0xfe, 0xee, 0xe0, 156 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0xbf, 157 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, 158 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x7f, 159 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 160 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 161 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 162 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18}, 163 | }, 164 | { 165 | comp: 1, 166 | uncompressed: []float64{1e-05, 0.0001, 0.001, 0.01, 0.1, 1, 100, 1000, 10000, 100000}, 167 | compressed: []byte{ 168 | 0x01, 0x0a, 0x00, 0x00, 0x53, 0x00, 0x00, 0x77, 169 | 0xe6, 0xee, 0xee, 0xee, 0xf1, 0x68, 0xe3, 0x88, 170 | 0xb5, 0xf8, 0xe4, 0x3e, 0xdc, 0x2b, 0xff, 0x63, 171 | 0x57, 0xce, 0xfe, 0x01, 0x95, 0xb4, 0xa4, 0x9f, 172 | 0x5d, 0x17, 0x1f, 0x87, 0xbd, 0x5f, 0x95, 0xac, 173 | 0x18, 0xd4, 0xd0, 0xe2, 0x1a, 0xb6, 0xd5, 0x3f, 174 | 0x03, 0xb9, 0x1e, 0x85, 0xeb, 0x51, 0xb8, 0x1e, 175 | 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x7f, 0x00, 176 | 0x00, 0x00, 0x00, 0x00, 0x40, 0x4d, 0x00, 0x00, 177 | 0x00, 0x00, 0x00, 0x08, 0x06, 0x00, 0x00, 0x00, 178 | 0x00, 0x00, 0xba, 0x0f}, 179 | }, 180 | { 181 | comp: 3, 182 | uncompressed: []float64{1e-05, 0.0001, 0.001, 0.01, 0.1, 1, 100, 1000, 10000, 100000}, 183 | compressed: []byte{ 184 | 0x03, 0x0a, 0x00, 0x00, 0x53, 0x00, 0x00, 0x77, 185 | 0xee, 0xee, 0xee, 0xee, 0xf1, 0x68, 0xe3, 0x88, 186 | 0xb5, 0xf8, 0xe4, 0x3e, 0x2d, 0x43, 0x1c, 0xeb, 187 | 0xe2, 0x36, 0x1a, 0x3f, 0xd1, 0xea, 0xed, 0x39, 188 | 0xaf, 0x54, 0x4a, 0x87, 0xbd, 0x5f, 0x95, 0xac, 189 | 0x18, 0xd4, 0xe1, 0x8d, 0x37, 0xde, 0x78, 0xe3, 190 | 0x3d, 0x69, 0x00, 0x6f, 0x81, 0x04, 0xc5, 0x1f, 191 | 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x7f, 0x3c, 192 | 0xda, 0x38, 0x62, 0x2d, 0x7e, 0x01, 0x00, 0x00, 193 | 0x00, 0x00, 0x00, 0x08, 0x06, 0x00, 0x00, 0x00, 194 | 0x00, 0x00, 
0xba, 0x0f}, 195 | }, 196 | { 197 | comp: 10, 198 | uncompressed: []float64{1e-05, 0.0001, 0.001, 0.01, 0.1, 1, 100, 1000, 10000, 100000}, 199 | compressed: []byte{ 200 | 0x0a, 0x0a, 0x00, 0x00, 0x54, 0x00, 0x00, 0x7f, 201 | 0xee, 0xee, 0x7e, 0xee, 0xf1, 0x68, 0xe3, 0x88, 202 | 0xb5, 0xf8, 0xe4, 0x3e, 0xdc, 0x2b, 0xff, 0x63, 203 | 0x57, 0xce, 0xfe, 0x01, 0xd1, 0xea, 0xed, 0x39, 204 | 0xaf, 0x54, 0x4a, 0x87, 0xbd, 0x5f, 0x95, 0xac, 205 | 0x18, 0xd4, 0xe1, 0x8d, 0x37, 0xde, 0x78, 0xe3, 206 | 0x3d, 0x9a, 0x99, 0x99, 0x99, 0x99, 0x99, 0x49, 207 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0x40, 208 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0xd6, 0x00, 209 | 0x00, 0x00, 0x00, 0x00, 0xc8, 0x4c, 0x00, 0x00, 210 | 0x00, 0x00, 0x00, 0xe2, 0x3b}, 211 | }, 212 | { 213 | comp: 10, 214 | uncompressed: []float64{1.627073029e-314, 1.212760445e-314}, 215 | compressed: []byte{ 216 | 0xa, 0x2, 0x0, 0x0, 0x11, 0x0, 0x0, 0x44, 0x5a, 217 | 0xbd, 0x4a, 0xc4, 0x0, 0x5e, 0xb3, 0x5, 0x56, 0x0}, 218 | }, 219 | } 220 | -------------------------------------------------------------------------------- /utils.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "strconv" 7 | "strings" 8 | ) 9 | 10 | // Utilities for using binary string representations of values 11 | 12 | func bytes2binstr(bs []byte) string { 13 | var ss []string 14 | for _, b := range bs { 15 | s := strconv.FormatUint(uint64(b), 2) 16 | for len(s) < 8 { 17 | s = "0" + s 18 | } 19 | ss = append(ss, s) 20 | } 21 | return strings.Join(ss, " ") 22 | } 23 | 24 | func binstr2bytes(s string) []byte { 25 | s = strings.Replace(s, " ", "", -1) 26 | 27 | var bs []byte 28 | for len(s) > 0 { 29 | end := 8 30 | if 8 > len(s) { 31 | end = len(s) 32 | } 33 | val, err := strconv.ParseUint(s[:end], 2, 8) 34 | if err != nil { 35 | panic(err) 36 | } 37 | bs = append(bs, byte(val)) 38 | s = s[end:] 39 | } 40 | return bs 41 | } 42 | 43 | func bytes2u64(b 
[]byte) uint64 { 44 | return binary.LittleEndian.Uint64(b) 45 | } 46 | 47 | func u642binstr(x uint64) string { 48 | s := strconv.FormatUint(x, 2) 49 | for len(s) < 64 { 50 | s = "0" + s 51 | } 52 | return insertNth(s, 8) 53 | } 54 | func u82binstr(x uint8) string { 55 | s := strconv.FormatUint(uint64(x), 2) 56 | for len(s) < 8 { 57 | s = "0" + s 58 | } 59 | return s 60 | } 61 | 62 | func insertNth(s string, n int) string { 63 | var buffer bytes.Buffer 64 | for i, rune := range s { 65 | buffer.WriteRune(rune) 66 | if i%n == n-1 && i != len(s)-1 { 67 | buffer.WriteRune(' ') 68 | } 69 | } 70 | return buffer.String() 71 | } 72 | 73 | func binstr2u64(s string) uint64 { 74 | s = strings.Replace(s, " ", "", -1) 75 | 76 | val, err := strconv.ParseUint(s, 2, 64) 77 | if err != nil { 78 | panic(err) 79 | } 80 | return val 81 | } 82 | 83 | func binstr2u8(s string) uint8 { 84 | s = strings.Replace(s, " ", "", -1) 85 | 86 | val, err := strconv.ParseUint(s, 2, 8) 87 | if err != nil { 88 | panic(err) 89 | } 90 | return uint8(val) 91 | } 92 | 93 | func binstr2byte(s string) byte { 94 | return byte(binstr2u8(s)) 95 | } 96 | -------------------------------------------------------------------------------- /writer.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "math" 9 | ) 10 | 11 | const ( 12 | DefaultCompression = 10 13 | // The reference implementation uses 255 for a max compression, but that 14 | // hardly seems realistic: merely 32 will require 68 gigabytes of working 15 | // memory to compute hashes. Beyond 35 we start hitting panics. 16 | MaxCompression = 32 17 | 18 | floatChunkSize = 8 19 | ) 20 | 21 | // A Writer is an io.WriteCloser which FPC-compresses data it receives 22 | // and writes it to an underlying writer, w. 
Writes to a Writer are 23 | type Writer struct { 24 | w io.Writer 25 | level int 26 | enc *blockEncoder 27 | 28 | wroteHeader bool 29 | closed bool 30 | } 31 | 32 | // NewWriter makes a new Writer which writes compressed data to w 33 | // using the default compression level. 34 | func NewWriter(w io.Writer) *Writer { 35 | z, _ := NewWriterLevel(w, DefaultCompression) 36 | return z 37 | } 38 | 39 | // NewWriterLevel makes a new Writer which writes compressed data to w 40 | // using a provided compression level. Higher compression levels will 41 | // result in more compressed data, but require exponentially more 42 | // memory. The space required is O(2^level) bytes. NewWriterLevel 43 | // returns an error if an invalid compression level is provided. 44 | func NewWriterLevel(w io.Writer, level int) (*Writer, error) { 45 | if level < 1 || level > MaxCompression { 46 | return nil, fmt.Errorf("fpc: invalid compression level: %d", level) 47 | } 48 | z := &Writer{ 49 | w: w, 50 | level: level, 51 | enc: newBlockEncoder(w, uint(level)), 52 | } 53 | return z, nil 54 | } 55 | 56 | // Write interprets b as a stream of byte-encoded, 64-bit IEEE 754 57 | // floating point values. The length of b must be a multiple of 8 in 58 | // order to match this expectation. 59 | func (w *Writer) Write(b []byte) (int, error) { 60 | if len(b)%8 != 0 { 61 | return 0, errors.New("fpc.Write: len of data must be a multiple of 8") 62 | } 63 | for i := 0; i < len(b); i += 8 { 64 | if err := w.writeBytes(b[i : i+8]); err != nil { 65 | return i, err 66 | } 67 | } 68 | return len(b), nil 69 | } 70 | 71 | // WriteFloat writes a single float64 value to the encoded stream. 72 | func (w *Writer) WriteFloat(f float64) error { 73 | return w.writeFloat64(f) 74 | } 75 | 76 | // Flush will make sure all internally-buffered values are written to 77 | // w. 
FPC's format specifies that data get written in blocks; calling 78 | // Flush will write the current data to a block, even if it results in 79 | // a partial block. 80 | // 81 | // Flush does not flush the underlying io.Writer which w is delegating 82 | // to. 83 | func (w *Writer) Flush() error { 84 | if err := w.ensureHeader(); err != nil { 85 | return err 86 | } 87 | return w.enc.flush() 88 | } 89 | 90 | // Close will flush the Writer and make any subsequent writes return 91 | // errors. It does not close the underlying io.Writer which w is 92 | // delegating to. 93 | func (w *Writer) Close() error { 94 | if w.closed == true { 95 | return nil 96 | } 97 | w.closed = true 98 | return w.Flush() 99 | } 100 | 101 | func (w *Writer) ensureHeader() error { 102 | if !w.wroteHeader { 103 | w.wroteHeader = true 104 | _, err := w.w.Write([]byte{byte(w.level)}) 105 | if err != nil { 106 | return err 107 | } 108 | } 109 | return nil 110 | } 111 | 112 | func (w *Writer) writeFloat64(f float64) error { 113 | return w.writeUint64(math.Float64bits(f)) 114 | } 115 | 116 | func (w *Writer) writeUint64(u uint64) error { 117 | if err := w.ensureHeader(); err != nil { 118 | return err 119 | } 120 | if err := w.enc.encode(u); err != nil { 121 | return err 122 | } 123 | return nil 124 | } 125 | 126 | // writeBytes writes a single 8-byte encoded IEEE 754 float 127 | func (w *Writer) writeBytes(b []byte) error { 128 | if err := w.writeUint64(binary.LittleEndian.Uint64(b)); err != nil { 129 | return err 130 | } 131 | return nil 132 | } 133 | -------------------------------------------------------------------------------- /writer_test.go: -------------------------------------------------------------------------------- 1 | package fpc 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | ) 7 | 8 | func TestWriter(t *testing.T) { 9 | for _, tc := range refTests { 10 | have := bytes.NewBuffer(nil) 11 | w, err := NewWriterLevel(have, int(tc.comp)) 12 | if err != nil { 13 | t.Fatalf("NewWriterLevel 
err=%q", err) 14 | } 15 | for _, f := range tc.uncompressed { 16 | err = w.WriteFloat(f) 17 | tc.AssertNoError(t, err, "WriteFloat") 18 | } 19 | err = w.Close() 20 | tc.AssertNoError(t, err, "Close") 21 | 22 | tc.AssertEqual(t, have.Bytes(), tc.compressed, "Writer") 23 | } 24 | } 25 | --------------------------------------------------------------------------------