├── LICENSE.txt ├── array.go ├── array_test.go ├── bool.go ├── bool_test.go ├── buffer.go ├── build.go ├── build_test.go ├── buildschema.go ├── buildschema_test.go ├── bytes.go ├── bytes_test.go ├── codec.go ├── discard.go ├── encoder.go ├── encoder_test.go ├── file.go ├── file_test.go ├── filewriter.go ├── filewriter_test.go ├── fixed.go ├── fixed_test.go ├── float.go ├── float_test.go ├── go.mod ├── go.sum ├── int.go ├── int_test.go ├── interface.go ├── map.go ├── map_test.go ├── null.go ├── null ├── null.go ├── null_test.go └── testdata │ └── nullavro ├── pointer.go ├── pointer_test.go ├── readme.md ├── record.go ├── record_test.go ├── schema.go ├── schema_test.go ├── string.go ├── string_test.go ├── testdata └── avro1 ├── time ├── parse.go ├── parse_test.go ├── time.go └── time_test.go ├── union.go ├── union_test.go └── unsafetricks.go /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020-2025 Phil Pearl 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /array.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "unsafe" 7 | ) 8 | 9 | type arrayCodec struct { 10 | itemCodec Codec 11 | itemType reflect.Type 12 | omitEmpty bool 13 | } 14 | 15 | func (rc *arrayCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 16 | sh := (*sliceHeader)(p) 17 | 18 | // Blocks can be repeated 19 | for { 20 | count, err := r.Varint() 21 | if err != nil { 22 | return fmt.Errorf("failed to read count for array. %w", err) 23 | } 24 | if count == 0 { 25 | break 26 | } 27 | if count < 0 { 28 | // negative length means there's a block size, which is only really 29 | // useful for skipping. 30 | count = -count 31 | if _, err := r.Varint(); err != nil { 32 | return fmt.Errorf("failed to read block size for array. %w", err) 33 | } 34 | } 35 | 36 | // If our array is nil or undersized then we can fix it up here. 37 | *sh = rc.resizeSlice(r, *sh, int(count)) 38 | 39 | itemSize := rc.itemType.Size() 40 | for i := range count { 41 | cursor := unsafe.Add(sh.Data, uintptr(sh.Len)*itemSize) 42 | if err := rc.itemCodec.Read(r, cursor); err != nil { 43 | return fmt.Errorf("failed to decode array entry %d. 
%w", i, err) 44 | } 45 | sh.Len++ 46 | } 47 | } 48 | 49 | return nil 50 | } 51 | 52 | func (rc *arrayCodec) Skip(r *ReadBuf) error { 53 | for { 54 | count, err := r.Varint() 55 | if err != nil { 56 | return fmt.Errorf("failed to read count for array. %w", err) 57 | } 58 | if count == 0 { 59 | break 60 | } 61 | if count < 0 { 62 | // negative count means there's a block size we can use to skip the 63 | // rest of this block 64 | bs, err := r.Varint() 65 | if err != nil { 66 | return fmt.Errorf("failed to read block size for array. %w", err) 67 | } 68 | if err := skip(r, bs); err != nil { 69 | return err 70 | } 71 | continue 72 | } 73 | 74 | for ; count > 0; count-- { 75 | if err := rc.itemCodec.Skip(r); err != nil { 76 | return fmt.Errorf("failed to skip array entry. %w", err) 77 | } 78 | } 79 | } 80 | 81 | return nil 82 | } 83 | 84 | var sliceType = reflect.TypeFor[sliceHeader]() 85 | 86 | func (rc *arrayCodec) New(r *ReadBuf) unsafe.Pointer { 87 | return r.Alloc(sliceType) 88 | } 89 | 90 | // resizeSlice increases the length of the slice by len entries 91 | func (rc *arrayCodec) resizeSlice(r *ReadBuf, in sliceHeader, len int) sliceHeader { 92 | if in.Len+len <= in.Cap { 93 | return in 94 | } 95 | // Will assume for now that blocks are sensible sizes 96 | out := sliceHeader{ 97 | Cap: in.Len + len, 98 | Len: in.Len, 99 | } 100 | out.Data = r.AllocArray(rc.itemType, out.Cap) 101 | 102 | if in.Data != nil { 103 | elemType := unpackEFace(rc.itemType).data 104 | typedslicecopy(elemType, out, in) 105 | } 106 | return out 107 | } 108 | 109 | func (rc *arrayCodec) Omit(p unsafe.Pointer) bool { 110 | return rc.omitEmpty && len(*(*[]byte)(p)) == 0 111 | } 112 | 113 | func (rc *arrayCodec) Write(w *WriteBuf, p unsafe.Pointer) { 114 | sh := (*sliceHeader)(p) 115 | if sh.Len == 0 { 116 | w.Varint(0) 117 | return 118 | } 119 | 120 | // TODO: you can write negative counts, which are then followed by the size 121 | // of the block, then the data. That makes it easier to skip over data. TBD if we want to do that 122 | w.Varint(int64(sh.Len)) 123 | for i := range sh.Len { 124 | cursor := unsafe.Add(sh.Data, uintptr(i)*rc.itemType.Size()) 125 | rc.itemCodec.Write(w, cursor) 126 | } 127 | 128 | // Write a zero count to indicate the end of the array. This does appear to 129 | // be necessary as you can write multiple blocks. 
130 | w.Varint(0) 131 | } 132 | -------------------------------------------------------------------------------- /array_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | "unsafe" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | "github.com/google/go-cmp/cmp/cmpopts" 10 | ) 11 | 12 | func TestArrayCodec(t *testing.T) { 13 | tests := []struct { 14 | name string 15 | data []byte 16 | exp []string 17 | out []string 18 | }{ 19 | { 20 | name: "empty", 21 | data: []byte{0}, 22 | }, 23 | { 24 | name: "one", 25 | data: []byte{ 26 | 2, 27 | 6, 'o', 'n', 'e', 28 | 0, 29 | }, 30 | exp: []string{"one"}, 31 | }, 32 | { 33 | name: "one append", 34 | data: []byte{ 35 | 2, 36 | 6, 'o', 'n', 'e', 37 | 0, 38 | }, 39 | out: []string{"two"}, 40 | exp: []string{"two", "one"}, 41 | }, 42 | { 43 | name: "two", 44 | data: []byte{ 45 | 4, 46 | 6, 'o', 'n', 'e', 47 | 6, 't', 'w', 'o', 48 | 0, 49 | }, 50 | exp: []string{"one", "two"}, 51 | }, 52 | { 53 | name: "two blocks", 54 | data: []byte{ 55 | 2, 56 | 6, 'o', 'n', 'e', 57 | 2, 58 | 6, 't', 'w', 'o', 59 | 0, 60 | }, 61 | exp: []string{"one", "two"}, 62 | }, 63 | { 64 | name: "two blocks with size", 65 | data: []byte{ 66 | 1, 67 | 8, 68 | 6, 'o', 'n', 'e', 69 | 1, 70 | 8, 71 | 6, 't', 'w', 'o', 72 | 0, 73 | }, 74 | exp: []string{"one", "two"}, 75 | }, 76 | } 77 | 78 | c := arrayCodec{ 79 | itemCodec: StringCodec{}, 80 | itemType: reflect.TypeFor[string](), 81 | } 82 | 83 | for _, test := range tests { 84 | t.Run(test.name, func(t *testing.T) { 85 | buf := NewReadBuf(test.data) 86 | 87 | if err := c.Read(buf, unsafe.Pointer(&test.out)); err != nil { 88 | t.Fatal(err) 89 | } 90 | 91 | if diff := cmp.Diff(test.exp, test.out); diff != "" { 92 | t.Fatalf("output not as expected. %s", diff) 93 | } 94 | if buf.Len() != 0 { 95 | t.Fatalf("unread data (%d)", buf.Len()) 96 | } 97 | }) 98 | t.Run(test.name+"_skip", func(t *testing.T) { 99 | buf := NewReadBuf(test.data) 100 | 101 | if err := c.Skip(buf); err != nil { 102 | t.Fatal(err) 103 | } 104 | 105 | if buf.Len() != 0 { 106 | t.Fatalf("unread data (%d)", buf.Len()) 107 | } 108 | }) 109 | 110 | } 111 | } 112 | 113 | func TestArrayCodecInt(t *testing.T) { 114 | tests := []struct { 115 | name string 116 | data []byte 117 | exp []int32 118 | out []int32 119 | }{ 120 | { 121 | name: "empty", 122 | data: []byte{0}, 123 | }, 124 | { 125 | name: "one", 126 | data: []byte{ 127 | 2, 128 | 2, 129 | 0, 130 | }, 131 | exp: []int32{1}, 132 | }, 133 | { 134 | name: "one append", 135 | data: []byte{ 136 | 2, 137 | 2, 138 | 0, 139 | }, 140 | out: []int32{2}, 141 | exp: []int32{2, 1}, 142 | }, 143 | { 144 | name: "more", 145 | data: []byte{ 146 | 8, 147 | 1, 148 | 2, 149 | 3, 150 | 4, 151 | 0, 152 | }, 153 | exp: []int32{-1, 1, -2, 2}, 154 | }, 155 | { 156 | name: "two blocks", 157 | data: []byte{ 158 | 2, 159 | 2, 160 | 2, 161 | 4, 162 | 0, 163 | }, 164 | exp: []int32{1, 2}, 165 | }, 166 | } 167 | 168 | c := arrayCodec{ 169 | itemCodec: Int32Codec{}, 170 | itemType: reflect.TypeFor[int32](), 171 | } 172 | 173 | for _, test := range tests { 174 | test := test 175 | t.Run(test.name, func(t *testing.T) { 176 | t.Parallel() 177 | buf := NewReadBuf(test.data) 178 | 179 | if err := c.Read(buf, unsafe.Pointer(&test.out)); err != nil { 180 | t.Fatal(err) 181 | } 182 | 183 | if diff := cmp.Diff(test.exp, test.out); diff != "" { 184 | t.Fatalf("output not as expected. 
%s", diff) 185 | } 186 | if buf.Len() != 0 { 187 | t.Fatalf("unread data (%d)", buf.Len()) 188 | } 189 | }) 190 | t.Run(test.name+"_skip", func(t *testing.T) { 191 | t.Parallel() 192 | buf := NewReadBuf(test.data) 193 | 194 | if err := c.Skip(buf); err != nil { 195 | t.Fatal(err) 196 | } 197 | 198 | if buf.Len() != 0 { 199 | t.Fatalf("unread data (%d)", buf.Len()) 200 | } 201 | }) 202 | 203 | } 204 | } 205 | 206 | func TestArrayCodecRoundTrip(t *testing.T) { 207 | tests := []struct { 208 | name string 209 | data []string 210 | }{ 211 | { 212 | name: "empty", 213 | data: []string{}, 214 | }, 215 | { 216 | name: "one", 217 | data: []string{"one"}, 218 | }, 219 | { 220 | name: "two", 221 | data: []string{"one", "two"}, 222 | }, 223 | { 224 | name: "three", 225 | data: []string{"one", "two", "three"}, 226 | }, 227 | } 228 | 229 | c := arrayCodec{ 230 | itemCodec: StringCodec{}, 231 | itemType: reflect.TypeFor[string](), 232 | } 233 | 234 | for _, test := range tests { 235 | t.Run(test.name, func(t *testing.T) { 236 | t.Parallel() 237 | w := NewWriteBuf(nil) 238 | 239 | c.Write(w, unsafe.Pointer(&test.data)) 240 | 241 | var out []string 242 | r := NewReadBuf(w.Bytes()) 243 | if err := c.Read(r, unsafe.Pointer(&out)); err != nil { 244 | t.Fatal(err) 245 | } 246 | 247 | if diff := cmp.Diff(test.data, out, cmpopts.EquateEmpty()); diff != "" { 248 | t.Fatalf("output not as expected. %s", diff) 249 | } 250 | }) 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /bool.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "unsafe" 6 | ) 7 | 8 | type BoolCodec struct{ omitEmpty bool } 9 | 10 | func (BoolCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 11 | b, err := r.ReadByte() 12 | if err != nil { 13 | return err 14 | } 15 | 16 | *(*bool)(p) = b != 0 17 | return nil 18 | } 19 | 20 | func (BoolCodec) Skip(r *ReadBuf) error { 21 | return skip(r, 1) 22 | } 23 | 24 | var boolType = reflect.TypeFor[bool]() 25 | 26 | func (BoolCodec) New(r *ReadBuf) unsafe.Pointer { 27 | return r.Alloc(boolType) 28 | } 29 | 30 | func (rc BoolCodec) Omit(p unsafe.Pointer) bool { 31 | return rc.omitEmpty && !*(*bool)(p) 32 | } 33 | 34 | func (rc BoolCodec) Write(w *WriteBuf, p unsafe.Pointer) { 35 | if *(*bool)(p) { 36 | w.Byte(1) 37 | } else { 38 | w.Byte(0) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /bool_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | "unsafe" 7 | ) 8 | 9 | func TestBoolCodec(t *testing.T) { 10 | tests := []struct { 11 | name string 12 | data []byte 13 | exp bool 14 | }{ 15 | { 16 | name: "true", 17 | data: []byte{1}, 18 | exp: true, 19 | }, 20 | { 21 | name: "false", 22 | data: []byte{0}, 23 | exp: false, 24 | }, 25 | } 26 | 27 | c := BoolCodec{} 28 | 29 | for _, test := range tests { 30 | test := test 31 | t.Run(test.name, func(t *testing.T) { 32 | t.Parallel() 33 | var actual bool 34 | r := NewReadBuf(test.data) 35 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 36 | t.Fatal(err) 37 | } 38 | if actual != test.exp { 39 | t.Fatalf("got %t, expected %t", actual, test.exp) 40 | } 41 | if r.Len() != 0 { 42 | t.Fatalf("%d bytes left", r.Len()) 43 | } 44 | }) 45 | 46 | t.Run(test.name+" skip", func(t *testing.T) { 47 | t.Parallel() 48 | r := NewReadBuf(test.data) 49 | if err := c.Skip(r); err != nil { 
50 | t.Fatal(err) 51 | } 52 | if r.Len() != 0 { 53 | t.Fatalf("%d bytes left", r.Len()) 54 | } 55 | }) 56 | } 57 | } 58 | 59 | func BenchmarkBoolPointer(b *testing.B) { 60 | data := bytes.Repeat([]byte{1}, 1000) 61 | r := NewReadBuf(data) 62 | 63 | c := PointerCodec{BoolCodec{}} 64 | b.ReportAllocs() 65 | 66 | for b.Loop() { 67 | r.Reset(data) 68 | for range 1000 { 69 | var out *bool 70 | if err := c.Read(r, unsafe.Pointer(&out)); err != nil { 71 | b.Fatal(err) 72 | } 73 | if !*out { 74 | b.Fatal("wrong bool") 75 | } 76 | } 77 | r.ExtractResourceBank().Close() 78 | } 79 | } 80 | 81 | func TestBoolCodecRoundTrip(t *testing.T) { 82 | tests := []struct { 83 | name string 84 | data bool 85 | }{ 86 | { 87 | name: "true", 88 | data: true, 89 | }, 90 | { 91 | name: "false", 92 | data: false, 93 | }, 94 | } 95 | 96 | c := BoolCodec{} 97 | 98 | for _, test := range tests { 99 | test := test 100 | t.Run(test.name, func(t *testing.T) { 101 | t.Parallel() 102 | var actual bool 103 | w := NewWriteBuf(nil) 104 | c.Write(w, unsafe.Pointer(&test.data)) 105 | r := NewReadBuf(w.Bytes()) 106 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 107 | t.Fatal(err) 108 | } 109 | if actual != test.data { 110 | t.Fatalf("got %t, expected %t", actual, test.data) 111 | } 112 | }) 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /buffer.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "io" 7 | "reflect" 8 | "sync" 9 | "unsafe" 10 | ) 11 | 12 | // WriteBuf is a simple, append only, replacement for bytes.Buffer. It is used 13 | // by AVRO encoders. It is not safe for concurrent use. 14 | type WriteBuf struct { 15 | buf []byte 16 | } 17 | 18 | // NewWriteBuf returns a new WriteBuf. 19 | func NewWriteBuf(buf []byte) *WriteBuf { 20 | return &WriteBuf{buf: buf} 21 | } 22 | 23 | func (w *WriteBuf) Varint(v int64) { 24 | w.buf = binary.AppendVarint(w.buf, v) 25 | } 26 | 27 | func (w *WriteBuf) Byte(val byte) { 28 | w.buf = append(w.buf, val) 29 | } 30 | 31 | func (w *WriteBuf) Write(val []byte) { 32 | w.buf = append(w.buf, val...) 33 | } 34 | 35 | func (w *WriteBuf) Bytes() []byte { 36 | return w.buf 37 | } 38 | 39 | func (w *WriteBuf) Reset() { 40 | w.buf = w.buf[:0] 41 | } 42 | 43 | func (w *WriteBuf) Len() int { 44 | return len(w.buf) 45 | } 46 | 47 | // ReadBuf is a very simple replacement for bytes.Reader that avoids data copies 48 | type ReadBuf struct { 49 | i int 50 | buf []byte 51 | rb *ResourceBank 52 | } 53 | 54 | // NewReadBuf returns a new Buffer. 55 | func NewReadBuf(data []byte) *ReadBuf { 56 | return &ReadBuf{buf: data, rb: newResourceBank()} 57 | } 58 | 59 | // Reset allows you to reuse a buffer with a new set of data 60 | func (d *ReadBuf) Reset(data []byte) { 61 | d.i = 0 62 | d.buf = data 63 | if d.rb == nil { 64 | d.rb = newResourceBank() 65 | } 66 | } 67 | 68 | // ExtractResourceBank extracts the current ResourceBank from the buffer, and replaces 69 | // it with a fresh one. 70 | func (d *ReadBuf) ExtractResourceBank() *ResourceBank { 71 | rb := d.rb 72 | d.rb = newResourceBank() 73 | return rb 74 | } 75 | 76 | // Next returns the next l bytes from the buffer. It does so without copying, so 77 | // if you hold onto the data you risk holding onto a lot of data. 
If l exceeds 78 | // the remaining space Next returns io.EOF 79 | func (d *ReadBuf) Next(l int) ([]byte, error) { 80 | if l+d.i > len(d.buf) { 81 | return nil, io.EOF 82 | } 83 | d.i += l 84 | return d.buf[d.i-l : d.i], nil 85 | } 86 | 87 | // NextAsString returns the next l bytes from the buffer as a string. The string 88 | // data is held in a StringBank and will be valid only until someone calls Close 89 | // on that bank. If l exceeds the remaining space NextAsString returns io.EOF 90 | func (d *ReadBuf) NextAsString(l int) (string, error) { 91 | if l+d.i > len(d.buf) { 92 | return "", io.EOF 93 | } 94 | d.i += l 95 | return d.rb.ToString(d.buf[d.i-l : d.i]), nil 96 | } 97 | 98 | func (d *ReadBuf) NextAsBytes(l int) ([]byte, error) { 99 | if l+d.i > len(d.buf) { 100 | return nil, io.EOF 101 | } 102 | d.i += l 103 | return d.rb.ToBytes(d.buf[d.i-l : d.i]), nil 104 | } 105 | 106 | // Alloc allocates a pointer to the type rtyp. The data is allocated in a ResourceBank 107 | func (d *ReadBuf) Alloc(rtyp reflect.Type) unsafe.Pointer { 108 | return d.rb.Alloc(rtyp) 109 | } 110 | 111 | func (d *ReadBuf) AllocArray(rtyp reflect.Type, len int) unsafe.Pointer { 112 | return d.rb.AllocArray(rtyp, len) 113 | } 114 | 115 | // ReadByte returns the next byte from the buffer. If no bytes are left it 116 | // returns io.EOF 117 | func (d *ReadBuf) ReadByte() (byte, error) { 118 | if d.i >= len(d.buf) { 119 | return 0, io.EOF 120 | } 121 | d.i++ 122 | return d.buf[d.i-1], nil 123 | } 124 | 125 | // Len returns the length of unread data in the buffer 126 | func (d *ReadBuf) Len() int { 127 | return len(d.buf) - d.i 128 | } 129 | 130 | // Varint reads a varint from the buffer 131 | func (d *ReadBuf) Varint() (int64, error) { 132 | v, err := d.uvarint() // ok to continue in presence of error 133 | return int64(v>>1) ^ -int64(v&1), err 134 | } 135 | 136 | var errOverflow = errors.New("varint overflows a 64-bit integer") 137 | 138 | func (d *ReadBuf) uvarint() (uint64, error) { 139 | var x uint64 140 | var s uint 141 | for i := 0; ; i++ { 142 | b, err := d.ReadByte() 143 | if err != nil { 144 | return x, err 145 | } 146 | if b < 0x80 { 147 | if i > 9 || i == 9 && b > 1 { 148 | return x, errOverflow 149 | } 150 | return x | uint64(b)< 0 { 270 | var opt string 271 | opt, opts, _ = strings.Cut(opts, ",") 272 | if opt == "omitempty" { 273 | return true 274 | } 275 | } 276 | return false 277 | } 278 | 279 | func buildRecordCodec(schema Schema, typ reflect.Type) (Codec, error) { 280 | if schema.Object == nil { 281 | return nil, fmt.Errorf("record schema does not have object") 282 | } 283 | 284 | var ntf map[string]reflect.StructField 285 | if typ != nil { 286 | if typ.Kind() != reflect.Struct { 287 | return nil, fmt.Errorf("type for a record must be struct, not %s", typ.Kind()) 288 | } 289 | 290 | // Build a name to field map 291 | ntf = make(map[string]reflect.StructField, typ.NumField()) 292 | for i := range typ.NumField() { 293 | sf := typ.Field(i) 294 | name := nameForField(sf) 295 | if name == "-" { 296 | continue 297 | } 298 | 299 | ntf[name] = sf 300 | } 301 | } 302 | 303 | var rc recordCodec 304 | rc.rtype = typ 305 | 306 | // The schema is in the driving-seat here 307 | for _, schemaf := range schema.Object.Fields { 308 | offset := uintptr(math.MaxUint64) 309 | var fieldType reflect.Type 310 | sf, ok := ntf[schemaf.Name] 311 | if ok { 312 | offset = sf.Offset 313 | fieldType = sf.Type 314 | } 315 | 316 | codec, err := buildCodec(schemaf.Type, fieldType, omitEmpty(sf)) 317 | if err != nil { 318 | return 
nil, fmt.Errorf("failed to get codec for field %q: %w", schemaf.Name, err) 319 | } 320 | 321 | rc.fields = append(rc.fields, recordCodecField{ 322 | codec: codec, 323 | offset: offset, 324 | name: schemaf.Name, 325 | }) 326 | } 327 | 328 | return &rc, nil 329 | } 330 | -------------------------------------------------------------------------------- /build_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func TestBuildCodec(t *testing.T) { 9 | t.Parallel() 10 | 11 | type some struct { 12 | I []int32 13 | } 14 | 15 | type all struct { 16 | A bool 17 | B int32 18 | C int64 19 | D float32 20 | E float64 21 | F []byte 22 | G string 23 | H some 24 | J map[string]int 25 | K [4]byte 26 | L int16 27 | } 28 | 29 | allSchema := Schema{ 30 | Type: "record", 31 | Object: &SchemaObject{ 32 | Fields: []SchemaRecordField{ 33 | { 34 | Name: "A", 35 | Type: Schema{Type: "boolean"}, 36 | }, 37 | { 38 | Name: "B", 39 | Type: Schema{Type: "int"}, 40 | }, 41 | { 42 | Name: "C", 43 | Type: Schema{Type: "long"}, 44 | }, 45 | { 46 | Name: "D", 47 | Type: Schema{Type: "float"}, 48 | }, 49 | { 50 | Name: "E", 51 | Type: Schema{Type: "double"}, 52 | }, 53 | { 54 | Name: "F", 55 | Type: Schema{Type: "bytes"}, 56 | }, 57 | { 58 | Name: "G", 59 | Type: Schema{Type: "string"}, 60 | }, 61 | { 62 | Name: "H", 63 | Type: Schema{ 64 | Type: "record", 65 | Object: &SchemaObject{ 66 | Name: "some", 67 | Fields: []SchemaRecordField{ 68 | { 69 | Name: "I", 70 | Type: Schema{ 71 | Type: "array", 72 | Object: &SchemaObject{ 73 | Items: Schema{Type: "int"}, 74 | }, 75 | }, 76 | }, 77 | }, 78 | }, 79 | }, 80 | }, 81 | { 82 | Name: "J", 83 | Type: Schema{ 84 | Type: "map", 85 | Object: &SchemaObject{ 86 | Values: Schema{Type: "long"}, 87 | }, 88 | }, 89 | }, 90 | { 91 | Name: "K", 92 | Type: Schema{ 93 | Type: "fixed", 94 | Object: &SchemaObject{ 95 | Size: 4, 96 | }, 97 | }, 98 | }, 99 | { 100 | Name: "L", 101 | Type: Schema{ 102 | Type: "long", 103 | }, 104 | }, 105 | }, 106 | }, 107 | } 108 | 109 | c, err := buildCodec(allSchema, reflect.TypeFor[all](), false) 110 | if err != nil { 111 | t.Fatal(err) 112 | } 113 | 114 | _ = c 115 | } 116 | -------------------------------------------------------------------------------- /buildschema.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "strings" 7 | "sync" 8 | ) 9 | 10 | var ( 11 | schemaRegistryMutex sync.RWMutex 12 | schemaRegistry = make(map[reflect.Type]Schema) 13 | ) 14 | 15 | // Call RegisterSchema to indicate what schema should be used for a given type. 16 | // Use this to register the schema to use for a type for which you write a 17 | // custom codec. 18 | func RegisterSchema(typ reflect.Type, s Schema) { 19 | schemaRegistryMutex.Lock() 20 | defer schemaRegistryMutex.Unlock() 21 | schemaRegistry[typ] = s 22 | } 23 | 24 | // SchemaForType returns a Schema for the given type. It aims to produce a 25 | // Schema that's compatible with BigQuery. 
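//
// A minimal usage sketch (the person type here is illustrative, not part of
// this package):
//
//	type person struct {
//		Name string `json:"name"`
//		Age  int    `json:"age,omitempty"`
//	}
//
//	s, err := avro.SchemaForType(person{})
//	if err != nil {
//		// handle the error
//	}
//	schemaJSON, err := s.Marshal() // JSON-encode the schema, e.g. for a file header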
26 | func SchemaForType(item any) (Schema, error) { 27 | typ := reflect.TypeOf(item) 28 | if typ.Kind() == reflect.Ptr { 29 | typ = typ.Elem() 30 | } 31 | if typ.Kind() != reflect.Struct { 32 | return Schema{}, fmt.Errorf("item must be a struct or pointer to a struct") 33 | } 34 | 35 | return schemaForType(typ) 36 | } 37 | 38 | func isInSchemaRegistry(typ reflect.Type) (Schema, bool) { 39 | schemaRegistryMutex.RLock() 40 | defer schemaRegistryMutex.RUnlock() 41 | s, ok := schemaRegistry[typ] 42 | return s, ok 43 | } 44 | 45 | func schemaForType(typ reflect.Type) (Schema, error) { 46 | if s, ok := isInSchemaRegistry(typ); ok { 47 | return s, nil 48 | } 49 | 50 | // BigQuery makes every basic type nullable. We'll send null for the zero 51 | // value if there's an "omitempty" tag. 52 | switch typ.Kind() { 53 | case reflect.Bool: 54 | return Schema{Type: "boolean"}, nil 55 | case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 56 | return Schema{Type: "long"}, nil 57 | case reflect.Float32, reflect.Float64: 58 | return Schema{Type: "double"}, nil 59 | case reflect.String: 60 | return Schema{Type: "string"}, nil 61 | case reflect.Struct: 62 | return schemaForStruct(typ) 63 | case reflect.Array, reflect.Slice: 64 | return schemaForArray(typ) 65 | case reflect.Map: 66 | return schemaForMap(typ) 67 | case reflect.Pointer: 68 | // If this is a pointer to a basic type then we don't need to wrap in a union as all the basic types are nullable. 69 | underlying, err := schemaForType(typ.Elem()) 70 | if err != nil { 71 | return Schema{}, fmt.Errorf("getting underlying schema for pointer: %w", err) 72 | } 73 | if underlying.Type == "union" || underlying.Type == "array" || underlying.Type == "map" { 74 | return underlying, nil 75 | } 76 | return nullableSchema(underlying), nil 77 | default: 78 | return Schema{}, fmt.Errorf("type %s not supported", typ) 79 | } 80 | } 81 | 82 | func nullableSchema(s Schema) Schema { 83 | return Schema{ 84 | Type: "union", 85 | Union: []Schema{ 86 | {Type: "null"}, 87 | s, 88 | }, 89 | } 90 | } 91 | 92 | func schemaForStruct(typ reflect.Type) (Schema, error) { 93 | fields := make([]SchemaRecordField, 0, typ.NumField()) 94 | for i := range typ.NumField() { 95 | field := typ.Field(i) 96 | name := nameForField(field) 97 | if name == "-" { 98 | continue 99 | } 100 | 101 | s, err := schemaForType(field.Type) 102 | if err != nil { 103 | return Schema{}, fmt.Errorf("getting schema for field %s: %w", name, err) 104 | } 105 | 106 | if omitEmpty(field) && s.Type != "union" { 107 | s = nullableSchema(s) 108 | } 109 | 110 | fields = append(fields, SchemaRecordField{ 111 | Name: name, 112 | Type: s, 113 | }) 114 | } 115 | 116 | return Schema{ 117 | Type: "record", 118 | Object: &SchemaObject{ 119 | Name: typ.Name(), 120 | // namespace must be a valid Avro namespace, which is a 121 | // dot-separated alphanumeric string. 
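// namespaceReplacer (defined below) maps the Go package path onto that
// form: for example, a type defined in "github.com/philpearl/avro" gets the
// namespace "github.com.philpearl.avro".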
122 | Namespace: namespaceReplacer.Replace(typ.PkgPath()), 123 | Fields: fields, 124 | }, 125 | }, nil 126 | } 127 | 128 | var namespaceReplacer = strings.NewReplacer("/", ".", "-", "_") 129 | 130 | func schemaForArray(typ reflect.Type) (Schema, error) { 131 | elem := typ.Elem() 132 | if elem.Kind() == reflect.Uint8 { 133 | return Schema{ 134 | Type: "bytes", 135 | }, nil 136 | } 137 | 138 | s, err := schemaForType(elem) 139 | if err != nil { 140 | return Schema{}, fmt.Errorf("building array schema: %w", err) 141 | } 142 | 143 | return Schema{ 144 | Type: "array", 145 | Object: &SchemaObject{ 146 | Items: s, 147 | }, 148 | }, nil 149 | } 150 | 151 | func schemaForMap(typ reflect.Type) (Schema, error) { 152 | s, err := schemaForType(typ.Elem()) 153 | if err != nil { 154 | return Schema{}, err 155 | } 156 | 157 | return Schema{ 158 | Type: "map", 159 | Object: &SchemaObject{ 160 | Values: s, 161 | }, 162 | }, nil 163 | } 164 | -------------------------------------------------------------------------------- /buildschema_test.go: -------------------------------------------------------------------------------- 1 | package avro_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/google/go-cmp/cmp" 7 | "github.com/philpearl/avro" 8 | ) 9 | 10 | func TestBuildSchema(t *testing.T) { 11 | tests := []struct { 12 | name string 13 | in any 14 | exp avro.Schema 15 | }{ 16 | { 17 | name: "int", 18 | in: struct { 19 | A int `json:"aaa"` 20 | }{}, 21 | exp: avro.Schema{ 22 | Type: "record", 23 | Object: &avro.SchemaObject{ 24 | Fields: []avro.SchemaRecordField{ 25 | { 26 | Name: "aaa", 27 | Type: avro.Schema{ 28 | Type: "long", 29 | }, 30 | }, 31 | }, 32 | }, 33 | }, 34 | }, 35 | { 36 | name: "int omitempty", 37 | in: struct { 38 | A int `json:"aaa,omitempty"` 39 | }{}, 40 | exp: avro.Schema{ 41 | Type: "record", 42 | Object: &avro.SchemaObject{ 43 | Fields: []avro.SchemaRecordField{ 44 | { 45 | Name: "aaa", 46 | Type: avro.Schema{ 47 | Type: "union", 48 | Union: []avro.Schema{ 49 | {Type: "null"}, 50 | {Type: "long"}, 51 | }, 52 | }, 53 | }, 54 | }, 55 | }, 56 | }, 57 | }, 58 | 59 | { 60 | name: "int skip unexported", 61 | in: struct { 62 | A int `json:"aaa"` 63 | b int 64 | }{}, 65 | exp: avro.Schema{ 66 | Type: "record", 67 | Object: &avro.SchemaObject{ 68 | Fields: []avro.SchemaRecordField{ 69 | { 70 | Name: "aaa", 71 | Type: avro.Schema{ 72 | Type: "long", 73 | }, 74 | }, 75 | }, 76 | }, 77 | }, 78 | }, 79 | { 80 | name: "int skip json", 81 | in: struct { 82 | A int `json:"aaa"` 83 | B int `json:"-"` 84 | }{}, 85 | exp: avro.Schema{ 86 | Type: "record", 87 | Object: &avro.SchemaObject{ 88 | Fields: []avro.SchemaRecordField{ 89 | { 90 | Name: "aaa", 91 | Type: avro.Schema{ 92 | Type: "long", 93 | }, 94 | }, 95 | }, 96 | }, 97 | }, 98 | }, 99 | { 100 | name: "int skip bq", 101 | in: struct { 102 | A int `json:"aaa"` 103 | B int `json:"bbb" bq:"-"` 104 | }{}, 105 | exp: avro.Schema{ 106 | Type: "record", 107 | Object: &avro.SchemaObject{ 108 | Fields: []avro.SchemaRecordField{ 109 | { 110 | Name: "aaa", 111 | Type: avro.Schema{ 112 | Type: "long", 113 | }, 114 | }, 115 | }, 116 | }, 117 | }, 118 | }, 119 | { 120 | name: "bool", 121 | in: struct { 122 | A bool `json:"aaa"` 123 | }{}, 124 | exp: avro.Schema{ 125 | Type: "record", 126 | Object: &avro.SchemaObject{ 127 | Fields: []avro.SchemaRecordField{ 128 | { 129 | Name: "aaa", 130 | Type: avro.Schema{ 131 | Type: "boolean", 132 | }, 133 | }, 134 | }, 135 | }, 136 | }, 137 | }, 138 | { 139 | name: "float32", 140 | in: struct { 141 | A float32 
`json:"aaa"` 142 | }{}, 143 | exp: avro.Schema{ 144 | Type: "record", 145 | Object: &avro.SchemaObject{ 146 | Fields: []avro.SchemaRecordField{ 147 | { 148 | Name: "aaa", 149 | Type: avro.Schema{ 150 | Type: "double", 151 | }, 152 | }, 153 | }, 154 | }, 155 | }, 156 | }, 157 | { 158 | name: "float64", 159 | in: struct { 160 | A float64 `json:"aaa"` 161 | }{}, 162 | exp: avro.Schema{ 163 | Type: "record", 164 | Object: &avro.SchemaObject{ 165 | Fields: []avro.SchemaRecordField{ 166 | { 167 | Name: "aaa", 168 | Type: avro.Schema{ 169 | Type: "double", 170 | }, 171 | }, 172 | }, 173 | }, 174 | }, 175 | }, 176 | { 177 | name: "string", 178 | in: struct { 179 | A string `json:"aaa"` 180 | }{}, 181 | exp: avro.Schema{ 182 | Type: "record", 183 | Object: &avro.SchemaObject{ 184 | Fields: []avro.SchemaRecordField{ 185 | { 186 | Name: "aaa", 187 | Type: avro.Schema{ 188 | Type: "string", 189 | }, 190 | }, 191 | }, 192 | }, 193 | }, 194 | }, 195 | { 196 | name: "bytes", 197 | in: struct { 198 | A []byte `json:"aaa"` 199 | }{}, 200 | exp: avro.Schema{ 201 | Type: "record", 202 | Object: &avro.SchemaObject{ 203 | Fields: []avro.SchemaRecordField{ 204 | { 205 | Name: "aaa", 206 | Type: avro.Schema{ 207 | Type: "bytes", 208 | }, 209 | }, 210 | }, 211 | }, 212 | }, 213 | }, 214 | { 215 | name: "map", 216 | in: struct { 217 | A map[string]int `json:"aaa"` 218 | }{}, 219 | exp: avro.Schema{ 220 | Type: "record", 221 | Object: &avro.SchemaObject{ 222 | Fields: []avro.SchemaRecordField{ 223 | { 224 | Name: "aaa", 225 | Type: avro.Schema{ 226 | Type: "map", 227 | Object: &avro.SchemaObject{ 228 | Values: avro.Schema{ 229 | Type: "long", 230 | }, 231 | }, 232 | }, 233 | }, 234 | }, 235 | }, 236 | }, 237 | }, 238 | { 239 | name: "pointer to int", 240 | in: struct { 241 | A *int `json:"aaa"` 242 | }{}, 243 | exp: avro.Schema{ 244 | Type: "record", 245 | Object: &avro.SchemaObject{ 246 | Fields: []avro.SchemaRecordField{ 247 | { 248 | Name: "aaa", 249 | Type: avro.Schema{ 250 | Type: "union", 251 | Union: []avro.Schema{{Type: "null"}, {Type: "long"}}, 252 | }, 253 | }, 254 | }, 255 | }, 256 | }, 257 | }, 258 | { 259 | name: "struct", 260 | in: struct { 261 | A struct { 262 | B int `json:"bbb"` 263 | } `json:"aaa"` 264 | }{}, 265 | exp: avro.Schema{ 266 | Type: "record", 267 | Object: &avro.SchemaObject{ 268 | Fields: []avro.SchemaRecordField{ 269 | { 270 | Name: "aaa", 271 | Type: avro.Schema{ 272 | Type: "record", 273 | Object: &avro.SchemaObject{ 274 | Fields: []avro.SchemaRecordField{ 275 | { 276 | Name: "bbb", 277 | Type: avro.Schema{ 278 | Type: "long", 279 | }, 280 | }, 281 | }, 282 | }, 283 | }, 284 | }, 285 | }, 286 | }, 287 | }, 288 | }, 289 | { 290 | name: "struct ptr", 291 | in: struct { 292 | A *struct { 293 | B int `json:"bbb"` 294 | } `json:"aaa"` 295 | }{}, 296 | exp: avro.Schema{ 297 | Type: "record", 298 | Object: &avro.SchemaObject{ 299 | Fields: []avro.SchemaRecordField{ 300 | { 301 | Name: "aaa", 302 | Type: avro.Schema{ 303 | Type: "union", 304 | Union: []avro.Schema{ 305 | {Type: "null"}, 306 | { 307 | Type: "record", 308 | Object: &avro.SchemaObject{ 309 | Fields: []avro.SchemaRecordField{ 310 | { 311 | Name: "bbb", 312 | Type: avro.Schema{ 313 | Type: "long", 314 | }, 315 | }, 316 | }, 317 | }, 318 | }, 319 | }, 320 | }, 321 | }, 322 | }, 323 | }, 324 | }, 325 | }, 326 | 327 | { 328 | name: "struct slice", 329 | in: struct { 330 | A []struct { 331 | B int `json:"bbb"` 332 | } `json:"aaa"` 333 | }{}, 334 | exp: avro.Schema{ 335 | Type: "record", 336 | Object: &avro.SchemaObject{ 337 | 
Fields: []avro.SchemaRecordField{ 338 | { 339 | Name: "aaa", 340 | Type: avro.Schema{ 341 | Type: "array", 342 | Object: &avro.SchemaObject{ 343 | Items: avro.Schema{ 344 | Type: "record", 345 | Object: &avro.SchemaObject{ 346 | Fields: []avro.SchemaRecordField{ 347 | { 348 | Name: "bbb", 349 | Type: avro.Schema{ 350 | Type: "long", 351 | }, 352 | }, 353 | }, 354 | }, 355 | }, 356 | }, 357 | }, 358 | }, 359 | }, 360 | }, 361 | }, 362 | }, 363 | } 364 | 365 | for _, tt := range tests { 366 | t.Run(tt.name, func(t *testing.T) { 367 | got, err := avro.SchemaForType(tt.in) 368 | if err != nil { 369 | t.Fatal(err) 370 | } 371 | if diff := cmp.Diff(tt.exp, got); diff != "" { 372 | t.Errorf("BuildSchema() mismatch (-want +got):\n%s", diff) 373 | } 374 | }) 375 | } 376 | } 377 | -------------------------------------------------------------------------------- /bytes.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "unsafe" 7 | ) 8 | 9 | type BytesCodec struct{ omitEmpty bool } 10 | 11 | func (BytesCodec) Read(r *ReadBuf, ptr unsafe.Pointer) error { 12 | l, err := r.Varint() 13 | if err != nil { 14 | return fmt.Errorf("failed to read length of bytes. %w", err) 15 | } 16 | if l == 0 { 17 | return nil 18 | } 19 | data, err := r.NextAsBytes(int(l)) 20 | if err != nil { 21 | return fmt.Errorf("failed to read %d bytes of bytes body. %w", l, err) 22 | } 23 | *(*[]byte)(ptr) = data 24 | return nil 25 | } 26 | 27 | func (BytesCodec) Skip(r *ReadBuf) error { 28 | l, err := r.Varint() 29 | if err != nil { 30 | return fmt.Errorf("failed to read length of bytes. %w", err) 31 | } 32 | return skip(r, l) 33 | } 34 | 35 | var bytesType = reflect.TypeFor[[]byte]() 36 | 37 | func (BytesCodec) New(r *ReadBuf) unsafe.Pointer { 38 | return r.Alloc(bytesType) 39 | } 40 | 41 | func (rc BytesCodec) Omit(p unsafe.Pointer) bool { 42 | return rc.omitEmpty && len(*(*[]byte)(p)) == 0 43 | } 44 | 45 | func (rc BytesCodec) Write(w *WriteBuf, p unsafe.Pointer) { 46 | sh := *(*[]byte)(p) 47 | 48 | w.Varint(int64(len(sh))) 49 | w.Write(sh) 50 | } 51 | -------------------------------------------------------------------------------- /bytes_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "testing" 5 | "unsafe" 6 | 7 | "github.com/google/go-cmp/cmp" 8 | "github.com/google/go-cmp/cmp/cmpopts" 9 | ) 10 | 11 | func TestBytesCodec(t *testing.T) { 12 | tests := []struct { 13 | name string 14 | data []byte 15 | exp []byte 16 | }{ 17 | { 18 | name: "empty", 19 | data: []byte{0}, 20 | }, 21 | { 22 | name: "small", // 10 is 5 23 | data: []byte{10, 1, 2, 3, 4, 5}, 24 | exp: []byte{1, 2, 3, 4, 5}, 25 | }, 26 | } 27 | var c BytesCodec 28 | for _, test := range tests { 29 | test := test 30 | t.Run(test.name, func(t *testing.T) { 31 | t.Parallel() 32 | r := NewReadBuf(test.data) 33 | var actual []byte 34 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 35 | t.Fatal(err) 36 | } 37 | 38 | if diff := cmp.Diff(test.exp, actual); diff != "" { 39 | t.Fatalf("result not as expected. 
%s", diff) 40 | } 41 | if r.Len() != 0 { 42 | t.Fatalf("unread data %d", r.Len()) 43 | } 44 | }) 45 | t.Run(test.name+" skip", func(t *testing.T) { 46 | t.Parallel() 47 | r := NewReadBuf(test.data) 48 | if err := c.Skip(r); err != nil { 49 | t.Fatal(err) 50 | } 51 | if r.Len() != 0 { 52 | t.Fatalf("unread data %d", r.Len()) 53 | } 54 | }) 55 | 56 | } 57 | } 58 | 59 | func TestBytesRoundTrip(t *testing.T) { 60 | tests := []struct { 61 | name string 62 | in []byte 63 | }{ 64 | { 65 | name: "empty", 66 | in: []byte{}, 67 | }, 68 | { 69 | name: "zero", 70 | in: []byte{0}, 71 | }, 72 | 73 | { 74 | name: "hello", 75 | in: []byte("hello"), 76 | }, 77 | } 78 | 79 | var c BytesCodec 80 | for _, test := range tests { 81 | t.Run(test.name, func(t *testing.T) { 82 | buf := NewWriteBuf(nil) 83 | c.Write(buf, unsafe.Pointer(&test.in)) 84 | 85 | var actual []byte 86 | if err := c.Read(NewReadBuf(buf.Bytes()), unsafe.Pointer(&actual)); err != nil { 87 | t.Fatal(err) 88 | } 89 | 90 | if diff := cmp.Diff(test.in, actual, cmpopts.EquateEmpty()); diff != "" { 91 | t.Fatalf("output not as expected. %s", diff) 92 | } 93 | }) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /codec.go: -------------------------------------------------------------------------------- 1 | // Package avro is an AVRO encoder and decoder aimed principly at decoding AVRO 2 | // output from Google's Big Query. It encodes directly from Go structs and 3 | // decodes directly into Go structs, and uses json tags as naming hints. 4 | // 5 | // The primary decoding interface is ReadFile. This reads an AVRO file, 6 | // combining the schema in the file with type information from the struct passed 7 | // via the out parameter to decode the records. It then passes an instance of a 8 | // struct of type out to the callback cb for each record in the file. 9 | // 10 | // Use an Encoder to write AVRO files. Create an Encoder using NewEncoderFor, then 11 | // call Encode to write a record, and finally call Flush before closing the file. 12 | // 13 | // You can implement custom decoders for your own types and register them via 14 | // the Register function. github.com/phil/avro/null is an example of custom 15 | // decoders for the types defined in github.com/unravelin/null 16 | package avro 17 | 18 | import ( 19 | "unsafe" 20 | ) 21 | 22 | // Codec defines an encoder / decoder for a type. 23 | // You can write custom Codecs for types. See Register and CodecBuildFunc 24 | type Codec interface { 25 | // Read reads the wire format bytes for the current field from r and sets up 26 | // the value that p points to. The codec can assume that the memory for an 27 | // instance of the type for which the codec is registered is present behind 28 | // p 29 | Read(r *ReadBuf, p unsafe.Pointer) error 30 | // Skip advances the reader over the bytes for the current field. 31 | Skip(r *ReadBuf) error 32 | // New creates a pointer to the type for which the codec is registered. It is 33 | // used if the enclosing record has a field that is a pointer to this type 34 | New(r *ReadBuf) unsafe.Pointer 35 | 36 | // Omit returns true if the value that p points to should be omitted from the 37 | // output. This is used for optional fields in records. 38 | Omit(p unsafe.Pointer) bool 39 | 40 | // Write writes the wire format bytes for the value that p points to to w. 
41 | Write(w *WriteBuf, p unsafe.Pointer) 42 | } 43 | -------------------------------------------------------------------------------- /discard.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | func skip(r *ReadBuf, l int64) error { 4 | _, err := r.Next(int(l)) 5 | return err 6 | } 7 | -------------------------------------------------------------------------------- /encoder.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "reflect" 7 | "unsafe" 8 | ) 9 | 10 | type Encoder[T any] struct { 11 | schema Schema 12 | codec Codec 13 | fw *FileWriter 14 | w io.Writer 15 | 16 | approxBlockSize int 17 | wb *WriteBuf 18 | count int 19 | } 20 | 21 | // NewEncoderFor returns a new Encoder. Data will be written to w in Avro format, 22 | // including a schema header. The data will be compressed using the specified 23 | // compression algorithm. Data is written in blocks of at least approxBlockSize 24 | // bytes. A block is written when it reaches that size, or when Flush is called. 25 | func NewEncoderFor[T any](w io.Writer, compression Compression, approxBlockSize int) (*Encoder[T], error) { 26 | var t T 27 | 28 | typ := reflect.TypeFor[T]() 29 | if typ.Kind() != reflect.Struct { 30 | return nil, fmt.Errorf("only structs are supported, got %v", typ) 31 | } 32 | 33 | s, err := schemaForType(typ) 34 | if err != nil { 35 | return nil, fmt.Errorf("generating schema: %w", err) 36 | } 37 | 38 | c, err := s.Codec(t) 39 | if err != nil { 40 | return nil, fmt.Errorf("generating codec: %w", err) 41 | } 42 | 43 | schemaBytes, err := s.Marshal() 44 | if err != nil { 45 | return nil, fmt.Errorf("marshaling schema: %w", err) 46 | } 47 | 48 | fw, err := NewFileWriter(schemaBytes, compression) 49 | if err != nil { 50 | return nil, fmt.Errorf("creating file writer: %w", err) 51 | } 52 | 53 | if err := fw.WriteHeader(w); err != nil { 54 | return nil, fmt.Errorf("writing file header: %w", err) 55 | } 56 | 57 | return &Encoder[T]{ 58 | schema: s, 59 | codec: c, 60 | fw: fw, 61 | w: w, 62 | 63 | approxBlockSize: approxBlockSize, 64 | wb: NewWriteBuf(make([]byte, 0, approxBlockSize)), 65 | }, nil 66 | } 67 | 68 | // Encode writes a new row to the Avro file. 69 | func (e *Encoder[T]) Encode(v *T) error { 70 | e.codec.Write(e.wb, unsafe.Pointer(v)) 71 | e.count++ 72 | 73 | if e.wb.Len() >= e.approxBlockSize { 74 | if err := e.Flush(); err != nil { 75 | return fmt.Errorf("flushing: %w", err) 76 | } 77 | } 78 | 79 | return nil 80 | } 81 | 82 | // Flush writes any buffered data to the underlying writer. It completes the 83 | // current block. It must be called before closing the underlying file. 
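//
// A typical write sequence looks roughly like this (the row type, the rows
// slice and the file handling are illustrative):
//
//	enc, err := avro.NewEncoderFor[row](f, avro.CompressionSnappy, 64*1024)
//	if err != nil {
//		return err
//	}
//	for i := range rows {
//		if err := enc.Encode(&rows[i]); err != nil {
//			return err
//		}
//	}
//	if err := enc.Flush(); err != nil {
//		return err
//	}
//	return f.Close()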
84 | func (e *Encoder[T]) Flush() error { 85 | if e.count > 0 { 86 | if err := e.fw.WriteBlock(e.w, e.count, e.wb.Bytes()); err != nil { 87 | return fmt.Errorf("writing block: %w", err) 88 | } 89 | e.count = 0 90 | e.wb.Reset() 91 | } 92 | return nil 93 | } 94 | -------------------------------------------------------------------------------- /encoder_test.go: -------------------------------------------------------------------------------- 1 | package avro_test 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | "unsafe" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | "github.com/google/go-cmp/cmp/cmpopts" 10 | "github.com/philpearl/avro" 11 | ) 12 | 13 | func TestEncoder(t *testing.T) { 14 | type myStruct struct { 15 | Name string `json:"name"` 16 | Hat string `json:",omitempty"` 17 | V int 18 | Q float64 19 | Bytes []byte 20 | La []int `json:"la"` 21 | W int32 `json:"w,omitempty"` 22 | Z *int64 `json:"z"` 23 | Mmm map[string]string 24 | } 25 | 26 | buf := bytes.NewBuffer(nil) 27 | 28 | enc, err := avro.NewEncoderFor[myStruct](buf, avro.CompressionSnappy, 10_000) 29 | if err != nil { 30 | t.Fatal(err) 31 | } 32 | 33 | contents := []myStruct{ 34 | { 35 | Name: "jim", 36 | Hat: "cat", 37 | V: 31, 38 | Q: 3.14, 39 | Bytes: []byte{1, 2, 3, 4}, 40 | La: []int{1, 2, 3, 4}, 41 | W: 0, 42 | Z: new(int64), 43 | Mmm: map[string]string{"foo": "bar", "baz": "qux"}, 44 | }, 45 | { 46 | Name: "jim", 47 | Hat: "cat", 48 | V: 31, 49 | Q: 3.14, 50 | Bytes: []byte{1, 2, 3, 4}, 51 | La: []int{1, 2, 3, 4}, 52 | W: 0, 53 | Z: nil, 54 | Mmm: map[string]string{"foo": "bar", "baz": "qux"}, 55 | }, 56 | { 57 | Name: "jim", 58 | Hat: "cat", 59 | V: 31, 60 | Q: 0, 61 | Bytes: []byte{1, 2, 3, 4}, 62 | W: 0, 63 | Z: new(int64), 64 | Mmm: map[string]string{"foo": "bar", "baz": "qux"}, 65 | }, 66 | 67 | { 68 | Name: "jim", 69 | Hat: "cat", 70 | V: 31, 71 | Q: 0, 72 | Bytes: []byte{1, 2, 3, 4}, 73 | W: 0, 74 | Z: new(int64), 75 | }, 76 | {}, 77 | } 78 | 79 | for i := range contents { 80 | if err := enc.Encode(&contents[i]); err != nil { 81 | t.Fatal(err) 82 | } 83 | } 84 | 85 | if err := enc.Flush(); err != nil { 86 | t.Fatal(err) 87 | } 88 | 89 | var actual []myStruct 90 | if err := avro.ReadFile(buf, myStruct{}, func(val unsafe.Pointer, rb *avro.ResourceBank) error { 91 | v := *(*myStruct)(val) 92 | actual = append(actual, v) 93 | return nil 94 | }); err != nil { 95 | t.Fatal(err) 96 | } 97 | 98 | if diff := cmp.Diff(contents, actual, cmpopts.EquateEmpty()); diff != "" { 99 | t.Fatalf("result not as expected. %s", diff) 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /file.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "compress/flate" 7 | "encoding/binary" 8 | "errors" 9 | "fmt" 10 | "hash/crc32" 11 | "io" 12 | "os" 13 | "reflect" 14 | "unsafe" 15 | 16 | "github.com/go-json-experiment/json" 17 | "github.com/golang/snappy" 18 | ) 19 | 20 | // FileHeader represents an AVRO file header 21 | type FileHeader struct { 22 | Magic [4]byte `json:"magic"` 23 | Meta map[string][]byte `json:"meta"` 24 | Sync [16]byte `json:"sync"` 25 | } 26 | 27 | // FileMagic is the magic number for AVRO files. 28 | var FileMagic = [4]byte{'O', 'b', 'j', 1} 29 | 30 | // Note this isn't actually used except in one test of schema encoding. 
31 | var avroFileSchema = Schema{ 32 | Type: "record", 33 | Object: &SchemaObject{ 34 | Name: "org.apache.avro.file.Header", 35 | Fields: []SchemaRecordField{ 36 | { 37 | Name: "magic", 38 | Type: Schema{ 39 | Type: "fixed", 40 | Object: &SchemaObject{ 41 | Name: "Magic", 42 | Size: 4, 43 | }, 44 | }, 45 | }, 46 | { 47 | Name: "meta", 48 | Type: Schema{ 49 | Type: "map", 50 | Object: &SchemaObject{ 51 | Values: Schema{ 52 | Type: "bytes", 53 | }, 54 | }, 55 | }, 56 | }, 57 | { 58 | Name: "sync", 59 | Type: Schema{ 60 | Type: "fixed", 61 | Object: &SchemaObject{ 62 | Name: "Sync", 63 | Size: 16, 64 | }, 65 | }, 66 | }, 67 | }, 68 | }, 69 | } 70 | 71 | // FileSchema reads the Schema from an AVRO file. 72 | func FileSchema(filename string) (Schema, error) { 73 | f, err := os.Open(filename) 74 | if err != nil { 75 | return Schema{}, fmt.Errorf("failed to open file: %w", err) 76 | } 77 | defer f.Close() 78 | 79 | r := bufio.NewReader(f) 80 | 81 | fh, err := readFileHeader(r) 82 | if err != nil { 83 | return Schema{}, fmt.Errorf("failed to read AVRO file header: %w", err) 84 | } 85 | 86 | return fh.schema() 87 | } 88 | 89 | // Reader combines io.ByteReader and io.Reader. It's what we need to read 90 | type Reader interface { 91 | io.Reader 92 | io.ByteReader 93 | } 94 | 95 | // ReadFileFor is a type-safe version of ReadFile. 96 | // 97 | // var records []myrecord 98 | // if err := avro.ReadFileFor(f, func(val *myrecord, rb *avro.ResourceBank) error { 99 | // defer rb.Close() 100 | // records = append(records, *val) 101 | // return nil 102 | // }); err != nil { 103 | // return err 104 | // } 105 | 106 | func ReadFileFor[T any](r Reader, cb func(val *T, rb *ResourceBank) error) error { 107 | var t T 108 | return ReadFile(r, t, func(val unsafe.Pointer, rb *ResourceBank) error { 109 | return cb((*T)(val), rb) 110 | }) 111 | } 112 | 113 | // ReadFile reads from an AVRO file. The records in the file are decoded into 114 | // structs of the type indicated by out. These are fed back to the application 115 | // via the cb callback. ReadFile calls cb with a pointer to the struct and a 116 | // ResourceBank. The pointer is converted to an unsafe.Pointer. The pointer 117 | // should not be retained by the application past the return of cb. 118 | // 119 | // The data that val points to is allocated in a ResourceBank. When the 120 | // ResourceBank is closed the memory backing val is available for re-use. The 121 | // application should ensure data kept after that point is copied (e.g. by 122 | // calling strings.Clone for strings). 
123 | // 124 | // var records []myrecord 125 | // if err := avro.ReadFile(f, myrecord{}, func(val unsafe.Pointer, rb *avro.ResourceBank) error { 126 | // defer rb.Close() 127 | // records = append(records, *(*record)(val)) 128 | // return nil 129 | // }); err != nil { 130 | // return err 131 | // } 132 | func ReadFile(r Reader, out any, cb func(val unsafe.Pointer, rb *ResourceBank) error) error { 133 | fh, err := readFileHeader(r) 134 | if err != nil { 135 | return err 136 | } 137 | 138 | var decoder compressionCodec 139 | if compress, ok := fh.Meta["avro.codec"]; ok { 140 | switch string(compress) { 141 | case "null": 142 | decoder = nullCompression{} 143 | case "deflate": 144 | decoder = &deflate{} 145 | case "snappy": 146 | decoder = &snappyCodec{} 147 | default: 148 | return fmt.Errorf("compression codec %s not supported", string(compress)) 149 | } 150 | } 151 | 152 | schema, err := fh.schema() 153 | if err != nil { 154 | return err 155 | } 156 | 157 | codec, err := schema.Codec(out) 158 | if err != nil { 159 | return fmt.Errorf("failed to build codec. %w", err) 160 | } 161 | 162 | // At this point we know out is either a struct or a pointer to a struct. 163 | // We repeat some work from schema.Codec 164 | typ := reflect.TypeOf(out) 165 | var rtyp, p unsafe.Pointer 166 | 167 | if typ.Kind() == reflect.Ptr { 168 | // Pointer to a struct is what we really want. We can write to this as 169 | // Go semantics would allow us to write to the underlying struct without 170 | // weird unsafe tricks 171 | typ = typ.Elem() 172 | rtyp = unpackEFace(typ).data 173 | p = unpackEFace(out).data 174 | } else { 175 | // We don't try to re-use the memory of the out variable. If Go passes a 176 | // value type in an interface it may use memory that it doesn't expect 177 | // to be changed. Writing to the memory of go value types that can't be 178 | // changed except via unsafe mechanisms is almost certainly dangerous! 179 | // See see https://philpearl.github.io/post/anathema/ for one case 180 | rtyp = unpackEFace(typ).data 181 | p = unsafe_New(rtyp) 182 | } 183 | 184 | var compressed []byte 185 | br := &ReadBuf{} 186 | for { 187 | count, err := binary.ReadVarint(r) 188 | if err != nil { 189 | if errors.Is(err, io.EOF) { 190 | return nil 191 | } 192 | return fmt.Errorf("reading item count. %w", err) 193 | } 194 | dataLength, err := binary.ReadVarint(r) 195 | if err != nil { 196 | return fmt.Errorf("reading data block length. %w", err) 197 | } 198 | if cap(compressed) < int(dataLength) { 199 | compressed = make([]byte, dataLength) 200 | } else { 201 | compressed = compressed[:dataLength] 202 | } 203 | if n, err := io.ReadFull(r, compressed); err != nil { 204 | return fmt.Errorf("reading %d bytes of compressed data: %w after %d bytes", dataLength, err, n) 205 | } 206 | uncompressed, err := decoder.decompress(compressed) 207 | if err != nil { 208 | return fmt.Errorf("decompress failed: %w", err) 209 | } 210 | 211 | br.Reset(uncompressed) 212 | 213 | for i := range count { 214 | // TODO: might be better to allocate vals in blocks 215 | // Zero the data 216 | typedmemclr(rtyp, p) 217 | if err := codec.Read(br, p); err != nil { 218 | return fmt.Errorf("failed to read item %d in file. %w", i, err) 219 | } 220 | 221 | if err := cb(p, br.ExtractResourceBank()); err != nil { 222 | return err 223 | } 224 | } 225 | 226 | // Check the signature. 227 | var sig [16]byte 228 | if _, err := io.ReadFull(r, sig[:]); err != nil { 229 | return fmt.Errorf("failed reading block signature. 
%w", err) 230 | } 231 | if sig != fh.Sync { 232 | return fmt.Errorf("sync block does not match. Have %X, want %X", sig, fh.Sync) 233 | } 234 | } 235 | } 236 | 237 | func readFileHeader(r Reader) (fh FileHeader, err error) { 238 | // It would kind of make sense to use our codecs to read the header, but for 239 | // perf reasons we don't want to use a normal reader there 240 | if _, err := io.ReadFull(r, fh.Magic[:]); err != nil { 241 | return fh, fmt.Errorf("failed to read file magic: %w", err) 242 | } 243 | if fh.Magic != FileMagic { 244 | return fh, fmt.Errorf("file header Magic is not correct") 245 | } 246 | 247 | fh.Meta = make(map[string][]byte) 248 | // Seriously there's only going to be one block 249 | for { 250 | count, err := binary.ReadVarint(r) 251 | if err != nil { 252 | return fh, fmt.Errorf("failed to read count of map block. %w", err) 253 | } 254 | if count == 0 { 255 | break 256 | } 257 | if count < 0 { 258 | return fh, fmt.Errorf("negative block size not supported in file header") 259 | } 260 | 261 | for ; count > 0; count-- { 262 | key, err := readBytes(r) 263 | if err != nil { 264 | return fh, fmt.Errorf("failed to read key for map. %w", err) 265 | } 266 | 267 | val, err := readBytes(r) 268 | if err != nil { 269 | return fh, fmt.Errorf("failed to read value for map. %w", err) 270 | } 271 | // Put the thing in the thing 272 | fh.Meta[string(key)] = val 273 | } 274 | } 275 | 276 | if _, err := io.ReadFull(r, fh.Sync[:]); err != nil { 277 | return fh, fmt.Errorf("failed to read file sync: %w", err) 278 | } 279 | 280 | return fh, nil 281 | } 282 | 283 | func readBytes(r Reader) ([]byte, error) { 284 | l, err := binary.ReadVarint(r) 285 | if err != nil { 286 | return nil, err 287 | } 288 | v := make([]byte, l) 289 | _, err = io.ReadFull(r, v) 290 | return v, err 291 | } 292 | 293 | func (fh FileHeader) schema() (schema Schema, err error) { 294 | schemaJSON, ok := fh.Meta["avro.schema"] 295 | if !ok { 296 | return schema, fmt.Errorf("no schema found in file header") 297 | } 298 | 299 | if err := json.Unmarshal(schemaJSON, &schema); err != nil { 300 | return schema, fmt.Errorf("could not decode schema JSON from file header. 
%w", err) 301 | } 302 | 303 | return schema, nil 304 | } 305 | 306 | type compressionCodec interface { 307 | decompress(compressed []byte) ([]byte, error) 308 | compress(uncompressed []byte) ([]byte, error) 309 | } 310 | 311 | type nullCompression struct{} 312 | 313 | func (nullCompression) decompress(compressed []byte) ([]byte, error) { 314 | return compressed, nil 315 | } 316 | 317 | func (nullCompression) compress(uncompressed []byte) ([]byte, error) { 318 | return uncompressed, nil 319 | } 320 | 321 | type deflate struct { 322 | reader io.Reader 323 | writer *flate.Writer 324 | buf bytes.Reader 325 | out bytes.Buffer 326 | } 327 | 328 | func (d *deflate) decompress(compressed []byte) ([]byte, error) { 329 | d.buf.Reset(compressed) 330 | if d.reader == nil { 331 | d.reader = flate.NewReader(nil) 332 | } 333 | d.reader.(flate.Resetter).Reset(&d.buf, nil) 334 | 335 | d.out.Reset() 336 | d.out.ReadFrom(d.reader) 337 | 338 | return d.out.Bytes(), nil 339 | } 340 | 341 | func (d *deflate) compress(uncompressed []byte) ([]byte, error) { 342 | d.out.Reset() 343 | if d.writer == nil { 344 | d.writer, _ = flate.NewWriter(&d.out, flate.DefaultCompression) 345 | } 346 | d.writer.Reset(&d.out) 347 | if _, err := d.writer.Write(uncompressed); err != nil { 348 | return nil, fmt.Errorf("writing to deflate compressor: %w", err) 349 | } 350 | if err := d.writer.Close(); err != nil { 351 | return nil, fmt.Errorf("flushing deflate compressor: %w", err) 352 | } 353 | 354 | return d.out.Bytes(), nil 355 | } 356 | 357 | type snappyCodec struct { 358 | buf []byte 359 | } 360 | 361 | func (s *snappyCodec) decompress(compressed []byte) ([]byte, error) { 362 | var err error 363 | s.buf, err = snappy.Decode(s.buf[:cap(s.buf)], compressed[:len(compressed)-4]) 364 | if err != nil { 365 | return nil, fmt.Errorf("snappy decode failed: %w", err) 366 | } 367 | 368 | crc := binary.BigEndian.Uint32(compressed[len(compressed)-4:]) 369 | if crc32.ChecksumIEEE(s.buf) != crc { 370 | return nil, errors.New("snappy checksum mismatch") 371 | } 372 | 373 | return s.buf, nil 374 | } 375 | 376 | func (s *snappyCodec) compress(uncompressed []byte) ([]byte, error) { 377 | s.buf = snappy.Encode(s.buf[:cap(s.buf)], uncompressed) 378 | crc := crc32.ChecksumIEEE(uncompressed) 379 | s.buf = binary.BigEndian.AppendUint32(s.buf, crc) 380 | 381 | return s.buf, nil 382 | } 383 | -------------------------------------------------------------------------------- /file_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "bufio" 5 | "os" 6 | "testing" 7 | "unsafe" 8 | 9 | "github.com/google/go-cmp/cmp" 10 | ) 11 | 12 | func TestReadFile(t *testing.T) { 13 | f, err := os.Open("./testdata/avro1") 14 | if err != nil { 15 | t.Fatal(err) 16 | } 17 | defer f.Close() 18 | 19 | type obj struct { 20 | Typ string `json:"typ,omitempty"` 21 | Size float64 `json:"size,omitempty"` 22 | } 23 | type entry struct { 24 | Name string `json:"name,omitempty"` 25 | Number int64 `json:"number"` 26 | Owns []obj `json:"owns,omitempty"` 27 | } 28 | 29 | var actual []entry 30 | if err := ReadFileFor(bufio.NewReader(f), func(val *entry, sb *ResourceBank) error { 31 | actual = append(actual, *val) 32 | return nil 33 | }); err != nil { 34 | t.Fatal(err) 35 | } 36 | 37 | exp := []entry{ 38 | { 39 | Name: "jim", 40 | Number: 1, 41 | Owns: []obj{ 42 | { 43 | Typ: "hat", 44 | Size: 1, 45 | }, 46 | { 47 | Typ: "shoe", 48 | Size: 42, 49 | }, 50 | }, 51 | }, 52 | { 53 | Name: "fred", 54 | Number: 1, 55 | 
Owns: []obj{ 56 | { 57 | Typ: "bag", 58 | Size: 3.7, 59 | }, 60 | }, 61 | }, 62 | } 63 | 64 | if diff := cmp.Diff(exp, actual); diff != "" { 65 | t.Fatalf("result differs. %s", diff) 66 | } 67 | } 68 | 69 | func TestReadFileAlt(t *testing.T) { 70 | f, err := os.Open("./testdata/avro1") 71 | if err != nil { 72 | t.Fatal(err) 73 | } 74 | defer f.Close() 75 | 76 | type obj struct { 77 | Typ string `json:"typ,omitempty"` 78 | Size *float32 `json:"size,omitempty"` 79 | } 80 | type entry struct { 81 | Name *string `json:"name,omitempty"` 82 | Number **int32 `json:"number"` 83 | Owns *[]*obj `json:"owns,omitempty"` 84 | } 85 | 86 | var actual []entry 87 | var sbs []*ResourceBank 88 | if err := ReadFile(bufio.NewReader(f), &entry{}, func(val unsafe.Pointer, sb *ResourceBank) error { 89 | actual = append(actual, *(*entry)(val)) 90 | sbs = append(sbs, sb) 91 | return nil 92 | }); err != nil { 93 | t.Fatal(err) 94 | } 95 | 96 | strptr := func(v string) *string { 97 | return &v 98 | } 99 | floatptr := func(v float32) *float32 { 100 | return &v 101 | } 102 | var one int32 = 1 103 | oneptr := &one 104 | 105 | exp := []entry{ 106 | { 107 | Name: strptr("jim"), 108 | Number: &oneptr, 109 | Owns: &[]*obj{ 110 | { 111 | Typ: "hat", 112 | Size: floatptr(1), 113 | }, 114 | { 115 | Typ: "shoe", 116 | Size: floatptr(42), 117 | }, 118 | }, 119 | }, 120 | { 121 | Name: strptr("fred"), 122 | Number: &oneptr, 123 | Owns: &[]*obj{ 124 | { 125 | Typ: "bag", 126 | Size: floatptr(3.7), 127 | }, 128 | }, 129 | }, 130 | } 131 | 132 | if diff := cmp.Diff(exp, actual); diff != "" { 133 | t.Fatalf("result differs. %s", diff) 134 | } 135 | for _, sb := range sbs { 136 | sb.Close() 137 | } 138 | } 139 | 140 | func TestFileSchema(t *testing.T) { 141 | schema, err := FileSchema("./testdata/avro1") 142 | if err != nil { 143 | t.Fatal(err) 144 | } 145 | if diff := cmp.Diff(Schema{ 146 | Type: "record", 147 | Object: &SchemaObject{ 148 | Name: "Root", 149 | Fields: []SchemaRecordField{ 150 | { 151 | Name: "name", 152 | Type: Schema{Type: "union", Union: []Schema{{Type: "null"}, {Type: "string"}}}, 153 | }, 154 | { 155 | Name: "number", 156 | Type: Schema{Type: "union", Union: []Schema{{Type: "null"}, {Type: "long"}}}, 157 | }, 158 | { 159 | Name: "owns", 160 | Type: Schema{ 161 | Type: "array", 162 | Object: &SchemaObject{ 163 | Items: Schema{ 164 | Type: "record", 165 | Object: &SchemaObject{ 166 | Name: "Owns", 167 | Namespace: "root", 168 | Fields: []SchemaRecordField{ 169 | { 170 | Name: "typ", 171 | Type: Schema{Type: "union", Union: []Schema{{Type: "null"}, {Type: "string"}}}, 172 | }, 173 | { 174 | Name: "size", 175 | Type: Schema{Type: "union", Union: []Schema{{Type: "null"}, {Type: "double"}}}, 176 | }, 177 | }, 178 | }, 179 | }, 180 | }, 181 | }, 182 | }, 183 | }, 184 | }, 185 | }, schema); diff != "" { 186 | t.Fatalf("not as expected: %s", diff) 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /filewriter.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "crypto/rand" 5 | "encoding/binary" 6 | "fmt" 7 | "io" 8 | ) 9 | 10 | type Compression string 11 | 12 | const ( 13 | CompressionNull Compression = "null" 14 | CompressionDeflate Compression = "deflate" 15 | CompressionSnappy Compression = "snappy" 16 | ) 17 | 18 | // FileWriter provides limited support for writing AVRO files. It allows you to 19 | // write blocks of already encoded data. 
Actually encoding data as AVRO is supported 20 | // by the Encoder type. 21 | type FileWriter struct { 22 | sync [16]byte 23 | // It may make sense for the schema to be a Schema object. But we won't use 24 | // that until we have encoding support. 25 | schema []byte 26 | compression Compression 27 | varintBuf [binary.MaxVarintLen64]byte 28 | compressor compressionCodec 29 | } 30 | 31 | // NewFileWriter creates a new FileWriter. The schema is the JSON encoded 32 | // schema. The compression parameter indicates the compression codec to use. 33 | func NewFileWriter(schema []byte, compression Compression) (*FileWriter, error) { 34 | // Generate a random sync value 35 | f := &FileWriter{ 36 | schema: schema, 37 | compression: compression, 38 | } 39 | _, err := rand.Read(f.sync[:]) 40 | if err != nil { 41 | return nil, fmt.Errorf("creating sync value: %w", err) 42 | } 43 | 44 | switch compression { 45 | case CompressionNull: 46 | f.compressor = nullCompression{} 47 | case CompressionDeflate: 48 | f.compressor = &deflate{} 49 | case CompressionSnappy: 50 | f.compressor = &snappyCodec{} 51 | default: 52 | return nil, fmt.Errorf("compression codec %s not supported", compression) 53 | } 54 | 55 | return f, nil 56 | } 57 | 58 | // WriteHeader writes the AVRO file header to the writer. 59 | func (f *FileWriter) WriteHeader(w io.Writer) error { 60 | buf := make([]byte, 0, 1024) 61 | buf = f.AppendHeader(buf) 62 | _, err := w.Write(buf) 63 | return err 64 | } 65 | 66 | // AppendHeader appends the AVRO file header to the provided buffer. 67 | func (f *FileWriter) AppendHeader(buf []byte) []byte { 68 | // Write the magic bytes 69 | buf = append(buf, FileMagic[:]...) 70 | 71 | // Count of how many metadata blocks there are. 72 | buf = binary.AppendVarint(buf, 2) 73 | 74 | // Write the metadata block. There will be an entry for the compression type 75 | // and an entry for the schema. Each entry is a string key followed by a 76 | // string value. Strings are written as a varint encoded length and then the 77 | // bytes of the string. 78 | buf = appendString(buf, "avro.schema") 79 | buf = appendString(buf, f.schema) 80 | buf = appendString(buf, "avro.codec") 81 | buf = appendString(buf, f.compression) 82 | 83 | // Append a zero count to indicate no more header blocks. 84 | buf = binary.AppendVarint(buf, 0) 85 | 86 | // Write the sync bytes. This is just the 16 bytes of the sync field. 87 | buf = append(buf, f.sync[:]...) 88 | return buf 89 | } 90 | 91 | type appendable interface { 92 | ~string | ~[]byte 93 | } 94 | 95 | func appendString[T appendable](buf []byte, s T) []byte { 96 | buf = binary.AppendVarint(buf, int64(len(s))) 97 | buf = append(buf, s...) 98 | return buf 99 | } 100 | 101 | func (f *FileWriter) writeVarInt(w io.Writer, v int) error { 102 | n := binary.PutVarint(f.varintBuf[:], int64(v)) 103 | _, err := w.Write(f.varintBuf[:n]) 104 | return err 105 | } 106 | 107 | // WriteBlock writes a block of data to the writer. The block must be rowCount 108 | // rows of AVRO encoded data. 
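// A minimal usage sketch (illustrative only; w, schemaJSON, rowCount and
// encodedRows are placeholder names, not identifiers from this package). The
// header is written once, then each WriteBlock call appends one compressed
// block of pre-encoded rows, mirroring the flow exercised in filewriter_test.go:
//
//	fw, err := NewFileWriter(schemaJSON, CompressionSnappy)
//	if err != nil {
//		return err
//	}
//	if err := fw.WriteHeader(w); err != nil {
//		return err
//	}
//	if err := fw.WriteBlock(w, rowCount, encodedRows); err != nil {
//		return err
//	}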
109 | func (f *FileWriter) WriteBlock(w io.Writer, rowCount int, block []byte) error { 110 | // Write the count of rows in the block 111 | if err := f.writeVarInt(w, rowCount); err != nil { 112 | return fmt.Errorf("writing row count: %w", err) 113 | } 114 | 115 | compressed, err := f.compressor.compress(block) 116 | if err != nil { 117 | return fmt.Errorf("compressing block: %w", err) 118 | } 119 | 120 | // Write the (compressed) block size 121 | if err := f.writeVarInt(w, len(compressed)); err != nil { 122 | return fmt.Errorf("writing block len: %w", err) 123 | } 124 | 125 | // Write the block data. 126 | if _, err := w.Write(compressed); err != nil { 127 | return fmt.Errorf("writing block: %w", err) 128 | } 129 | 130 | // Write the sync block 131 | if _, err := w.Write(f.sync[:]); err != nil { 132 | return fmt.Errorf("writing sync: %w", err) 133 | } 134 | return nil 135 | } 136 | -------------------------------------------------------------------------------- /filewriter_test.go: -------------------------------------------------------------------------------- 1 | package avro_test 2 | 3 | import ( 4 | "bufio" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | "unsafe" 9 | 10 | "github.com/google/go-cmp/cmp" 11 | "github.com/philpearl/avro" 12 | ) 13 | 14 | func TestWritingFile(t *testing.T) { 15 | type record struct { 16 | Name string `json:"name"` 17 | Hat string `json:"hat"` 18 | } 19 | 20 | schema := avro.Schema{ 21 | Type: "record", 22 | Object: &avro.SchemaObject{ 23 | Name: "Record", 24 | Fields: []avro.SchemaRecordField{ 25 | { 26 | Name: "name", 27 | Type: avro.Schema{ 28 | Type: "string", 29 | }, 30 | }, 31 | { 32 | Name: "hat", 33 | Type: avro.Schema{ 34 | Type: "string", 35 | }, 36 | }, 37 | }, 38 | }, 39 | } 40 | 41 | schemaJSON, err := schema.Marshal() 42 | if err != nil { 43 | t.Fatal(err) 44 | } 45 | 46 | data := []byte{ 47 | 6, 'j', 'i', 'm', 48 | 6, 'c', 'a', 't', 49 | 50 | 6, 's', 'i', 'm', 51 | 6, 'h', 'a', 't', 52 | } 53 | 54 | for _, compression := range []avro.Compression{avro.CompressionDeflate, avro.CompressionSnappy} { 55 | t.Run(string(compression), func(t *testing.T) { 56 | dir := t.TempDir() 57 | filename := filepath.Join(dir, "test.avro") 58 | 59 | f, err := os.Create(filename) 60 | if err != nil { 61 | t.Fatal(err) 62 | } 63 | defer f.Close() 64 | 65 | fw, err := avro.NewFileWriter(schemaJSON, compression) 66 | if err != nil { 67 | t.Fatal(err) 68 | } 69 | 70 | if err := fw.WriteHeader(f); err != nil { 71 | t.Fatal(err) 72 | } 73 | 74 | if err := fw.WriteBlock(f, 2, data); err != nil { 75 | t.Fatal(err) 76 | } 77 | 78 | if err := f.Close(); err != nil { 79 | t.Fatal(err) 80 | } 81 | 82 | r, err := os.Open(filename) 83 | if err != nil { 84 | t.Fatal(err) 85 | } 86 | defer r.Close() 87 | 88 | var records []record 89 | 90 | if err := avro.ReadFile(bufio.NewReader(r), record{}, func(val unsafe.Pointer, rb *avro.ResourceBank) error { 91 | r := (*record)(val) 92 | t.Logf("read record: %+v", r) 93 | records = append(records, *r) 94 | return nil 95 | }); err != nil { 96 | t.Fatal(err) 97 | } 98 | 99 | if diff := cmp.Diff([]record{ 100 | {Name: "jim", Hat: "cat"}, 101 | {Name: "sim", Hat: "hat"}, 102 | }, records); diff != "" { 103 | t.Fatal(diff) 104 | } 105 | }) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /fixed.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "unsafe" 6 | ) 7 | 8 | type fixedCodec struct { 9 | Size int 10 | 
} 11 | 12 | type sliceHeader struct { 13 | Data unsafe.Pointer 14 | Len int 15 | Cap int 16 | } 17 | 18 | func (f fixedCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 19 | // p points to an array of size f.Size 20 | sh := unsafe.Slice((*byte)(p), f.Size) 21 | data, err := r.Next(f.Size) 22 | copy(sh, data) 23 | return err 24 | } 25 | 26 | func (f fixedCodec) Skip(r *ReadBuf) error { 27 | return skip(r, int64(f.Size)) 28 | } 29 | 30 | func (f fixedCodec) New(r *ReadBuf) unsafe.Pointer { 31 | return r.AllocArray(reflect.TypeFor[byte](), f.Size) 32 | } 33 | 34 | func (rc fixedCodec) Omit(p unsafe.Pointer) bool { 35 | return false 36 | } 37 | 38 | func (rc fixedCodec) Write(w *WriteBuf, p unsafe.Pointer) { 39 | sh := unsafe.Slice((*byte)(p), rc.Size) 40 | w.Write(sh) 41 | } 42 | -------------------------------------------------------------------------------- /fixed_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "testing" 5 | "unsafe" 6 | 7 | "github.com/google/go-cmp/cmp" 8 | ) 9 | 10 | func TestFixed(t *testing.T) { 11 | tests := []struct { 12 | name string 13 | data []byte 14 | exp [3]byte 15 | }{ 16 | { 17 | name: "basic", 18 | data: []byte{1, 2, 3}, 19 | exp: [3]byte{1, 2, 3}, 20 | }, 21 | } 22 | 23 | for _, test := range tests { 24 | test := test 25 | t.Run(test.name, func(t *testing.T) { 26 | t.Parallel() 27 | c := fixedCodec{Size: 3} 28 | b := NewReadBuf(test.data) 29 | var actual [3]byte 30 | if err := c.Read(b, unsafe.Pointer(&actual)); err != nil { 31 | t.Fatal(err) 32 | } 33 | if diff := cmp.Diff(test.exp, actual); diff != "" { 34 | t.Fatalf("result differs: %s", diff) 35 | } 36 | if b.Len() != 0 { 37 | t.Fatalf("Not all data read: %d", b.Len()) 38 | } 39 | }) 40 | t.Run(test.name+" skip", func(t *testing.T) { 41 | t.Parallel() 42 | c := fixedCodec{Size: 3} 43 | b := NewReadBuf(test.data) 44 | if err := c.Skip(b); err != nil { 45 | t.Fatal(err) 46 | } 47 | if b.Len() != 0 { 48 | t.Fatalf("Not all data read: %d", b.Len()) 49 | } 50 | }) 51 | 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /float.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "unsafe" 7 | ) 8 | 9 | type floatCodec[t float32 | float64] struct{ omitEmpty bool } 10 | 11 | func (floatCodec[T]) Read(r *ReadBuf, p unsafe.Pointer) error { 12 | // This works for little-endian only (or is it bigendian?) 
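// (Per the AVRO spec, floats and doubles are written as their IEEE 754 bit
// patterns in little-endian byte order, so this byte-for-byte copy is only
// correct on little-endian hosts.)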
13 | return fixedCodec{Size: int(unsafe.Sizeof(T(0)))}.Read(r, p) 14 | } 15 | 16 | func (floatCodec[T]) Skip(r *ReadBuf) error { 17 | return skip(r, int64(unsafe.Sizeof(T(0)))) 18 | } 19 | 20 | var ( 21 | floatType = reflect.TypeFor[float32]() 22 | doubleType = reflect.TypeFor[float64]() 23 | ) 24 | 25 | func (floatCodec[T]) New(r *ReadBuf) unsafe.Pointer { 26 | switch unsafe.Sizeof(T(0)) { 27 | case 4: 28 | return r.Alloc(floatType) 29 | case 8: 30 | return r.Alloc(doubleType) 31 | } 32 | panic(fmt.Sprintf("unexpected float size %d", unsafe.Sizeof(T(0)))) 33 | } 34 | 35 | func (rc floatCodec[T]) Omit(p unsafe.Pointer) bool { 36 | return rc.omitEmpty && *(*T)(p) == 0 37 | } 38 | 39 | func (rc floatCodec[T]) Write(w *WriteBuf, p unsafe.Pointer) { 40 | fixedCodec{Size: int(unsafe.Sizeof(T(0)))}.Write(w, p) 41 | } 42 | 43 | type ( 44 | FloatCodec = floatCodec[float32] 45 | DoubleCodec = floatCodec[float64] 46 | ) 47 | 48 | type Float32DoubleCodec struct { 49 | DoubleCodec 50 | } 51 | 52 | func (c Float32DoubleCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 53 | var f float64 54 | if err := c.DoubleCodec.Read(r, unsafe.Pointer(&f)); err != nil { 55 | return err 56 | } 57 | *(*float32)(p) = float32(f) 58 | return nil 59 | } 60 | 61 | func (Float32DoubleCodec) New(r *ReadBuf) unsafe.Pointer { 62 | return r.Alloc(floatType) 63 | } 64 | 65 | func (rc Float32DoubleCodec) Omit(p unsafe.Pointer) bool { 66 | return rc.omitEmpty && *(*float32)(p) == 0 67 | } 68 | 69 | func (rc Float32DoubleCodec) Write(w *WriteBuf, p unsafe.Pointer) { 70 | q := float64(*(*float32)(p)) 71 | fixedCodec{Size: 8}.Write(w, unsafe.Pointer(&q)) 72 | } 73 | -------------------------------------------------------------------------------- /float_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | "unsafe" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | "github.com/google/go-cmp/cmp/cmpopts" 10 | ) 11 | 12 | func TestFloatCodec(t *testing.T) { 13 | tests := []struct { 14 | name string 15 | data []byte 16 | exp float32 17 | }{ 18 | { 19 | name: "zero", 20 | data: []byte{0, 0, 0, 0}, 21 | }, 22 | { 23 | name: "something", 24 | data: []byte{0, 1, 0, 0}, 25 | exp: 3.587324068671532e-43, 26 | }, 27 | } 28 | var c FloatCodec 29 | for _, test := range tests { 30 | test := test 31 | t.Run(test.name, func(t *testing.T) { 32 | t.Parallel() 33 | r := NewReadBuf(test.data) 34 | var actual float32 35 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 36 | t.Fatal(err) 37 | } 38 | 39 | if diff := cmp.Diff(test.exp, actual); diff != "" { 40 | t.Fatalf("result not as expected. 
%s", diff) 41 | } 42 | if r.Len() != 0 { 43 | t.Fatalf("unread data %d", r.Len()) 44 | } 45 | }) 46 | t.Run(test.name+" skip", func(t *testing.T) { 47 | t.Parallel() 48 | r := NewReadBuf(test.data) 49 | if err := c.Skip(r); err != nil { 50 | t.Fatal(err) 51 | } 52 | if r.Len() != 0 { 53 | t.Fatalf("unread data %d", r.Len()) 54 | } 55 | }) 56 | } 57 | } 58 | 59 | func TestDoubleCodec(t *testing.T) { 60 | tests := []struct { 61 | name string 62 | data []byte 63 | exp float64 64 | }{ 65 | { 66 | name: "zero", 67 | data: []byte{0, 0, 0, 0, 0, 0, 0, 0}, 68 | }, 69 | { 70 | name: "something", 71 | data: []byte{0, 1, 0, 0, 0, 0, 0, 0}, 72 | exp: 1.265e-321, 73 | }, 74 | } 75 | var c DoubleCodec 76 | for _, test := range tests { 77 | test := test 78 | t.Run(test.name, func(t *testing.T) { 79 | t.Parallel() 80 | r := NewReadBuf(test.data) 81 | var actual float64 82 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 83 | t.Fatal(err) 84 | } 85 | 86 | if diff := cmp.Diff(test.exp, actual); diff != "" { 87 | t.Fatalf("result not as expected. %s", diff) 88 | } 89 | if r.Len() != 0 { 90 | t.Fatalf("unread data %d", r.Len()) 91 | } 92 | }) 93 | t.Run(test.name+" skip", func(t *testing.T) { 94 | t.Parallel() 95 | r := NewReadBuf(test.data) 96 | if err := c.Skip(r); err != nil { 97 | t.Fatal(err) 98 | } 99 | if r.Len() != 0 { 100 | t.Fatalf("unread data %d", r.Len()) 101 | } 102 | }) 103 | } 104 | } 105 | 106 | func TestFloat32DoubleCodec(t *testing.T) { 107 | tests := []struct { 108 | name string 109 | data []byte 110 | exp float32 111 | }{ 112 | { 113 | name: "zero", 114 | data: []byte{0, 0, 0, 0, 0, 0, 0, 0}, 115 | }, 116 | { 117 | name: "something", 118 | data: []byte{0, 1, 0, 0, 0, 0, 0, 0}, 119 | exp: 1.265e-321, 120 | }, 121 | } 122 | var c Float32DoubleCodec 123 | for _, test := range tests { 124 | test := test 125 | t.Run(test.name, func(t *testing.T) { 126 | t.Parallel() 127 | r := NewReadBuf(test.data) 128 | var actual float32 129 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 130 | t.Fatal(err) 131 | } 132 | 133 | if diff := cmp.Diff(test.exp, actual); diff != "" { 134 | t.Fatalf("result not as expected. %s", diff) 135 | } 136 | if r.Len() != 0 { 137 | t.Fatalf("unread data %d", r.Len()) 138 | } 139 | }) 140 | t.Run(test.name+" skip", func(t *testing.T) { 141 | t.Parallel() 142 | r := NewReadBuf(test.data) 143 | if err := c.Skip(r); err != nil { 144 | t.Fatal(err) 145 | } 146 | if r.Len() != 0 { 147 | t.Fatalf("unread data %d", r.Len()) 148 | } 149 | }) 150 | } 151 | } 152 | 153 | func TestFloatRoundTrip(t *testing.T) { 154 | tests := []struct { 155 | name string 156 | val float32 157 | }{ 158 | { 159 | name: "zero", 160 | val: 0, 161 | }, 162 | { 163 | name: "something", 164 | val: 3.587324068671532e-43, 165 | }, 166 | { 167 | name: "negative", 168 | val: -3.587324068671532e-43, 169 | }, 170 | 171 | { 172 | name: "max", 173 | val: 3.4028234663852886e+38, 174 | }, 175 | { 176 | name: "NAN", 177 | val: float32(math.NaN()), 178 | }, 179 | } 180 | 181 | for _, test := range tests { 182 | t.Run(test.name, func(t *testing.T) { 183 | t.Parallel() 184 | w := NewWriteBuf(nil) 185 | var c FloatCodec 186 | c.Write(w, unsafe.Pointer(&test.val)) 187 | r := NewReadBuf(w.Bytes()) 188 | var actual float32 189 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 190 | t.Fatal(err) 191 | } 192 | if diff := cmp.Diff(test.val, actual, cmpopts.EquateNaNs()); diff != "" { 193 | t.Fatalf("result not as expected. 
%s", diff) 194 | } 195 | }) 196 | } 197 | } 198 | 199 | func TestDoubleRoundTrip(t *testing.T) { 200 | tests := []struct { 201 | name string 202 | val float64 203 | }{ 204 | { 205 | name: "zero", 206 | val: 0, 207 | }, 208 | { 209 | name: "something", 210 | val: 3.587324068671532e-43, 211 | }, 212 | { 213 | name: "negative", 214 | val: -3.587324068671532e-43, 215 | }, 216 | 217 | { 218 | name: "max", 219 | val: 3.4028234663852886e+38, 220 | }, 221 | { 222 | name: "NAN", 223 | val: math.NaN(), 224 | }, 225 | } 226 | 227 | for _, test := range tests { 228 | t.Run(test.name, func(t *testing.T) { 229 | t.Parallel() 230 | w := NewWriteBuf(nil) 231 | var c DoubleCodec 232 | c.Write(w, unsafe.Pointer(&test.val)) 233 | r := NewReadBuf(w.Bytes()) 234 | var actual float64 235 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 236 | t.Fatal(err) 237 | } 238 | if diff := cmp.Diff(test.val, actual, cmpopts.EquateNaNs()); diff != "" { 239 | t.Fatalf("result not as expected. %s", diff) 240 | } 241 | }) 242 | } 243 | } 244 | 245 | func TestFloat32DoubleRoundTrip(t *testing.T) { 246 | tests := []struct { 247 | name string 248 | val float32 249 | }{ 250 | { 251 | name: "zero", 252 | val: 0, 253 | }, 254 | { 255 | name: "something", 256 | val: 3.587324068671532e-43, 257 | }, 258 | { 259 | name: "negative", 260 | val: -3.587324068671532e-43, 261 | }, 262 | 263 | { 264 | name: "max", 265 | val: 3.4028234663852886e+38, 266 | }, 267 | { 268 | name: "NAN", 269 | val: float32(math.NaN()), 270 | }, 271 | } 272 | 273 | for _, test := range tests { 274 | t.Run(test.name, func(t *testing.T) { 275 | t.Parallel() 276 | w := NewWriteBuf(nil) 277 | var c Float32DoubleCodec 278 | c.Write(w, unsafe.Pointer(&test.val)) 279 | r := NewReadBuf(w.Bytes()) 280 | var actual float32 281 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 282 | t.Fatal(err) 283 | } 284 | if diff := cmp.Diff(test.val, actual, cmpopts.EquateNaNs()); diff != "" { 285 | t.Fatalf("result not as expected. 
%s", diff) 286 | } 287 | }) 288 | } 289 | } 290 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/philpearl/avro 2 | 3 | go 1.24 4 | 5 | toolchain go1.24.0 6 | 7 | replace github.com/unravelin/null => github.com/unravelin/null/v5 v5.0.1 8 | 9 | require ( 10 | github.com/go-json-experiment/json v0.0.0-20250417205406-170dfdcf87d1 11 | github.com/golang/snappy v1.0.0 12 | github.com/google/go-cmp v0.7.0 13 | github.com/unravelin/null v1.0.2 14 | ) 15 | 16 | require ( 17 | github.com/josharian/intern v1.0.0 // indirect 18 | github.com/mailru/easyjson v0.9.0 // indirect 19 | ) 20 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/go-json-experiment/json v0.0.0-20250417205406-170dfdcf87d1 h1:+VexzzkMLb1tnvpuQdGT/DicIRW7MN8ozsXqBMgp0Hk= 4 | github.com/go-json-experiment/json v0.0.0-20250417205406-170dfdcf87d1/go.mod h1:TiCD2a1pcmjd7YnhGH0f/zKNcCD06B029pHhzV23c2M= 5 | github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs= 6 | github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 7 | github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= 8 | github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= 9 | github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= 10 | github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 11 | github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= 12 | github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= 13 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 14 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 15 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 16 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 17 | github.com/unravelin/null/v5 v5.0.1 h1:FFAIq7N231O4CJreN7azzDPdtwIzJ3X+D4N/Gz3kHHE= 18 | github.com/unravelin/null/v5 v5.0.1/go.mod h1:W48ySiXKyk9D4taw9pUl3jYuUjsfWDXEDSu6CEBp1Cw= 19 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 20 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 21 | -------------------------------------------------------------------------------- /int.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "unsafe" 7 | ) 8 | 9 | // Int64Codec is an avro codec for int64 10 | type IntCodec[T int64 | int32 | int16] struct{ omitEmpty bool } 11 | 12 | func (IntCodec[T]) Read(r *ReadBuf, p unsafe.Pointer) error { 13 | i, err := r.Varint() 14 | 15 | if i > int64(uint64(1)<<(unsafe.Sizeof(T(0))*8-1)-1) || 16 | i < -1<<(unsafe.Sizeof(T(0))*8-1) { 17 | return fmt.Errorf("value %d will not fit in %T", i, T(0)) 18 | } 19 | 20 | *(*T)(p) = T(i) 21 | return err 22 | } 23 | 24 | // Skip skips over an int 25 | func (IntCodec[T]) Skip(r *ReadBuf) error { 26 | _, err := 
r.Varint() 27 | return err 28 | } 29 | 30 | var ( 31 | int64Type = reflect.TypeFor[int64]() 32 | int32Type = reflect.TypeFor[int32]() 33 | int16Type = reflect.TypeFor[int16]() 34 | ) 35 | 36 | // New creates a pointer to a new int64 37 | func (IntCodec[T]) New(r *ReadBuf) unsafe.Pointer { 38 | switch unsafe.Sizeof(T(0)) { 39 | case 8: 40 | return r.Alloc(int64Type) 41 | case 4: 42 | return r.Alloc(int32Type) 43 | case 2: 44 | return r.Alloc(int16Type) 45 | } 46 | panic(fmt.Sprintf("unexpected int size %d", unsafe.Sizeof(T(0)))) 47 | } 48 | 49 | func (rc IntCodec[T]) Omit(p unsafe.Pointer) bool { 50 | return rc.omitEmpty && *(*T)(p) == 0 51 | } 52 | 53 | func (rc IntCodec[T]) Write(w *WriteBuf, p unsafe.Pointer) { 54 | w.Varint(int64(*(*T)(p))) 55 | } 56 | 57 | type ( 58 | Int64Codec = IntCodec[int64] 59 | Int32Codec = IntCodec[int32] 60 | Int16Codec = IntCodec[int16] 61 | ) 62 | -------------------------------------------------------------------------------- /int_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | "unsafe" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | ) 10 | 11 | func TestInt64Codec(t *testing.T) { 12 | tests := []struct { 13 | name string 14 | data []byte 15 | exp int64 16 | }{ 17 | { 18 | name: "zero", 19 | data: []byte{0}, 20 | }, 21 | { 22 | name: "something", 23 | data: []byte{46}, 24 | exp: 23, 25 | }, 26 | { 27 | name: "-something", 28 | data: []byte{45}, 29 | exp: -23, 30 | }, 31 | { 32 | name: "max", 33 | data: []byte{254, 255, 255, 255, 255, 255, 255, 255, 255, 1}, 34 | exp: math.MaxInt64, 35 | }, 36 | { 37 | name: "min", 38 | data: []byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 1}, 39 | exp: math.MinInt64, 40 | }, 41 | } 42 | var c Int64Codec 43 | for _, test := range tests { 44 | test := test 45 | t.Run(test.name, func(t *testing.T) { 46 | t.Parallel() 47 | r := NewReadBuf(test.data) 48 | var actual int64 49 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 50 | t.Fatal(err) 51 | } 52 | 53 | if diff := cmp.Diff(test.exp, actual); diff != "" { 54 | t.Fatalf("result not as expected. %s", diff) 55 | } 56 | if r.Len() != 0 { 57 | t.Fatalf("unread data %d", r.Len()) 58 | } 59 | }) 60 | t.Run(test.name+" skip", func(t *testing.T) { 61 | t.Parallel() 62 | r := NewReadBuf(test.data) 63 | if err := c.Skip(r); err != nil { 64 | t.Fatal(err) 65 | } 66 | if r.Len() != 0 { 67 | t.Fatalf("unread data %d", r.Len()) 68 | } 69 | }) 70 | } 71 | } 72 | 73 | func TestInt32Codec(t *testing.T) { 74 | tests := []struct { 75 | name string 76 | data []byte 77 | exp int32 78 | }{ 79 | { 80 | name: "zero", 81 | data: []byte{0}, 82 | }, 83 | { 84 | name: "something", 85 | data: []byte{46}, 86 | exp: 23, 87 | }, 88 | { 89 | name: "-something", 90 | data: []byte{45}, 91 | exp: -23, 92 | }, 93 | { 94 | name: "max", 95 | data: []byte{254, 255, 255, 255, 15}, 96 | exp: math.MaxInt32, 97 | }, 98 | { 99 | name: "min", 100 | data: []byte{255, 255, 255, 255, 15}, 101 | exp: math.MinInt32, 102 | }, 103 | } 104 | var c Int32Codec 105 | for _, test := range tests { 106 | test := test 107 | t.Run(test.name, func(t *testing.T) { 108 | t.Parallel() 109 | r := NewReadBuf(test.data) 110 | var actual int32 111 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 112 | t.Fatal(err) 113 | } 114 | 115 | if diff := cmp.Diff(test.exp, actual); diff != "" { 116 | t.Fatalf("result not as expected. 
%s", diff) 117 | } 118 | if r.Len() != 0 { 119 | t.Fatalf("unread data %d", r.Len()) 120 | } 121 | }) 122 | t.Run(test.name+" skip", func(t *testing.T) { 123 | t.Parallel() 124 | r := NewReadBuf(test.data) 125 | if err := c.Skip(r); err != nil { 126 | t.Fatal(err) 127 | } 128 | if r.Len() != 0 { 129 | t.Fatalf("unread data %d", r.Len()) 130 | } 131 | }) 132 | } 133 | } 134 | 135 | func TestInt16TooBig(t *testing.T) { 136 | var c Int16Codec 137 | r := NewReadBuf([]byte{128, 128, 4}) 138 | var actual int16 139 | err := c.Read(r, unsafe.Pointer(&actual)) 140 | if err == nil { 141 | t.Fatal("expected an error") 142 | } 143 | if s := err.Error(); s != "value 32768 will not fit in int16" { 144 | t.Fatalf("error not as expected: %q", s) 145 | } 146 | } 147 | 148 | func TestInt16Codec(t *testing.T) { 149 | tests := []struct { 150 | name string 151 | data []byte 152 | exp int16 153 | }{ 154 | { 155 | name: "zero", 156 | data: []byte{0}, 157 | }, 158 | { 159 | name: "something", 160 | data: []byte{46}, 161 | exp: 23, 162 | }, 163 | { 164 | name: "-something", 165 | data: []byte{45}, 166 | exp: -23, 167 | }, 168 | { 169 | name: "max", 170 | data: []byte{254, 255, 3}, 171 | exp: math.MaxInt16, 172 | }, 173 | { 174 | name: "min", 175 | data: []byte{255, 255, 3}, 176 | exp: math.MinInt16, 177 | }, 178 | } 179 | var c Int16Codec 180 | for _, test := range tests { 181 | test := test 182 | t.Run(test.name, func(t *testing.T) { 183 | t.Parallel() 184 | r := NewReadBuf(test.data) 185 | var actual int16 186 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 187 | t.Fatal(err) 188 | } 189 | 190 | if diff := cmp.Diff(test.exp, actual); diff != "" { 191 | t.Fatalf("result not as expected. %s", diff) 192 | } 193 | if r.Len() != 0 { 194 | t.Fatalf("unread data %d", r.Len()) 195 | } 196 | }) 197 | t.Run(test.name+" skip", func(t *testing.T) { 198 | t.Parallel() 199 | r := NewReadBuf(test.data) 200 | if err := c.Skip(r); err != nil { 201 | t.Fatal(err) 202 | } 203 | if r.Len() != 0 { 204 | t.Fatalf("unread data %d", r.Len()) 205 | } 206 | }) 207 | } 208 | } 209 | 210 | func TestInt64RoundTrip(t *testing.T) { 211 | tests := []struct { 212 | name string 213 | in int64 214 | }{ 215 | { 216 | name: "zero", 217 | in: 0, 218 | }, 219 | { 220 | name: "something", 221 | in: 23, 222 | }, 223 | { 224 | name: "-something", 225 | in: -23, 226 | }, 227 | { 228 | name: "max", 229 | in: math.MaxInt64, 230 | }, 231 | { 232 | name: "min", 233 | in: math.MinInt64, 234 | }, 235 | } 236 | var c Int64Codec 237 | for _, test := range tests { 238 | t.Run(test.name, func(t *testing.T) { 239 | buf := NewWriteBuf(nil) 240 | c.Write(buf, unsafe.Pointer(&test.in)) 241 | var actual int64 242 | if err := c.Read(NewReadBuf(buf.Bytes()), unsafe.Pointer(&actual)); err != nil { 243 | t.Fatal(err) 244 | } 245 | if actual != test.in { 246 | t.Fatalf("%d does not match expected %d", actual, test.in) 247 | } 248 | }) 249 | } 250 | } 251 | 252 | func TestInt32RoundTrip(t *testing.T) { 253 | tests := []struct { 254 | name string 255 | in int32 256 | }{ 257 | { 258 | name: "zero", 259 | in: 0, 260 | }, 261 | { 262 | name: "something", 263 | in: 23, 264 | }, 265 | { 266 | name: "-something", 267 | in: -23, 268 | }, 269 | { 270 | name: "max", 271 | in: math.MaxInt32, 272 | }, 273 | { 274 | name: "min", 275 | in: math.MinInt32, 276 | }, 277 | } 278 | var c Int64Codec 279 | for _, test := range tests { 280 | t.Run(test.name, func(t *testing.T) { 281 | buf := NewWriteBuf(nil) 282 | c.Write(buf, unsafe.Pointer(&test.in)) 283 | var actual int32 284 | if 
err := c.Read(NewReadBuf(buf.Bytes()), unsafe.Pointer(&actual)); err != nil { 285 | t.Fatal(err) 286 | } 287 | if actual != test.in { 288 | t.Fatalf("%d does not match expected %d", actual, test.in) 289 | } 290 | }) 291 | } 292 | } 293 | 294 | func TestInt16RoundTrip(t *testing.T) { 295 | tests := []struct { 296 | name string 297 | in int16 298 | }{ 299 | { 300 | name: "zero", 301 | in: 0, 302 | }, 303 | { 304 | name: "something", 305 | in: 23, 306 | }, 307 | { 308 | name: "-something", 309 | in: -23, 310 | }, 311 | { 312 | name: "max", 313 | in: math.MaxInt16, 314 | }, 315 | { 316 | name: "min", 317 | in: math.MinInt16, 318 | }, 319 | } 320 | var c Int64Codec 321 | for _, test := range tests { 322 | t.Run(test.name, func(t *testing.T) { 323 | buf := NewWriteBuf(nil) 324 | c.Write(buf, unsafe.Pointer(&test.in)) 325 | var actual int16 326 | if err := c.Read(NewReadBuf(buf.Bytes()), unsafe.Pointer(&actual)); err != nil { 327 | t.Fatal(err) 328 | } 329 | if actual != test.in { 330 | t.Fatalf("%d does not match expected %d", actual, test.in) 331 | } 332 | }) 333 | } 334 | } 335 | -------------------------------------------------------------------------------- /interface.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import "unsafe" 4 | 5 | type eface struct { 6 | rtype unsafe.Pointer 7 | data unsafe.Pointer 8 | } 9 | 10 | func unpackEFace(obj interface{}) *eface { 11 | return (*eface)(unsafe.Pointer(&obj)) 12 | } 13 | -------------------------------------------------------------------------------- /map.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "unsafe" 7 | ) 8 | 9 | // MapCodec is a decoder for map types. The key must always be string 10 | type MapCodec struct { 11 | valueCodec Codec 12 | rtype reflect.Type 13 | omitEmpty bool 14 | } 15 | 16 | func (m *MapCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 17 | // p is a pointer to a map pointer 18 | if *(*unsafe.Pointer)(p) == nil { 19 | *(*unsafe.Pointer)(p) = m.New(r) 20 | } 21 | mp := *(*unsafe.Pointer)(p) 22 | 23 | // Blocks are repeated until there's a zero count block 24 | for { 25 | count, err := r.Varint() 26 | if err != nil { 27 | return fmt.Errorf("failed to read count of map block. %w", err) 28 | } 29 | if count == 0 { 30 | break 31 | } 32 | 33 | if count < 0 { 34 | count = -count 35 | // Block size is more useful if we're skipping over the map 36 | if _, err := r.Varint(); err != nil { 37 | return fmt.Errorf("failed to read block size of map block. %w", err) 38 | } 39 | } 40 | 41 | var sc StringCodec 42 | for ; count > 0; count-- { 43 | var key string 44 | if err := sc.Read(r, unsafe.Pointer(&key)); err != nil { 45 | return fmt.Errorf("failed to read key for map. %w", err) 46 | } 47 | 48 | // TODO: can we just reuse one val? 49 | val := m.valueCodec.New(r) 50 | if err := m.valueCodec.Read(r, val); err != nil { 51 | return fmt.Errorf("failed to read value for map key %s. %w", key, err) 52 | } 53 | // Put the thing in the thing 54 | mapassign(unpackEFace(m.rtype).data, mp, unsafe.Pointer(&key), val) 55 | } 56 | } 57 | 58 | return nil 59 | } 60 | 61 | func (m *MapCodec) Skip(r *ReadBuf) error { 62 | for { 63 | count, err := r.Varint() 64 | if err != nil { 65 | return fmt.Errorf("failed to read count of map block. 
%w", err) 66 | } 67 | 68 | if count == 0 { 69 | break 70 | } 71 | 72 | if count < 0 { 73 | bs, err := r.Varint() 74 | if err != nil { 75 | return fmt.Errorf("failed to read block size of map block. %w", err) 76 | } 77 | if err := skip(r, bs); err != nil { 78 | return fmt.Errorf("failed skipping block of map. %w", err) 79 | } 80 | continue 81 | } 82 | 83 | var sc StringCodec 84 | for ; count > 0; count-- { 85 | if err := sc.Skip(r); err != nil { 86 | return fmt.Errorf("failed to skip key for map. %w", err) 87 | } 88 | 89 | if err := m.valueCodec.Skip(r); err != nil { 90 | return fmt.Errorf("failed to skip value for map. %w", err) 91 | } 92 | } 93 | } 94 | 95 | return nil 96 | } 97 | 98 | func (m *MapCodec) New(r *ReadBuf) unsafe.Pointer { 99 | return unsafe.Pointer(reflect.MakeMap(m.rtype).Pointer()) 100 | } 101 | 102 | func (m *MapCodec) Omit(p unsafe.Pointer) bool { 103 | return m.omitEmpty && maplen(p) == 0 104 | } 105 | 106 | func (m *MapCodec) Write(w *WriteBuf, p unsafe.Pointer) { 107 | // p is a pointer to a map pointer, but maps are already pointery 108 | p = *(*unsafe.Pointer)(p) 109 | 110 | // Start with the count. Note the same ability to use a negative count to 111 | // record a block size exists here too. 112 | l := maplen(p) 113 | w.Varint(int64(l)) 114 | if l == 0 { 115 | return 116 | } 117 | 118 | var iterM mapiter 119 | iter := (unsafe.Pointer)(&iterM) 120 | mapiterinit(unpackEFace(m.rtype).data, p, iter) 121 | 122 | var sc StringCodec 123 | 124 | for { 125 | k := mapiterkey(iter) 126 | if k == nil { 127 | break 128 | } 129 | v := mapiterelem(iter) 130 | 131 | sc.Write(w, k) 132 | m.valueCodec.Write(w, v) 133 | 134 | mapiternext(iter) 135 | } 136 | 137 | // like arrays, theoretically there can be multiple blocks so we need to write a zero count to say there's no more. 
138 | w.Varint(0) 139 | } 140 | -------------------------------------------------------------------------------- /map_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | "unsafe" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | ) 10 | 11 | func TestMapCodec(t *testing.T) { 12 | tests := []struct { 13 | name string 14 | data []byte 15 | exp map[string][]byte 16 | }{ 17 | { 18 | name: "1 simple block", 19 | data: []byte{ 20 | // block count 21 | 2, // meaning 1 22 | // no block size for positive count 23 | // key 24 | 6, 'f', 'o', 'o', 25 | // value 26 | 8, 1, 2, 3, 4, 27 | // zero block 28 | 0, 29 | }, 30 | exp: map[string][]byte{ 31 | "foo": {1, 2, 3, 4}, 32 | }, 33 | }, 34 | { 35 | name: "block with size", 36 | data: []byte{ 37 | // block count 38 | 1, 39 | 18, 40 | // key 41 | 6, 'f', 'o', 'o', 42 | // value 43 | 8, 1, 2, 3, 4, 44 | // zero block 45 | 0, 46 | }, 47 | exp: map[string][]byte{ 48 | "foo": {1, 2, 3, 4}, 49 | }, 50 | }, 51 | 52 | { 53 | name: "1 simple block, 2 vals", 54 | data: []byte{ 55 | // block count 56 | 4, // meaning 2 57 | // no block size for positive count 58 | // key 59 | 6, 'f', 'o', 'o', 60 | // value 61 | 8, 1, 2, 3, 4, 62 | // key 63 | 6, 'b', 'a', 'r', 64 | // value 65 | 8, 4, 3, 2, 1, 66 | // zero block 67 | 0, 68 | }, 69 | exp: map[string][]byte{ 70 | "foo": {1, 2, 3, 4}, 71 | "bar": {4, 3, 2, 1}, 72 | }, 73 | }, 74 | { 75 | name: "2 simple blocks", 76 | data: []byte{ 77 | // block count 78 | 2, // meaning 1 79 | // no block size for positive count 80 | // key 81 | 6, 'f', 'o', 'o', 82 | // value 83 | 8, 1, 2, 3, 4, 84 | // Next block 85 | 2, // meaning 1 86 | // no block size for positive count 87 | // key 88 | 6, 'b', 'a', 'r', 89 | // value 90 | 8, 4, 3, 2, 1, 91 | // zero block 92 | 0, 93 | }, 94 | exp: map[string][]byte{ 95 | "foo": {1, 2, 3, 4}, 96 | "bar": {4, 3, 2, 1}, 97 | }, 98 | }, 99 | } 100 | 101 | for _, test := range tests { 102 | t.Run(test.name, func(t *testing.T) { 103 | var m map[string][]byte 104 | typ := reflect.TypeOf(m) 105 | c := MapCodec{rtype: typ, valueCodec: BytesCodec{}} 106 | 107 | r := NewReadBuf(test.data) 108 | 109 | if err := c.Read(r, unsafe.Pointer(&m)); err != nil { 110 | t.Fatal(err) 111 | } 112 | 113 | if diff := cmp.Diff(test.exp, m); diff != "" { 114 | t.Fatalf("map not as expected. %s", diff) 115 | } 116 | 117 | if r.Len() != 0 { 118 | t.Fatalf("unread bytes. %d", r.Len()) 119 | } 120 | }) 121 | 122 | t.Run(test.name+" skip", func(t *testing.T) { 123 | c := MapCodec{valueCodec: BytesCodec{}} 124 | r := NewReadBuf(test.data) 125 | if err := c.Skip(r); err != nil { 126 | t.Fatal(err) 127 | } 128 | if r.Len() != 0 { 129 | t.Fatalf("unread bytes. 
%d", r.Len()) 130 | } 131 | }) 132 | 133 | t.Run(test.name+" roundtrip", func(t *testing.T) { 134 | typ := reflect.TypeOf(test.exp) 135 | c := MapCodec{rtype: typ, valueCodec: BytesCodec{}} 136 | w := NewWriteBuf(nil) 137 | 138 | c.Write(w, (unsafe.Pointer)(&test.exp)) 139 | var actual map[string][]byte 140 | r := NewReadBuf(w.Bytes()) 141 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 142 | t.Fatal(err) 143 | } 144 | if diff := cmp.Diff(test.exp, actual); diff != "" { 145 | t.Fatal(diff) 146 | } 147 | }) 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /null.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "unsafe" 5 | ) 6 | 7 | type nullCodec struct{} 8 | 9 | func (nullCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 10 | // TODO: could consider nil-ing the pointer 11 | return nil 12 | } 13 | 14 | func (nullCodec) Skip(r *ReadBuf) error { 15 | return nil 16 | } 17 | 18 | func (nullCodec) New(r *ReadBuf) unsafe.Pointer { 19 | return nil 20 | } 21 | 22 | func (rc nullCodec) Omit(p unsafe.Pointer) bool { 23 | return true 24 | } 25 | 26 | func (rc nullCodec) Write(w *WriteBuf, p unsafe.Pointer) { 27 | } 28 | -------------------------------------------------------------------------------- /null/null.go: -------------------------------------------------------------------------------- 1 | // Package null contains avro decoders for the types in github.com/unravelin/null. 2 | // Call RegisterCodecs to make these codecs available to avro 3 | package null 4 | 5 | import ( 6 | "fmt" 7 | "reflect" 8 | "unsafe" 9 | 10 | "github.com/philpearl/avro" 11 | avrotime "github.com/philpearl/avro/time" 12 | "github.com/unravelin/null" 13 | ) 14 | 15 | // RegisterCodecs registers the codecs from this package and makes them 16 | // available to avro. 17 | func RegisterCodecs() { 18 | avro.Register(reflect.TypeFor[null.Int](), buildNullIntCodec) 19 | avro.Register(reflect.TypeFor[null.Bool](), buildNullBoolCodec) 20 | avro.Register(reflect.TypeFor[null.Float](), buildNullFloatCodec) 21 | avro.Register(reflect.TypeFor[null.String](), buildNullStringCodec) 22 | avro.Register(reflect.TypeFor[null.Time](), buildNullTimeCodec) 23 | 24 | avro.RegisterSchema(reflect.TypeFor[null.Int](), nullableSchema(avro.Schema{Type: "long"})) 25 | avro.RegisterSchema(reflect.TypeFor[null.Bool](), nullableSchema(avro.Schema{Type: "boolean"})) 26 | avro.RegisterSchema(reflect.TypeFor[null.Float](), nullableSchema(avro.Schema{Type: "double"})) 27 | avro.RegisterSchema(reflect.TypeFor[null.String](), nullableSchema(avro.Schema{Type: "string"})) 28 | 29 | // This reflects the common use of null.Time within Ravelin, the owner of the null package. 
30 | avro.RegisterSchema(reflect.TypeFor[null.Time](), nullableSchema(avro.Schema{Type: "string"})) 31 | } 32 | 33 | func nullableSchema(s avro.Schema) avro.Schema { 34 | return avro.Schema{ 35 | Type: "union", 36 | Union: []avro.Schema{ 37 | {Type: "null"}, 38 | s, 39 | }, 40 | } 41 | } 42 | 43 | func buildNullIntCodec(schema avro.Schema, typ reflect.Type, omit bool) (avro.Codec, error) { 44 | if schema.Type != "long" && schema.Type != "int" { 45 | return nil, fmt.Errorf("null.Int can only be used with long and int schema types") 46 | } 47 | return nullIntCodec{}, nil 48 | } 49 | 50 | type nullIntCodec struct { 51 | avro.Int64Codec 52 | } 53 | 54 | func (c nullIntCodec) Read(data *avro.ReadBuf, p unsafe.Pointer) error { 55 | ni := (*null.Int)(p) 56 | ni.Valid = true 57 | 58 | return c.Int64Codec.Read(data, unsafe.Pointer(&ni.Int64)) 59 | } 60 | 61 | var intType = reflect.TypeFor[null.Int]() 62 | 63 | func (c nullIntCodec) New(r *avro.ReadBuf) unsafe.Pointer { 64 | return r.Alloc(intType) 65 | } 66 | 67 | func (c nullIntCodec) Omit(p unsafe.Pointer) bool { 68 | ni := (*null.Int)(p) 69 | return !ni.Valid 70 | } 71 | 72 | func (c nullIntCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 73 | // I think we'll expect this codec to always be wrapped by a null union 74 | // codec, so checking for empty would be done elsewhere. 75 | ni := *(*null.Int)(p) 76 | c.Int64Codec.Write(w, unsafe.Pointer(&ni.Int64)) 77 | } 78 | 79 | func buildNullBoolCodec(schema avro.Schema, typ reflect.Type, omit bool) (avro.Codec, error) { 80 | if schema.Type != "boolean" { 81 | return nil, fmt.Errorf("null.Bool can only be used with boolean schema types") 82 | } 83 | return nullBoolCodec{}, nil 84 | } 85 | 86 | type nullBoolCodec struct { 87 | avro.BoolCodec 88 | } 89 | 90 | func (c nullBoolCodec) Read(data *avro.ReadBuf, ptr unsafe.Pointer) error { 91 | nb := (*null.Bool)(ptr) 92 | nb.Valid = true 93 | return c.BoolCodec.Read(data, unsafe.Pointer(&nb.Bool)) 94 | } 95 | 96 | var boolType = reflect.TypeFor[null.Bool]() 97 | 98 | func (c nullBoolCodec) New(r *avro.ReadBuf) unsafe.Pointer { 99 | return r.Alloc(boolType) 100 | } 101 | 102 | func (c nullBoolCodec) Omit(p unsafe.Pointer) bool { 103 | ni := (*null.Bool)(p) 104 | return !ni.Valid 105 | } 106 | 107 | func (c nullBoolCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 108 | // I think we'll expect this codec to always be wrapped by a null union 109 | // codec, so checking for empty would be done elsewhere. 
110 | ni := *(*null.Bool)(p) 111 | c.BoolCodec.Write(w, unsafe.Pointer(&ni.Bool)) 112 | } 113 | 114 | func buildNullFloatCodec(schema avro.Schema, typ reflect.Type, omit bool) (avro.Codec, error) { 115 | if schema.Type == "double" { 116 | return nullDoubleCodec{}, nil 117 | } 118 | 119 | if schema.Type == "float" { 120 | return nullFloatCodec{}, nil 121 | } 122 | 123 | return nil, fmt.Errorf("null.Float can only be used with double & float schema types") 124 | } 125 | 126 | type nullDoubleCodec struct { 127 | avro.DoubleCodec 128 | } 129 | 130 | func (c nullDoubleCodec) Read(data *avro.ReadBuf, ptr unsafe.Pointer) error { 131 | nf := (*null.Float)(ptr) 132 | nf.Valid = true 133 | return c.DoubleCodec.Read(data, unsafe.Pointer(&nf.Float64)) 134 | } 135 | 136 | func (c nullDoubleCodec) Omit(p unsafe.Pointer) bool { 137 | ni := (*null.Float)(p) 138 | return !ni.Valid 139 | } 140 | 141 | var floatType = reflect.TypeFor[null.Float]() 142 | 143 | func (c nullDoubleCodec) New(r *avro.ReadBuf) unsafe.Pointer { 144 | return r.Alloc(floatType) 145 | } 146 | 147 | func (c nullDoubleCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 148 | // I think we'll expect this codec to always be wrapped by a null union 149 | // codec, so checking for empty would be done elsewhere. 150 | ni := *(*null.Float)(p) 151 | c.DoubleCodec.Write(w, unsafe.Pointer(&ni.Float64)) 152 | } 153 | 154 | type nullFloatCodec struct { 155 | avro.FloatCodec 156 | } 157 | 158 | func (c nullFloatCodec) Read(data *avro.ReadBuf, ptr unsafe.Pointer) error { 159 | var f float32 160 | if err := c.FloatCodec.Read(data, unsafe.Pointer(&f)); err != nil { 161 | return err 162 | } 163 | nf := (*null.Float)(ptr) 164 | nf.Valid = true 165 | nf.Float64 = float64(f) 166 | return nil 167 | } 168 | 169 | func (c nullFloatCodec) New(r *avro.ReadBuf) unsafe.Pointer { 170 | return r.Alloc(floatType) 171 | } 172 | 173 | func (c nullFloatCodec) Omit(p unsafe.Pointer) bool { 174 | ni := (*null.Float)(p) 175 | return !ni.Valid 176 | } 177 | 178 | func (c nullFloatCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 179 | // I think we'll expect this codec to always be wrapped by a null union 180 | // codec, so checking for empty would be done elsewhere. 181 | ni := *(*null.Float)(p) 182 | c.FloatCodec.Write(w, unsafe.Pointer(&ni.Float64)) 183 | } 184 | 185 | func buildNullStringCodec(schema avro.Schema, typ reflect.Type, omit bool) (avro.Codec, error) { 186 | if schema.Type != "string" { 187 | return nil, fmt.Errorf("null.String can only be used with string schema type, not %s", schema.Type) 188 | } 189 | return nullStringCodec{}, nil 190 | } 191 | 192 | type nullStringCodec struct { 193 | avro.StringCodec 194 | } 195 | 196 | func (c nullStringCodec) Read(data *avro.ReadBuf, ptr unsafe.Pointer) error { 197 | ns := (*null.String)(ptr) 198 | ns.Valid = true 199 | return c.StringCodec.Read(data, unsafe.Pointer(&ns.String)) 200 | } 201 | 202 | var stringType = reflect.TypeFor[null.String]() 203 | 204 | func (c nullStringCodec) New(r *avro.ReadBuf) unsafe.Pointer { 205 | return r.Alloc(stringType) 206 | } 207 | 208 | func (c nullStringCodec) Omit(p unsafe.Pointer) bool { 209 | ni := (*null.String)(p) 210 | return !ni.Valid 211 | } 212 | 213 | func (c nullStringCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 214 | // I think we'll expect this codec to always be wrapped by a null union 215 | // codec, so checking for empty would be done elsewhere. 
216 | ni := *(*null.String)(p) 217 | c.StringCodec.Write(w, unsafe.Pointer(&ni.String)) 218 | } 219 | 220 | func buildNullTimeCodec(schema avro.Schema, typ reflect.Type, omit bool) (avro.Codec, error) { 221 | if schema.Type != "string" { 222 | return nil, fmt.Errorf("null.Time is only supported for string, not for %s", schema.Type) 223 | } 224 | return nullTimeCodec{}, nil 225 | } 226 | 227 | type nullTimeCodec struct { 228 | avrotime.StringCodec 229 | } 230 | 231 | func (c nullTimeCodec) Read(data *avro.ReadBuf, ptr unsafe.Pointer) error { 232 | nt := (*null.Time)(ptr) 233 | nt.Valid = true 234 | return c.StringCodec.Read(data, unsafe.Pointer(&nt.Time)) 235 | } 236 | 237 | var timeType = reflect.TypeFor[null.Time]() 238 | 239 | func (c nullTimeCodec) New(r *avro.ReadBuf) unsafe.Pointer { 240 | return r.Alloc(timeType) 241 | } 242 | 243 | func (c nullTimeCodec) Omit(p unsafe.Pointer) bool { 244 | ni := (*null.Time)(p) 245 | return !ni.Valid 246 | } 247 | 248 | func (c nullTimeCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 249 | // I think we'll expect this codec to always be wrapped by a null union 250 | // codec, so checking for empty would be done elsewhere. 251 | ni := *(*null.Time)(p) 252 | c.StringCodec.Write(w, unsafe.Pointer(&ni.Time)) 253 | } 254 | -------------------------------------------------------------------------------- /null/null_test.go: -------------------------------------------------------------------------------- 1 | package null 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "os" 7 | "testing" 8 | "time" 9 | "unsafe" 10 | 11 | "github.com/google/go-cmp/cmp" 12 | "github.com/philpearl/avro" 13 | "github.com/unravelin/null" 14 | ) 15 | 16 | func TestNullThings(t *testing.T) { 17 | RegisterCodecs() 18 | 19 | type mystruct struct { 20 | String null.String `json:"string,omitempty"` 21 | Int null.Int `json:"int,omitempty"` 22 | Bool null.Bool `json:"bool,omitempty"` 23 | Float null.Float `json:"float,omitempty"` 24 | } 25 | 26 | f, err := os.Open("./testdata/nullavro") 27 | if err != nil { 28 | t.Fatal(err) 29 | } 30 | defer f.Close() 31 | 32 | var actual []mystruct 33 | var sbs []*avro.ResourceBank 34 | if err := avro.ReadFile(bufio.NewReader(f), mystruct{}, func(val unsafe.Pointer, sb *avro.ResourceBank) error { 35 | actual = append(actual, *(*mystruct)(val)) 36 | sbs = append(sbs, sb) 37 | return nil 38 | }); err != nil { 39 | t.Fatal(err) 40 | } 41 | 42 | exp := []mystruct{ 43 | { 44 | String: null.StringFrom("String"), 45 | Int: null.IntFrom(42), 46 | Bool: null.BoolFrom(false), 47 | Float: null.FloatFrom(13.37), 48 | }, 49 | {}, 50 | } 51 | 52 | if diff := cmp.Diff(exp, actual); diff != "" { 53 | t.Fatalf("result differs. 
%s", diff) 54 | } 55 | for _, sb := range sbs { 56 | sb.Close() 57 | } 58 | } 59 | 60 | func TestNullRoundTrip(t *testing.T) { 61 | RegisterCodecs() 62 | 63 | type mystruct struct { 64 | String null.String `json:"string,omitempty"` 65 | Int null.Int `json:"int,omitempty"` 66 | Bool null.Bool `json:"bool,omitempty"` 67 | Float null.Float `json:"float,omitempty"` 68 | Time null.Time `json:"time,omitempty"` 69 | } 70 | 71 | var buf bytes.Buffer 72 | 73 | enc, err := avro.NewEncoderFor[mystruct](&buf, avro.CompressionSnappy, 1024) 74 | if err != nil { 75 | t.Fatal(err) 76 | } 77 | 78 | if err := enc.Encode(&mystruct{ 79 | String: null.StringFrom("String"), 80 | Int: null.IntFrom(42), 81 | Bool: null.BoolFrom(true), 82 | Float: null.FloatFrom(13.37), 83 | Time: null.TimeFrom(time.Date(1970, 3, 15, 13, 37, 42, 0, time.UTC)), 84 | }); err != nil { 85 | t.Fatal(err) 86 | } 87 | 88 | if err := enc.Encode(&mystruct{}); err != nil { 89 | t.Fatal(err) 90 | } 91 | 92 | if err := enc.Encode(&mystruct{ 93 | String: null.StringFrom(""), 94 | Int: null.IntFrom(0), 95 | Bool: null.BoolFrom(false), 96 | Float: null.FloatFrom(0.0), 97 | Time: null.TimeFrom(time.Time{}), 98 | }); err != nil { 99 | t.Fatal(err) 100 | } 101 | 102 | if err := enc.Flush(); err != nil { 103 | t.Fatal(err) 104 | } 105 | 106 | var actual []mystruct 107 | var sbs []*avro.ResourceBank 108 | if err := avro.ReadFile(&buf, mystruct{}, func(val unsafe.Pointer, sb *avro.ResourceBank) error { 109 | actual = append(actual, *(*mystruct)(val)) 110 | sbs = append(sbs, sb) 111 | return nil 112 | }); err != nil { 113 | t.Fatal(err) 114 | } 115 | 116 | exp := []mystruct{ 117 | { 118 | String: null.StringFrom("String"), 119 | Int: null.IntFrom(42), 120 | Bool: null.BoolFrom(true), 121 | Float: null.FloatFrom(13.37), 122 | Time: null.TimeFrom(time.Date(1970, 3, 15, 13, 37, 42, 0, time.UTC)), 123 | }, 124 | {}, 125 | { 126 | String: null.StringFrom(""), 127 | Int: null.IntFrom(0), 128 | Bool: null.BoolFrom(false), 129 | Float: null.FloatFrom(0.0), 130 | Time: null.TimeFrom(time.Time{}), 131 | }, 132 | } 133 | 134 | if diff := cmp.Diff(exp, actual); diff != "" { 135 | t.Fatalf("result differs. %s", diff) 136 | } 137 | for _, sb := range sbs { 138 | sb.Close() 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /null/testdata/nullavro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philpearl/avro/bd3141c9da8fcf1cc4d374434d25f2a019fb2d68/null/testdata/nullavro -------------------------------------------------------------------------------- /pointer.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "unsafe" 6 | ) 7 | 8 | type PointerCodec struct { 9 | Codec 10 | } 11 | 12 | func (c *PointerCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 13 | pp := (*unsafe.Pointer)(p) 14 | if *pp == nil { 15 | *pp = c.Codec.New(r) 16 | } 17 | return c.Codec.Read(r, *pp) 18 | } 19 | 20 | var pointerType = reflect.TypeFor[unsafe.Pointer]() 21 | 22 | func (c *PointerCodec) New(r *ReadBuf) unsafe.Pointer { 23 | return r.Alloc(pointerType) 24 | } 25 | 26 | func (c *PointerCodec) Omit(p unsafe.Pointer) bool { 27 | return *(*unsafe.Pointer)(p) == nil 28 | } 29 | 30 | func (c *PointerCodec) Write(w *WriteBuf, p unsafe.Pointer) { 31 | // Note this codec will normally be wrapped by a union codec, so we don't 32 | // need to worry about writing the union selector. 
33 | pp := *(*unsafe.Pointer)(p) 34 | if pp == nil { 35 | return 36 | } 37 | c.Codec.Write(w, pp) 38 | } 39 | -------------------------------------------------------------------------------- /pointer_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "testing" 5 | "unsafe" 6 | 7 | "github.com/google/go-cmp/cmp" 8 | ) 9 | 10 | func TestPointerCodec(t *testing.T) { 11 | type inStruct struct { 12 | A string 13 | B int 14 | } 15 | type myStruct struct { 16 | P *inStruct `json:",omitempty"` 17 | B int 18 | } 19 | 20 | s, err := SchemaForType(myStruct{}) 21 | if err != nil { 22 | t.Fatal(err) 23 | } 24 | 25 | if diff := cmp.Diff(Schema{ 26 | Type: "record", 27 | Object: &SchemaObject{ 28 | Name: "myStruct", 29 | Namespace: "github.com.philpearl.avro", 30 | Fields: []SchemaRecordField{ 31 | { 32 | Name: "P", 33 | Type: Schema{ 34 | Type: "union", 35 | Union: []Schema{ 36 | {Type: "null"}, 37 | { 38 | Type: "record", 39 | Object: &SchemaObject{ 40 | Name: "inStruct", 41 | Namespace: "github.com.philpearl.avro", 42 | Fields: []SchemaRecordField{ 43 | {Name: "A", Type: Schema{Type: "string"}}, 44 | {Name: "B", Type: Schema{Type: "long"}}, 45 | }, 46 | }, 47 | }, 48 | }, 49 | }, 50 | }, 51 | { 52 | Name: "B", 53 | Type: Schema{Type: "long"}, 54 | }, 55 | }, 56 | }, 57 | }, s); diff != "" { 58 | t.Fatal(diff) 59 | } 60 | 61 | c, err := s.Codec(myStruct{}) 62 | if err != nil { 63 | t.Fatal(err) 64 | } 65 | 66 | w := NewWriteBuf(nil) 67 | c.Write(w, unsafe.Pointer(&myStruct{})) 68 | 69 | if diff := cmp.Diff([]byte{0x00, 0x00}, w.Bytes()); diff != "" { 70 | t.Fatal(diff) 71 | } 72 | 73 | var out myStruct 74 | if err := c.Read(NewReadBuf(w.Bytes()), unsafe.Pointer(&out)); err != nil { 75 | t.Fatal(err) 76 | } 77 | 78 | if diff := cmp.Diff(myStruct{}, out); diff != "" { 79 | t.Fatal(diff) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # avro 2 | 3 | [![GoDoc](https://godoc.org/github.com/philpearl/avro?status.svg)](https://godoc.org/github.com/philpearl/avro) 4 | 5 | avro is an encoder & decoder for Apache AVRO that decodes directly into Go structs and follows naming from JSON tags. It is intended primarily for decoding output from Google's Big Query. 6 | 7 | https://avro.apache.org/docs/1.8.1/spec.html 8 | -------------------------------------------------------------------------------- /record.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "reflect" 7 | "unsafe" 8 | ) 9 | 10 | type recordCodecField struct { 11 | // Codec for this field 12 | codec Codec 13 | // offset of this field within the struct representing the record. -1 if this 14 | // field is not in the struct and therefore should be skipped 15 | offset uintptr 16 | name string 17 | } 18 | 19 | type recordCodec struct { 20 | rtype reflect.Type 21 | fields []recordCodecField 22 | } 23 | 24 | func (rc *recordCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 25 | for i, f := range rc.fields { 26 | if f.offset == math.MaxUint64 { 27 | if err := f.codec.Skip(r); err != nil { 28 | return fmt.Errorf("failed to skip field %d %q of record. %w", i, f.name, err) 29 | } 30 | } else { 31 | if err := f.codec.Read(r, unsafe.Add(p, f.offset)); err != nil { 32 | return fmt.Errorf("failed reading field %d %q of record. 
%w", i, f.name, err) 33 | } 34 | } 35 | } 36 | return nil 37 | } 38 | 39 | func (rc *recordCodec) Skip(r *ReadBuf) error { 40 | for i, f := range rc.fields { 41 | if err := f.codec.Skip(r); err != nil { 42 | return fmt.Errorf("failed to skip field %d %q of record. %w", i, f.name, err) 43 | } 44 | } 45 | return nil 46 | } 47 | 48 | func (rc *recordCodec) New(r *ReadBuf) unsafe.Pointer { 49 | return r.Alloc(rc.rtype) 50 | } 51 | 52 | func (rc *recordCodec) Omit(p unsafe.Pointer) bool { 53 | return false 54 | } 55 | 56 | func (rc *recordCodec) Write(w *WriteBuf, p unsafe.Pointer) { 57 | for _, rf := range rc.fields { 58 | fp := unsafe.Add(p, rf.offset) 59 | rf.codec.Write(w, fp) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /record_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | "unsafe" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | "github.com/google/go-cmp/cmp/cmpopts" 10 | ) 11 | 12 | func TestRecordCodec(t *testing.T) { 13 | type record struct { 14 | Name string `json:"name"` 15 | Hat string `json:"-"` 16 | } 17 | 18 | schema := Schema{ 19 | Type: "record", 20 | Object: &SchemaObject{ 21 | Name: "Record", 22 | Fields: []SchemaRecordField{ 23 | { 24 | Name: "name", 25 | Type: Schema{ 26 | Type: "string", 27 | }, 28 | }, 29 | { 30 | Name: "Hat", 31 | Type: Schema{ 32 | Type: "string", 33 | }, 34 | }, 35 | }, 36 | }, 37 | } 38 | 39 | data := []byte{ 40 | 6, 'j', 'i', 'm', 41 | 6, 'c', 'a', 't', 42 | } 43 | 44 | var r record 45 | c, err := buildRecordCodec(schema, reflect.TypeOf(r)) 46 | if err != nil { 47 | t.Fatal(err) 48 | } 49 | 50 | buf := NewReadBuf(data) 51 | if err := c.Read(buf, unsafe.Pointer(&r)); err != nil { 52 | t.Fatal(err) 53 | } 54 | 55 | if diff := cmp.Diff(record{Name: "jim"}, r); diff != "" { 56 | t.Fatalf("record differs. 
%s", diff) 57 | } 58 | 59 | if buf.Len() != 0 { 60 | t.Fatalf("unread data (%d)", buf.Len()) 61 | } 62 | 63 | // Now test skip 64 | buf.Reset(data) 65 | if err := c.Skip(buf); err != nil { 66 | t.Fatal(err) 67 | } 68 | if buf.Len() != 0 { 69 | t.Fatalf("unread data (%d)", buf.Len()) 70 | } 71 | } 72 | 73 | func TestRecordRoundTrip(t *testing.T) { 74 | type mustruct struct { 75 | Name string `json:"name"` 76 | Hat string `json:",omitempty"` 77 | V int 78 | Q float64 79 | Bytes []byte 80 | La []int `json:"la"` 81 | W int32 `json:"w,omitempty"` 82 | Z *int64 `json:"z"` 83 | Mmm map[string]string 84 | } 85 | 86 | var zval int64 = 1020202 87 | 88 | tests := []struct { 89 | name string 90 | data mustruct 91 | }{ 92 | { 93 | name: "basic", 94 | data: mustruct{ 95 | Name: "jim", 96 | Hat: "cat", 97 | V: 31, 98 | Q: 3.14, 99 | Bytes: []byte{1, 2, 3, 4}, 100 | La: []int{1, 2, 3, 4}, 101 | W: 0, 102 | Z: &zval, 103 | Mmm: map[string]string{"foo": "bar", "baz": "qux"}, 104 | }, 105 | }, 106 | { 107 | name: "empty", 108 | data: mustruct{}, 109 | }, 110 | } 111 | 112 | for _, test := range tests { 113 | t.Run(test.name, func(t *testing.T) { 114 | s, err := SchemaForType(&test.data) 115 | if err != nil { 116 | t.Fatal(err) 117 | } 118 | 119 | c, err := s.Codec(&test.data) 120 | if err != nil { 121 | t.Fatal(err) 122 | } 123 | 124 | buf := NewWriteBuf(nil) 125 | 126 | c.Write(buf, unsafe.Pointer(&test.data)) 127 | 128 | var actual mustruct 129 | r := NewReadBuf(buf.Bytes()) 130 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 131 | t.Fatal(err) 132 | } 133 | 134 | if diff := cmp.Diff(test.data, actual, cmpopts.EquateEmpty()); diff != "" { 135 | t.Fatalf("record differs. %s", diff) 136 | } 137 | }) 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /schema.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | 7 | "github.com/go-json-experiment/json" 8 | "github.com/go-json-experiment/json/jsontext" 9 | ) 10 | 11 | // Schema is a representation of AVRO schema JSON. Primitive types populate Type 12 | // only. UnionTypes populate Type and Union fields. All other types populate 13 | // Type and a subset of Object fields. 14 | type Schema struct { 15 | Type string 16 | Object *SchemaObject 17 | Union []Schema 18 | } 19 | 20 | // Codec creates a codec for the given schema and output type 21 | func (s Schema) Codec(out any) (Codec, error) { 22 | typ := reflect.TypeOf(out) 23 | if typ.Kind() == reflect.Ptr { 24 | typ = typ.Elem() 25 | } 26 | if typ.Kind() != reflect.Struct { 27 | return nil, fmt.Errorf("out must be a struct or pointer to a struct") 28 | } 29 | 30 | return buildCodec(s, typ, false) 31 | } 32 | 33 | func (s *Schema) Marshal() ([]byte, error) { 34 | return json.Marshal(s) 35 | } 36 | 37 | // SchemaFromString decodes a JSON string into a Schema 38 | func SchemaFromString(in string) (Schema, error) { 39 | var schema Schema 40 | if err := json.Unmarshal([]byte(in), &schema); err != nil { 41 | return schema, fmt.Errorf("could not decode schema JSON. 
%w", err) 42 | } 43 | return schema, nil 44 | } 45 | 46 | // SchemaObject contains all the fields of more complex schema types 47 | type SchemaObject struct { 48 | Type string `json:"type"` 49 | LogicalType string `json:"logicalType,omitempty"` 50 | Name string `json:"name,omitempty"` 51 | Namespace string `json:"namespace,omitempty"` 52 | // Fields in a record 53 | Fields []SchemaRecordField `json:"fields,omitempty"` 54 | // The type of each item in an array 55 | Items Schema `json:"items,omitempty"` 56 | // The value types of a map (keys are strings) 57 | Values Schema `json:"values,omitempty"` 58 | // The size of a fixed type 59 | Size int `json:"size,omitempty"` 60 | // The values of an enum 61 | Symbols []string `json:"symbols,omitempty"` 62 | } 63 | 64 | // SchemaRecordField represents one field of a Record schema 65 | type SchemaRecordField struct { 66 | Name string `json:"name,omitempty"` 67 | Type Schema `json:"type,omitempty"` 68 | } 69 | 70 | func (s *Schema) UnmarshalJSONFrom(dec *jsontext.Decoder) error { 71 | switch dec.PeekKind() { 72 | case '"': 73 | token, err := dec.ReadToken() 74 | if err != nil { 75 | return fmt.Errorf("reading string: %w", err) 76 | } 77 | s.Type = token.String() 78 | case '[': 79 | // This is an array of Schemas 80 | s.Type = "union" 81 | if err := json.UnmarshalDecode(dec, &s.Union); err != nil { 82 | return fmt.Errorf("decoding union: %w", err) 83 | } 84 | case '{': 85 | s.Object = &SchemaObject{} 86 | // do we need to isolate these decoders? 87 | if err := json.UnmarshalDecode(dec, s.Object); err != nil { 88 | return fmt.Errorf("decoding union: %w", err) 89 | } 90 | 91 | s.Type = s.Object.Type 92 | s.Object.Type = "" 93 | 94 | default: 95 | return fmt.Errorf("unexpected token unmarshalling schema: %s", dec.PeekKind()) 96 | } 97 | return nil 98 | } 99 | 100 | func (s *Schema) MarshalJSONTo(enc *jsontext.Encoder) error { 101 | switch { 102 | case s.Object != nil: 103 | if err := enc.WriteToken(jsontext.BeginObject); err != nil { 104 | return fmt.Errorf("writing begin object: %w", err) 105 | } 106 | if err := enc.WriteToken(jsontext.String("type")); err != nil { 107 | return fmt.Errorf("writing type key: %w", err) 108 | } 109 | if err := enc.WriteToken(jsontext.String(s.Type)); err != nil { 110 | return fmt.Errorf("writing type value: %w", err) 111 | } 112 | if s.Object.LogicalType != "" { 113 | if err := enc.WriteToken(jsontext.String("logicalType")); err != nil { 114 | return fmt.Errorf("writing logicalType key: %w", err) 115 | } 116 | if err := enc.WriteToken(jsontext.String(s.Object.LogicalType)); err != nil { 117 | return fmt.Errorf("writing logicalType value: %w", err) 118 | } 119 | } 120 | if s.Object.Name != "" { 121 | if err := enc.WriteToken(jsontext.String("name")); err != nil { 122 | return fmt.Errorf("writing name key: %w", err) 123 | } 124 | if err := enc.WriteToken(jsontext.String(s.Object.Name)); err != nil { 125 | return fmt.Errorf("writing name value: %w", err) 126 | } 127 | } 128 | if s.Object.Namespace != "" { 129 | if err := enc.WriteToken(jsontext.String("namespace")); err != nil { 130 | return fmt.Errorf("writing namespace key: %w", err) 131 | } 132 | if err := enc.WriteToken(jsontext.String(s.Object.Namespace)); err != nil { 133 | return fmt.Errorf("writing namespace value: %w", err) 134 | } 135 | } 136 | switch s.Type { 137 | case "record": 138 | if err := enc.WriteToken(jsontext.String("fields")); err != nil { 139 | return fmt.Errorf("writing fields key: %w", err) 140 | } 141 | if err := json.MarshalEncode(enc, 
s.Object.Fields); err != nil { 142 | return fmt.Errorf("encoding record fields: %w", err) 143 | } 144 | case "enum": 145 | if err := enc.WriteToken(jsontext.String("symbols")); err != nil { 146 | return fmt.Errorf("writing symbols key: %w", err) 147 | } 148 | if err := json.MarshalEncode(enc, s.Object.Symbols); err != nil { 149 | return fmt.Errorf("encoding enum symbols: %w", err) 150 | } 151 | case "array": 152 | if err := enc.WriteToken(jsontext.String("items")); err != nil { 153 | return fmt.Errorf("writing items key: %w", err) 154 | } 155 | if err := json.MarshalEncode(enc, s.Object.Items); err != nil { 156 | return fmt.Errorf("encoding items: %w", err) 157 | } 158 | case "map": 159 | if err := enc.WriteToken(jsontext.String("values")); err != nil { 160 | return fmt.Errorf("writing values key: %w", err) 161 | } 162 | if err := json.MarshalEncode(enc, s.Object.Values); err != nil { 163 | return fmt.Errorf("encoding values: %w", err) 164 | } 165 | case "fixed": 166 | if err := enc.WriteToken(jsontext.String("size")); err != nil { 167 | return fmt.Errorf("writing size key: %w", err) 168 | } 169 | if err := enc.WriteToken(jsontext.Int(int64(s.Object.Size))); err != nil { 170 | return fmt.Errorf("writing size value: %w", err) 171 | } 172 | } 173 | if err := enc.WriteToken(jsontext.EndObject); err != nil { 174 | return fmt.Errorf("writing end object: %w", err) 175 | } 176 | 177 | case len(s.Union) != 0: 178 | if err := json.MarshalEncode(enc, s.Union); err != nil { 179 | return fmt.Errorf("encoding union: %w", err) 180 | } 181 | default: 182 | enc.WriteToken(jsontext.String(s.Type)) 183 | } 184 | return nil 185 | } 186 | -------------------------------------------------------------------------------- /schema_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/go-json-experiment/json" 7 | "github.com/google/go-cmp/cmp" 8 | ) 9 | 10 | func TestSchemaEncoding(t *testing.T) { 11 | data, err := avroFileSchema.Marshal() 12 | if err != nil { 13 | t.Fatal(err) 14 | } 15 | 16 | if diff := cmp.Diff(`{"type":"record","name":"org.apache.avro.file.Header","fields":[{"name":"magic","type":{"type":"fixed","name":"Magic","size":4}},{"name":"meta","type":{"type":"map","values":"bytes"}},{"name":"sync","type":{"type":"fixed","name":"Sync","size":16}}]}`, string(data)); diff != "" { 17 | t.Fatalf("results differ. %s", diff) 18 | } 19 | 20 | var out Schema 21 | if err := json.Unmarshal(data, &out); err != nil { 22 | t.Fatal(err) 23 | } 24 | if diff := cmp.Diff(avroFileSchema, out); diff != "" { 25 | t.Fatalf("results differ. %s", diff) 26 | } 27 | 28 | out2, err := SchemaFromString(string(data)) 29 | if err != nil { 30 | t.Fatal(err) 31 | } 32 | if diff := cmp.Diff(avroFileSchema, out2); diff != "" { 33 | t.Fatalf("results differ. %s", diff) 34 | } 35 | } 36 | 37 | func TestUnmarshal(t *testing.T) { 38 | // This tests we can unmarshal loads of different schemas correctly. 
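// Each case is unmarshalled and diffed against want, and want is then
// marshalled again and compared byte-for-byte with the input JSON, so the
// table doubles as a marshalling round-trip test.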
39 | tests := []struct { 40 | schema string 41 | want Schema 42 | }{ 43 | { 44 | schema: `{"type":"record","name":"test","fields":[{"name":"a","type":"int"}]}`, 45 | want: Schema{ 46 | Type: "record", 47 | Object: &SchemaObject{ 48 | Name: "test", 49 | Fields: []SchemaRecordField{ 50 | { 51 | Name: "a", 52 | Type: Schema{ 53 | Type: "int", 54 | }, 55 | }, 56 | }, 57 | }, 58 | }, 59 | }, 60 | { 61 | schema: `{"type":"enum","name":"test","symbols":["a","b"]}`, 62 | want: Schema{ 63 | Type: "enum", 64 | Object: &SchemaObject{ 65 | Name: "test", 66 | Symbols: []string{"a", "b"}, 67 | }, 68 | }, 69 | }, 70 | { 71 | schema: `{"type":"fixed","name":"test","size":4}`, 72 | want: Schema{ 73 | Type: "fixed", 74 | Object: &SchemaObject{ 75 | Name: "test", 76 | Size: 4, 77 | }, 78 | }, 79 | }, 80 | { 81 | schema: `{"type":"array","items":"int"}`, 82 | want: Schema{ 83 | Type: "array", 84 | Object: &SchemaObject{ 85 | Items: Schema{ 86 | Type: "int", 87 | }, 88 | }, 89 | }, 90 | }, 91 | { 92 | schema: `{"type":"map","values":"int"}`, 93 | want: Schema{ 94 | Type: "map", 95 | Object: &SchemaObject{ 96 | Values: Schema{ 97 | Type: "int", 98 | }, 99 | }, 100 | }, 101 | }, 102 | { 103 | schema: `"null"`, 104 | want: Schema{ 105 | Type: "null", 106 | }, 107 | }, 108 | { 109 | schema: `"boolean"`, 110 | want: Schema{ 111 | Type: "boolean", 112 | }, 113 | }, 114 | { 115 | schema: `"int"`, 116 | want: Schema{ 117 | Type: "int", 118 | }, 119 | }, 120 | { 121 | schema: `"long"`, 122 | want: Schema{ 123 | Type: "long", 124 | }, 125 | }, 126 | { 127 | schema: `"float"`, 128 | want: Schema{ 129 | Type: "float", 130 | }, 131 | }, 132 | { 133 | schema: `"double"`, 134 | want: Schema{ 135 | Type: "double", 136 | }, 137 | }, 138 | { 139 | schema: `"bytes"`, 140 | want: Schema{ 141 | Type: "bytes", 142 | }, 143 | }, 144 | { 145 | schema: `"string"`, 146 | want: Schema{ 147 | Type: "string", 148 | }, 149 | }, 150 | 151 | { 152 | schema: `["null","int"]`, 153 | want: Schema{ 154 | Type: "union", 155 | Union: []Schema{ 156 | { 157 | Type: "null", 158 | }, 159 | { 160 | Type: "int", 161 | }, 162 | }, 163 | }, 164 | }, 165 | } 166 | 167 | for _, test := range tests { 168 | var got Schema 169 | if err := json.Unmarshal([]byte(test.schema), &got); err != nil { 170 | t.Fatalf("failed to unmarshal %s. %v", test.schema, err) 171 | } 172 | if diff := cmp.Diff(test.want, got); diff != "" { 173 | t.Fatalf("results differ. %s", diff) 174 | } 175 | 176 | data, err := json.Marshal(&test.want) 177 | if err != nil { 178 | t.Fatalf("failed to marshal. %v", err) 179 | } 180 | if string(data) != test.schema { 181 | t.Fatalf("expected %s got %s", test.schema, string(data)) 182 | } 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /string.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "unsafe" 7 | ) 8 | 9 | // StringCodec is a decoder for strings 10 | type StringCodec struct{ omitEmpty bool } 11 | 12 | func (StringCodec) Read(r *ReadBuf, ptr unsafe.Pointer) error { 13 | // ptr is a *string 14 | l, err := r.Varint() 15 | if err != nil { 16 | return fmt.Errorf("failed to read length of string. %w", err) 17 | } 18 | if l < 0 { 19 | return fmt.Errorf("cannot make string with length %d", l) 20 | } 21 | data, err := r.NextAsString(int(l)) 22 | if err != nil { 23 | return fmt.Errorf("failed to read %d bytes of string body. 
%w", l, err) 24 | } 25 | *(*string)(ptr) = data 26 | return nil 27 | } 28 | 29 | func (StringCodec) Skip(r *ReadBuf) error { 30 | l, err := r.Varint() 31 | if err != nil { 32 | return fmt.Errorf("failed to read length of string. %w", err) 33 | } 34 | return skip(r, l) 35 | } 36 | 37 | var stringType = reflect.TypeFor[string]() 38 | 39 | func (StringCodec) New(r *ReadBuf) unsafe.Pointer { 40 | return r.Alloc(stringType) 41 | } 42 | 43 | func (sc StringCodec) Omit(p unsafe.Pointer) bool { 44 | return sc.omitEmpty && len(*(*string)(p)) == 0 45 | } 46 | 47 | func (StringCodec) Write(w *WriteBuf, p unsafe.Pointer) { 48 | s := *(*string)(p) 49 | w.Varint(int64(len(s))) 50 | w.Write([]byte(s)) 51 | } 52 | -------------------------------------------------------------------------------- /string_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "testing" 5 | "unsafe" 6 | ) 7 | 8 | func TestStringCodec(t *testing.T) { 9 | tests := []struct { 10 | name string 11 | data []byte 12 | exp string 13 | }{ 14 | { 15 | name: "empty", 16 | data: []byte{0}, 17 | exp: "", 18 | }, 19 | { 20 | name: "hello", 21 | 22 | data: []byte{10, 'h', 'e', 'l', 'l', 'o'}, 23 | exp: "hello", 24 | }, 25 | } 26 | c := StringCodec{} 27 | for _, test := range tests { 28 | t.Run(test.name, func(t *testing.T) { 29 | r := NewReadBuf(test.data) 30 | var actual string 31 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 32 | t.Fatal(err) 33 | } 34 | if test.exp != actual { 35 | t.Fatalf("%q does not match expected %q", actual, test.exp) 36 | } 37 | if r.Len() != 0 { 38 | t.Fatalf("%d bytes left", r.Len()) 39 | } 40 | }) 41 | 42 | t.Run(test.name+" skip", func(t *testing.T) { 43 | r := NewReadBuf(test.data) 44 | 45 | if err := c.Skip(r); err != nil { 46 | t.Fatal(err) 47 | } 48 | if r.Len() != 0 { 49 | t.Fatalf("%d bytes left", r.Len()) 50 | } 51 | }) 52 | 53 | } 54 | } 55 | 56 | func TestStringRoundTrip(t *testing.T) { 57 | tests := []struct { 58 | name string 59 | in string 60 | }{ 61 | { 62 | name: "empty", 63 | in: "", 64 | }, 65 | { 66 | name: "hello", 67 | in: "hello", 68 | }, 69 | { 70 | name: "unicode", 71 | in: "こんにちは", 72 | }, 73 | 74 | { 75 | name: "emoji", 76 | in: "👋", 77 | }, 78 | } 79 | 80 | c := StringCodec{} 81 | for _, test := range tests { 82 | t.Run(test.name, func(t *testing.T) { 83 | w := NewWriteBuf(nil) 84 | c.Write(w, unsafe.Pointer(&test.in)) 85 | var actual string 86 | r := NewReadBuf(w.Bytes()) 87 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 88 | t.Fatal(err) 89 | } 90 | if test.in != actual { 91 | t.Fatalf("%q does not match expected %q", actual, test.in) 92 | } 93 | }) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /testdata/avro1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philpearl/avro/bd3141c9da8fcf1cc4d374434d25f2a019fb2d68/testdata/avro1 -------------------------------------------------------------------------------- /time/parse.go: -------------------------------------------------------------------------------- 1 | package time 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | // parseTime parses an RFC3339 timestamp. 
It exists because custom parsing of 11 | // this particular timezone is faster than using time.Parse, and parsing string 12 | // timestamps comes up rather more often than is ideal 13 | func parseTime(in string) (time.Time, error) { 14 | if len(in) < 10 { 15 | return time.Time{}, fmt.Errorf("expect time string to be at least 10 characters long %q", in) 16 | } 17 | if in[4] != '-' || in[7] != '-' { 18 | return time.Time{}, fmt.Errorf("date not formatted as expected, missing -") 19 | } 20 | 21 | // "2006-01-02T15:04:05Z07:00" 22 | y, err := atoi4(in[:4]) 23 | if err != nil { 24 | return time.Time{}, fmt.Errorf("could not parse year %q: %w", in[:4], err) 25 | } 26 | m, err := atoi2(in[5:7]) 27 | if err != nil { 28 | return time.Time{}, fmt.Errorf("could not parse month %q: %w", in[5:7], err) 29 | } 30 | d, err := atoi2(in[8:10]) 31 | if err != nil { 32 | return time.Time{}, fmt.Errorf("could not parse day %q: %w", in[8:10], err) 33 | } 34 | 35 | if len(in) == 10 { 36 | return time.Date(y, time.Month(m), d, 0, 0, 0, 0, time.UTC), nil 37 | } 38 | 39 | if len(in) < 20 { 40 | return time.Time{}, fmt.Errorf("expect time string to be at least 20 characters long if greater than 10 characters %q", in) 41 | } 42 | 43 | if in[10] != 'T' { 44 | return time.Time{}, fmt.Errorf("time not formatted as expected, missing T") 45 | } 46 | 47 | if in[13] != ':' || in[16] != ':' { 48 | return time.Time{}, fmt.Errorf("time not formatted as expected, missing ':': %q", in) 49 | } 50 | 51 | h, err := atoi2(in[11:13]) 52 | if err != nil { 53 | return time.Time{}, fmt.Errorf("could not parse hour %q: %w", in[11:13], err) 54 | } 55 | min, err := atoi2(in[14:16]) 56 | if err != nil { 57 | return time.Time{}, fmt.Errorf("could not parse minute %q: %w", in[14:16], err) 58 | } 59 | s, err := atoi2(in[17:19]) 60 | if err != nil { 61 | return time.Time{}, fmt.Errorf("could not parse seconds %q: %w", in[17:19], err) 62 | } 63 | 64 | remaining := in[19:] 65 | c := remaining[0] 66 | 67 | var nsec int 68 | if c == '.' || c == ',' { 69 | remaining = remaining[1:] 70 | // Fractional seconds! 
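// Accumulate decimal digits into val while dividing mult (starting at 1e9)
// by ten for each digit, so val*mult is the fraction in nanoseconds. For
// example ".326" leaves val=326 and mult=1e6, i.e. 326ms. The loop stops at
// the first non-digit, which must introduce the timezone.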
71 | var val, i int 72 | var c rune 73 | var mult int = 1e9 74 | for i, c = range remaining { 75 | if c >= '0' && c <= '9' { 76 | val = val*10 + int(c-'0') 77 | mult /= 10 78 | } else { 79 | i -= 1 80 | break 81 | } 82 | } 83 | nsec = val * mult 84 | remaining = remaining[i+1:] 85 | if len(remaining) == 0 { 86 | return time.Time{}, fmt.Errorf("too short to contain timezone") 87 | } 88 | } 89 | 90 | c = remaining[0] 91 | remaining = remaining[1:] 92 | var tz *time.Location 93 | if c == 'Z' { 94 | tz = time.UTC 95 | } else { 96 | var sign int 97 | switch c { 98 | case '+': 99 | sign = 1 100 | case '-': 101 | sign = -1 102 | default: 103 | return time.Time{}, fmt.Errorf("TZ must start with + or -, not %c", c) 104 | } 105 | if len(remaining) < 5 { 106 | return time.Time{}, fmt.Errorf("TZ info wrong length") 107 | } 108 | if remaining[2] != ':' { 109 | return time.Time{}, fmt.Errorf("TZ info does not include ':'") 110 | } 111 | tzh, err := atoi2(remaining[:2]) 112 | if err != nil { 113 | return time.Time{}, fmt.Errorf("could not parse timezone offset hours %q: %w", remaining[:2], err) 114 | } 115 | tzm, err := atoi2(remaining[3:5]) 116 | if err != nil { 117 | return time.Time{}, fmt.Errorf("could not parse timezone offset minutes %q: %w", remaining[3:5], err) 118 | } 119 | 120 | tz = getTimezone(sign * (tzh*60*60 + tzm*60)) 121 | 122 | remaining = remaining[5:] 123 | } 124 | 125 | if len(remaining) != 0 { 126 | return time.Time{}, fmt.Errorf("unparsed data remains after parsing complete (%q)", remaining) 127 | } 128 | 129 | return time.Date(y, time.Month(m), d, h, min, s, nsec, tz), nil 130 | } 131 | 132 | var ( 133 | tzLock sync.Mutex 134 | tzMap = make(map[int]*time.Location) 135 | ) 136 | 137 | func getTimezone(offset int) *time.Location { 138 | tzLock.Lock() 139 | defer tzLock.Unlock() 140 | tz, ok := tzMap[offset] 141 | if !ok { 142 | tz = time.FixedZone("", offset) 143 | tzMap[offset] = tz 144 | } 145 | return tz 146 | } 147 | 148 | var errCannotParseNumber = errors.New("couldn't parse number") 149 | 150 | func atoi2(in string) (int, error) { 151 | _ = in[1] 152 | a, b := int(in[0]-'0'), int(in[1]-'0') 153 | if a < 0 || a > 9 || b < 0 || b > 9 { 154 | return 0, errCannotParseNumber 155 | } 156 | return a*10 + b, nil 157 | } 158 | 159 | func atoi4(in string) (int, error) { 160 | _ = in[3] 161 | a, b, c, d := int(in[0]-'0'), int(in[1]-'0'), int(in[2]-'0'), int(in[3]-'0') 162 | if a < 0 || a > 9 || b < 0 || b > 9 || c < 0 || c > 9 || d < 0 || d > 9 { 163 | return 0, errCannotParseNumber 164 | } 165 | return a*1000 + b*100 + c*10 + d, nil 166 | } 167 | -------------------------------------------------------------------------------- /time/parse_test.go: -------------------------------------------------------------------------------- 1 | package time 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func TestParse(t *testing.T) { 9 | tests := []string{ 10 | "2006-01-02T13:37:42Z", 11 | "2006-01-02T13:37:42,326Z", 12 | "2006-01-02T13:37:42.0Z", 13 | "2006-01-02T13:37:42.0000Z", 14 | "2006-01-02T13:37:42,326876Z", 15 | "2006-01-02T13:37:42,326876123Z", 16 | "2006-01-02T13:37:42.326876123Z", 17 | "2006-01-02T13:37:42,326+08:00", 18 | "2006-01-02T13:37:42.326-08:00", 19 | "2006-01-02T13:37:42.326-08:21", 20 | "2006-01-02T13:37:42.326+08:21", 21 | "2021-09-30T08:28:33.137578Z", 22 | } 23 | 24 | for _, test := range tests { 25 | t.Run(test, func(t *testing.T) { 26 | exp, err := time.Parse(time.RFC3339, test) 27 | if err != nil { 28 | t.Fatal(err) 29 | } 30 | 31 | actual, err := parseTime(test) 
32 | if err != nil { 33 | t.Fatal(err) 34 | } 35 | 36 | if !exp.Equal(actual) { 37 | t.Fatalf("parsed time incorrect. Got %s for %s", actual, test) 38 | } 39 | }) 40 | } 41 | } 42 | 43 | func TestParseDate(t *testing.T) { 44 | tests := []string{ 45 | "2021-09-30", 46 | "1970-01-01", 47 | } 48 | 49 | for _, test := range tests { 50 | t.Run(test, func(t *testing.T) { 51 | exp, err := time.Parse(time.DateOnly, test) 52 | if err != nil { 53 | t.Fatal(err) 54 | } 55 | 56 | actual, err := parseTime(test) 57 | if err != nil { 58 | t.Fatal(err) 59 | } 60 | 61 | if !exp.Equal(actual) { 62 | t.Fatalf("parsed time incorrect. Got %s for %s", actual, test) 63 | } 64 | }) 65 | } 66 | } 67 | 68 | func TestParseFails(t *testing.T) { 69 | tests := []string{ 70 | "", 71 | "2006", 72 | "2006-01-02T13:37:42", 73 | "2006-01-02T13:37:42,326", 74 | // "2006-01-02T13:37:42.Z", 75 | "2006:01-02T13:37:42Z", 76 | "2006-01:02T13:37:42Z", 77 | "2006-01-02 13:37:42Z", 78 | "2006-01-02T13-37:42Z", 79 | "2006-01-02T13:37-42Z", 80 | "200a-01-02T13:37:42Z", 81 | "2006-0b-02T13:37:42Z", 82 | "2006-01-0cT13:37:42Z", 83 | "2006-01-02T1d:37:42Z", 84 | "2006-01-02T13:3e:42Z", 85 | "2006-01-02T13:37:4fZ", 86 | "2006-01-02T13:37:42.727", 87 | "2006-01-02T13:37:42x08:00", 88 | "2006-01-02T13:37:42+08x00", 89 | "2006-01-02T13:37:42+0a:00", 90 | "2006-01-02T13:37:42+08:0a", 91 | "2006-01-02T13:37:42+08:0", 92 | "2006-01-02T13:37:42+08:00hello", 93 | "2006-01-02§13:37:42+08:00", 94 | } 95 | 96 | for _, test := range tests { 97 | t.Run(test, func(t *testing.T) { 98 | if _, err := time.Parse(time.RFC3339, test); err == nil { 99 | t.Errorf("%q parsed OK for the standard library", test) 100 | } 101 | 102 | if _, err := parseTime(test); err == nil { 103 | t.Errorf("%q parsed OK", test) 104 | } 105 | }) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /time/time.go: -------------------------------------------------------------------------------- 1 | // Package time contains avro decoders for time.Time. 2 | package time 3 | 4 | import ( 5 | "fmt" 6 | "reflect" 7 | "time" 8 | "unsafe" 9 | 10 | "github.com/philpearl/avro" 11 | ) 12 | 13 | // RegisterCodecs makes the codecs in this package available to avro 14 | func RegisterCodecs() { 15 | avro.Register(reflect.TypeFor[time.Time](), buildTimeCodec) 16 | avro.RegisterSchema(reflect.TypeFor[time.Time](), avro.Schema{ 17 | Type: "union", 18 | Union: []avro.Schema{ 19 | {Type: "null"}, 20 | {Type: "string"}, 21 | }, 22 | }) 23 | } 24 | 25 | func buildTimeCodec(schema avro.Schema, typ reflect.Type, omit bool) (avro.Codec, error) { 26 | // If in future we want to decode an integer unix epoc time we can add a 27 | // switch here 28 | switch schema.Type { 29 | case "string": 30 | return StringCodec{}, nil 31 | case "long": 32 | var c LongCodec 33 | c.mult = 1 34 | if schema.Object != nil { 35 | switch schema.Object.LogicalType { 36 | case "timestamp-micros": 37 | c.mult = 1000 38 | case "timestamp-millis": 39 | c.mult = 1e6 40 | } 41 | } 42 | return c, nil 43 | case "int": 44 | if schema.Object != nil { 45 | switch schema.Object.LogicalType { 46 | // BigQuery claims to use this for it's DATE type but doesn't. We've 47 | // seen DATEs as strings with no logical type. 
Format is 2006-01-02 48 | case "date": 49 | return DateCodec{}, nil 50 | } 51 | } 52 | } 53 | 54 | return nil, fmt.Errorf("time.Time codec works only with string and long schema, not %q", schema.Type) 55 | } 56 | 57 | // DateCodec is a decoder from an AVRO date logical type, which is a number of 58 | // days since 1 Jan 1970 59 | type DateCodec struct{ avro.Int32Codec } 60 | 61 | func (c DateCodec) Read(r *avro.ReadBuf, p unsafe.Pointer) error { 62 | var l int64 63 | if err := c.Int32Codec.Read(r, unsafe.Pointer(&l)); err != nil { 64 | return err 65 | } 66 | 67 | *(*time.Time)(p) = time.Date(1970, 1, 1+int(l), 0, 0, 0, 0, time.UTC) 68 | return nil 69 | } 70 | 71 | // New create a pointer to a new time.Time 72 | func (c DateCodec) New(r *avro.ReadBuf) unsafe.Pointer { 73 | return r.Alloc(timeType) 74 | } 75 | 76 | func (c DateCodec) Omit(p unsafe.Pointer) bool { 77 | t := (*time.Time)(p) 78 | return t.IsZero() 79 | } 80 | 81 | func (c DateCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 82 | t := *(*time.Time)(p) 83 | // TODO: wrangle this into Time.AppendFormat? 84 | day := int32(t.Unix() / (60 * 60 * 24)) 85 | 86 | c.Int32Codec.Write(w, unsafe.Pointer(&day)) 87 | } 88 | 89 | // StringCodec is a decoder from an AVRO string with RFC3339 encoding to a time.Time 90 | type StringCodec struct{ avro.StringCodec } 91 | 92 | func (c StringCodec) Read(r *avro.ReadBuf, p unsafe.Pointer) error { 93 | // Can we do better than using the underlying string codec? 94 | l, err := r.Varint() 95 | if err != nil { 96 | return fmt.Errorf("failed to read length of time: %w", err) 97 | } 98 | 99 | if l == 0 { 100 | // pragmatically better to just leave the time alone if there's no 101 | // content to parse. 102 | return nil 103 | } 104 | 105 | data, err := r.Next(int(l)) 106 | if err != nil { 107 | return fmt.Errorf("failed to read %d bytes of time string body: %w", l, err) 108 | } 109 | 110 | s := *(*string)(unsafe.Pointer(&data)) 111 | t, err := parseTime(s) 112 | if err != nil { 113 | return fmt.Errorf("failed to parse time: %w", err) 114 | } 115 | *(*time.Time)(p) = t 116 | return nil 117 | } 118 | 119 | var timeType = reflect.TypeFor[time.Time]() 120 | 121 | // New create a pointer to a new time.Time 122 | func (c StringCodec) New(r *avro.ReadBuf) unsafe.Pointer { 123 | return r.Alloc(timeType) 124 | } 125 | 126 | func (c StringCodec) Omit(p unsafe.Pointer) bool { 127 | t := (*time.Time)(p) 128 | return t.IsZero() 129 | } 130 | 131 | func (c StringCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 132 | t := *(*time.Time)(p) 133 | // TODO: wrangle this into Time.AppendFormat? 
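// RFC3339Nano trims trailing zeros from the fractional seconds; parseTime
// above copes with any fraction length, and the embedded string codec then
// writes the usual length varint followed by the bytes.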
134 | s := t.Format(time.RFC3339Nano) 135 | 136 | c.StringCodec.Write(w, unsafe.Pointer(&s)) 137 | } 138 | 139 | // LongCodec is a decoder from an AVRO long where the time is encoded as 140 | // nanoseconds since the UNIX epoch 141 | type LongCodec struct { 142 | avro.Int64Codec 143 | mult int64 144 | } 145 | 146 | func (c LongCodec) Read(r *avro.ReadBuf, p unsafe.Pointer) error { 147 | var l int64 148 | if err := c.Int64Codec.Read(r, unsafe.Pointer(&l)); err != nil { 149 | return err 150 | } 151 | 152 | *(*time.Time)(p) = time.Unix(0, l*c.mult).UTC() 153 | return nil 154 | } 155 | 156 | // New create a pointer to a new time.Time 157 | func (c LongCodec) New(r *avro.ReadBuf) unsafe.Pointer { 158 | return r.Alloc(timeType) 159 | } 160 | 161 | func (c LongCodec) Omit(p unsafe.Pointer) bool { 162 | t := (*time.Time)(p) 163 | return t.IsZero() 164 | } 165 | 166 | func (c LongCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 167 | t := *(*time.Time)(p) 168 | l := t.UnixMicro() 169 | 170 | c.Int64Codec.Write(w, unsafe.Pointer(&l)) 171 | } 172 | -------------------------------------------------------------------------------- /time/time_test.go: -------------------------------------------------------------------------------- 1 | package time 2 | 3 | import ( 4 | "encoding/binary" 5 | "strconv" 6 | "testing" 7 | "time" 8 | "unsafe" 9 | 10 | "github.com/google/go-cmp/cmp" 11 | "github.com/philpearl/avro" 12 | ) 13 | 14 | func TestTime(t *testing.T) { 15 | now := time.Now() 16 | ts := now.Format(time.RFC3339Nano) 17 | data := []byte{byte(len(ts) << 1)} 18 | data = append(data, ts...) 19 | 20 | b := avro.NewReadBuf(data) 21 | c := StringCodec{} 22 | 23 | var out time.Time 24 | if err := c.Read(b, unsafe.Pointer(&out)); err != nil { 25 | t.Fatal(err) 26 | } 27 | 28 | if !out.Equal(now) { 29 | t.Fatalf("times %s & %s differ by %s", now, out, now.Sub(out)) 30 | } 31 | } 32 | 33 | func TestTimeEmpty(t *testing.T) { 34 | b := avro.NewReadBuf([]byte{0}) 35 | c := StringCodec{} 36 | 37 | var out time.Time 38 | if err := c.Read(b, unsafe.Pointer(&out)); err != nil { 39 | t.Fatal(err) 40 | } 41 | 42 | if !out.IsZero() { 43 | t.Fatalf("times %s but expected zero", out) 44 | } 45 | } 46 | 47 | func TestTimePtr(t *testing.T) { 48 | now := time.Now() 49 | ts := now.Format(time.RFC3339Nano) 50 | data := []byte{byte(len(ts) << 1)} 51 | data = append(data, ts...) 
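// The first byte, byte(len(ts)<<1), is the zig-zag varint encoding of the
// string length; a single byte is enough while len(ts) < 64, which easily
// covers an RFC3339Nano timestamp.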
52 | 53 | b := avro.NewReadBuf(data) 54 | 55 | c := avro.PointerCodec{ 56 | Codec: StringCodec{}, 57 | } 58 | 59 | var out *time.Time 60 | if err := c.Read(b, unsafe.Pointer(&out)); err != nil { 61 | t.Fatal(err) 62 | } 63 | 64 | if !out.Equal(now) { 65 | t.Fatalf("times %s & %s differ by %s", now, out, now.Sub(*out)) 66 | } 67 | } 68 | 69 | func TestTimeLong(t *testing.T) { 70 | now := time.Now() 71 | data := make([]byte, binary.MaxVarintLen64) 72 | l := binary.PutVarint(data, now.UnixNano()) 73 | data = data[:l] 74 | 75 | b := avro.NewReadBuf(data) 76 | c := LongCodec{mult: 1} 77 | 78 | var out time.Time 79 | if err := c.Read(b, unsafe.Pointer(&out)); err != nil { 80 | t.Fatal(err) 81 | } 82 | 83 | if !out.Equal(now) { 84 | t.Fatalf("times %s & %s differ by %s", now, out, now.Sub(out)) 85 | } 86 | } 87 | 88 | func TestDate(t *testing.T) { 89 | t0 := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 90 | for _, l := range []int{0, 1, 573} { 91 | t.Run(strconv.Itoa(l), func(t *testing.T) { 92 | exp := t0.AddDate(0, 0, l) 93 | data := make([]byte, binary.MaxVarintLen64) 94 | l := binary.PutVarint(data, int64(l)) 95 | data = data[:l] 96 | 97 | b := avro.NewReadBuf(data) 98 | c := DateCodec{} 99 | 100 | var out time.Time 101 | if err := c.Read(b, unsafe.Pointer(&out)); err != nil { 102 | t.Fatal(err) 103 | } 104 | 105 | if !out.Equal(exp) { 106 | t.Fatalf("times %s & %s differ by %s", exp, out, exp.Sub(out)) 107 | } 108 | }) 109 | } 110 | } 111 | 112 | func TestTimeLongPtr(t *testing.T) { 113 | now := time.Now() 114 | data := make([]byte, binary.MaxVarintLen64) 115 | l := binary.PutVarint(data, now.UnixNano()) 116 | data = data[:l] 117 | 118 | b := avro.NewReadBuf(data) 119 | 120 | c := avro.PointerCodec{ 121 | Codec: LongCodec{mult: 1}, 122 | } 123 | 124 | var out *time.Time 125 | if err := c.Read(b, unsafe.Pointer(&out)); err != nil { 126 | t.Fatal(err) 127 | } 128 | 129 | if !out.Equal(now) { 130 | t.Fatalf("times %s & %s differ by %s", now, out, now.Sub(*out)) 131 | } 132 | } 133 | 134 | func BenchmarkTime(b *testing.B) { 135 | now := time.Now().UTC() 136 | ts := now.Format(time.RFC3339Nano) 137 | data := []byte{byte(len(ts) << 1)} 138 | data = append(data, ts...) 
139 | 140 | buf := avro.NewReadBuf(data) 141 | c := StringCodec{} 142 | 143 | b.ReportAllocs() 144 | b.ResetTimer() 145 | 146 | for b.Loop() { 147 | buf.Reset(data) 148 | 149 | var out time.Time 150 | if err := c.Read(buf, unsafe.Pointer(&out)); err != nil { 151 | b.Fatal(err) 152 | } 153 | } 154 | } 155 | 156 | func BenchmarkLongTime(b *testing.B) { 157 | now := time.Now() 158 | data := make([]byte, binary.MaxVarintLen64) 159 | l := binary.PutVarint(data, now.UnixNano()) 160 | data = data[:l] 161 | 162 | buf := avro.NewReadBuf(data) 163 | c := LongCodec{mult: 1} 164 | 165 | b.ReportAllocs() 166 | b.ResetTimer() 167 | 168 | for b.Loop() { 169 | buf.Reset(data) 170 | 171 | var out time.Time 172 | if err := c.Read(buf, unsafe.Pointer(&out)); err != nil { 173 | b.Fatal(err) 174 | } 175 | } 176 | } 177 | 178 | func BenchmarkParseTime(b *testing.B) { 179 | ts := time.Now().UTC().Format(time.RFC3339Nano) 180 | b.ResetTimer() 181 | b.ReportAllocs() 182 | 183 | for b.Loop() { 184 | _, err := time.Parse(time.RFC3339, ts) 185 | if err != nil { 186 | b.Fatal(err) 187 | } 188 | } 189 | } 190 | 191 | func BenchmarkParseTimeOurselves(b *testing.B) { 192 | ts := time.Now().UTC().Format(time.RFC3339Nano) 193 | b.ResetTimer() 194 | b.ReportAllocs() 195 | 196 | for b.Loop() { 197 | _, err := parseTime(ts) 198 | if err != nil { 199 | b.Fatal(err) 200 | } 201 | } 202 | } 203 | 204 | func TestASchema(t *testing.T) { 205 | RegisterCodecs() 206 | 207 | s, err := avro.SchemaFromString(`{ 208 | "type": "record", 209 | "name": "Root", 210 | "fields": [ 211 | { 212 | "name": "timestamp", 213 | "type": [ 214 | "null", 215 | { 216 | "type": "long", 217 | "logicalType": "timestamp-micros" 218 | } 219 | ], 220 | "default": null 221 | } 222 | ] 223 | } 224 | `) 225 | if err != nil { 226 | t.Fatal(err) 227 | } 228 | 229 | if diff := cmp.Diff(avro.Schema{ 230 | Type: "record", 231 | Object: &avro.SchemaObject{ 232 | Name: "Root", 233 | Fields: []avro.SchemaRecordField{ 234 | { 235 | Name: "timestamp", 236 | Type: avro.Schema{ 237 | Type: "union", 238 | Union: []avro.Schema{ 239 | {Type: "null"}, 240 | { 241 | Type: "long", 242 | Object: &avro.SchemaObject{LogicalType: "timestamp-micros"}, 243 | }, 244 | }, 245 | }, 246 | }, 247 | }, 248 | }, 249 | }, s); diff != "" { 250 | t.Fatal(diff) 251 | } 252 | 253 | type Thing struct { 254 | Timestamp time.Time `json:"timestamp"` 255 | } 256 | 257 | if _, err := s.Codec(Thing{}); err != nil { 258 | t.Fatal(err) 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /union.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "unsafe" 6 | ) 7 | 8 | type unionCodec struct { 9 | codecs []Codec 10 | } 11 | 12 | func (u *unionCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 13 | index, err := r.Varint() 14 | if err != nil { 15 | return fmt.Errorf("failed reading union selector. %w", err) 16 | } 17 | if index < 0 || index >= int64(len(u.codecs)) { 18 | return fmt.Errorf("union selector %d out of range (%d types)", index, len(u.codecs)) 19 | } 20 | 21 | c := u.codecs[index] 22 | return c.Read(r, p) 23 | } 24 | 25 | func (u *unionCodec) Skip(r *ReadBuf) error { 26 | index, err := r.Varint() 27 | if err != nil { 28 | return fmt.Errorf("failed reading union selector. 
%w", err) 29 | } 30 | if index < 0 || index >= int64(len(u.codecs)) { 31 | return fmt.Errorf("union selector %d out of range (%d types)", index, len(u.codecs)) 32 | } 33 | 34 | c := u.codecs[index] 35 | return c.Skip(r) 36 | } 37 | 38 | func (u *unionCodec) New(r *ReadBuf) unsafe.Pointer { 39 | return nil 40 | } 41 | 42 | func (u *unionCodec) Omit(p unsafe.Pointer) bool { 43 | return false 44 | } 45 | 46 | func (u *unionCodec) Write(w *WriteBuf, p unsafe.Pointer) { 47 | // TODO: Need a way to determine which type! 48 | panic("union codec not implemented!") 49 | } 50 | 51 | type unionOneAndNullCodec struct { 52 | codec Codec 53 | nonNull uint8 54 | } 55 | 56 | func (u *unionOneAndNullCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 57 | // index must be less than 1 byte in this case. 58 | // The result should be 2 or 4 59 | index, err := r.ReadByte() 60 | if err != nil { 61 | return fmt.Errorf("failed reading union selector. %w", err) 62 | } 63 | index /= 2 64 | if (index)&0xFE != 0 { 65 | return fmt.Errorf("union selector %d out of range (2 types)", index) 66 | } 67 | 68 | if index == u.nonNull { 69 | return u.codec.Read(r, p) 70 | } 71 | return nil 72 | } 73 | 74 | func (u *unionOneAndNullCodec) Skip(r *ReadBuf) error { 75 | // index must be less than 1 byte in this case 76 | index, err := r.ReadByte() 77 | if err != nil { 78 | return fmt.Errorf("failed reading union selector. %w", err) 79 | } 80 | index /= 2 81 | if (index)&0xFE != 0 { 82 | return fmt.Errorf("union selector %d out of range (2 types)", index) 83 | } 84 | 85 | if index == u.nonNull { 86 | return u.codec.Skip(r) 87 | } 88 | return nil 89 | } 90 | 91 | func (u *unionOneAndNullCodec) New(r *ReadBuf) unsafe.Pointer { 92 | return nil 93 | } 94 | 95 | func (u *unionOneAndNullCodec) Omit(p unsafe.Pointer) bool { 96 | // The union codec itself is never omitted 97 | return false 98 | } 99 | 100 | func (u *unionOneAndNullCodec) Write(w *WriteBuf, p unsafe.Pointer) { 101 | if u.codec.Omit(p) { 102 | // TODO: this assumes the null type is always first. 103 | w.Varint(0) 104 | return 105 | } 106 | w.Varint(int64(u.nonNull)) 107 | u.codec.Write(w, p) 108 | } 109 | 110 | type unionNullString struct { 111 | codec StringCodec 112 | nonNull byte 113 | } 114 | 115 | func (u *unionNullString) Read(r *ReadBuf, p unsafe.Pointer) error { 116 | // index must be less than 1 byte in this case 117 | index, err := r.ReadByte() 118 | if err != nil { 119 | return fmt.Errorf("failed reading union selector. %w", err) 120 | } 121 | index /= 2 122 | if (index)&0xFE != 0 { 123 | return fmt.Errorf("union selector %d out of range (2 types)", index) 124 | } 125 | 126 | if index == u.nonNull { 127 | return u.codec.Read(r, p) 128 | } 129 | return nil 130 | } 131 | 132 | func (u *unionNullString) Skip(r *ReadBuf) error { 133 | // index must be less than 1 byte in this case 134 | index, err := r.ReadByte() 135 | if err != nil { 136 | return fmt.Errorf("failed reading union selector. 
%w", err) 137 | } 138 | index /= 2 139 | if (index)&0xFE != 0 { 140 | return fmt.Errorf("union selector %d out of range (2 types)", index) 141 | } 142 | 143 | if index == u.nonNull { 144 | return u.codec.Skip(r) 145 | } 146 | return nil 147 | } 148 | 149 | func (u *unionNullString) New(r *ReadBuf) unsafe.Pointer { 150 | return nil 151 | } 152 | 153 | func (u *unionNullString) Omit(p unsafe.Pointer) bool { 154 | return false 155 | } 156 | 157 | func (u *unionNullString) Write(w *WriteBuf, p unsafe.Pointer) { 158 | if u.codec.Omit(p) { 159 | w.Varint(0) 160 | } 161 | 162 | w.Varint(1) 163 | u.codec.Write(w, p) 164 | } 165 | -------------------------------------------------------------------------------- /union_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "testing" 5 | "unsafe" 6 | ) 7 | 8 | func TestUnionCodec(t *testing.T) { 9 | c := unionCodec{ 10 | codecs: []Codec{nullCodec{}, StringCodec{}}, 11 | } 12 | 13 | tests := []struct { 14 | name string 15 | data []byte 16 | exp string 17 | }{ 18 | { 19 | name: "null", 20 | data: []byte{0}, 21 | exp: "", 22 | }, 23 | { 24 | name: "string", 25 | data: []byte{2, 6, 'f', 'o', 'o'}, 26 | exp: "foo", 27 | }, 28 | } 29 | 30 | for _, test := range tests { 31 | t.Run(test.name, func(t *testing.T) { 32 | r := NewReadBuf(test.data) 33 | var actual string 34 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 35 | t.Fatal(err) 36 | } 37 | if actual != test.exp { 38 | t.Fatalf("result %q does not match expected %q", actual, test.exp) 39 | } 40 | if r.Len() != 0 { 41 | t.Fatalf("%d bytes unread", r.Len()) 42 | } 43 | }) 44 | t.Run(test.name+" skip", func(t *testing.T) { 45 | r := NewReadBuf(test.data) 46 | if err := c.Skip(r); err != nil { 47 | t.Fatal(err) 48 | } 49 | if r.Len() != 0 { 50 | t.Fatalf("%d bytes unread", r.Len()) 51 | } 52 | }) 53 | 54 | } 55 | } 56 | 57 | func TestUnionOneCodec(t *testing.T) { 58 | c := unionOneAndNullCodec{ 59 | codec: StringCodec{}, 60 | nonNull: 1, 61 | } 62 | 63 | tests := []struct { 64 | name string 65 | data []byte 66 | exp string 67 | }{ 68 | { 69 | name: "null", 70 | data: []byte{0}, 71 | exp: "", 72 | }, 73 | { 74 | name: "string", 75 | data: []byte{2, 6, 'f', 'o', 'o'}, 76 | exp: "foo", 77 | }, 78 | } 79 | 80 | for _, test := range tests { 81 | t.Run(test.name, func(t *testing.T) { 82 | r := NewReadBuf(test.data) 83 | var actual string 84 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 85 | t.Fatal(err) 86 | } 87 | if actual != test.exp { 88 | t.Fatalf("result %q does not match expected %q", actual, test.exp) 89 | } 90 | if r.Len() != 0 { 91 | t.Fatalf("%d bytes unread", r.Len()) 92 | } 93 | }) 94 | t.Run(test.name+" skip", func(t *testing.T) { 95 | r := NewReadBuf(test.data) 96 | if err := c.Skip(r); err != nil { 97 | t.Fatal(err) 98 | } 99 | if r.Len() != 0 { 100 | t.Fatalf("%d bytes unread", r.Len()) 101 | } 102 | }) 103 | t.Run(test.name+" roundtrip", func(t *testing.T) { 104 | w := NewWriteBuf(nil) 105 | c.Write(w, unsafe.Pointer(&test.exp)) 106 | var actual string 107 | r := NewReadBuf(w.Bytes()) 108 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 109 | t.Fatal(err) 110 | } 111 | if actual != test.exp { 112 | t.Fatalf("result %q does not match expected %q", actual, test.exp) 113 | } 114 | }) 115 | } 116 | } 117 | 118 | func TestUnionStringCodec(t *testing.T) { 119 | c := unionNullString{ 120 | nonNull: 1, 121 | } 122 | 123 | tests := []struct { 124 | name string 125 | data []byte 126 | exp 
string 127 | }{ 128 | { 129 | name: "null", 130 | data: []byte{0}, 131 | exp: "", 132 | }, 133 | { 134 | name: "string", 135 | data: []byte{2, 6, 'f', 'o', 'o'}, 136 | exp: "foo", 137 | }, 138 | } 139 | 140 | for _, test := range tests { 141 | t.Run(test.name, func(t *testing.T) { 142 | r := NewReadBuf(test.data) 143 | var actual string 144 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 145 | t.Fatal(err) 146 | } 147 | if actual != test.exp { 148 | t.Fatalf("result %q does not match expected %q", actual, test.exp) 149 | } 150 | if r.Len() != 0 { 151 | t.Fatalf("%d bytes unread", r.Len()) 152 | } 153 | }) 154 | t.Run(test.name+" skip", func(t *testing.T) { 155 | r := NewReadBuf(test.data) 156 | if err := c.Skip(r); err != nil { 157 | t.Fatal(err) 158 | } 159 | if r.Len() != 0 { 160 | t.Fatalf("%d bytes unread", r.Len()) 161 | } 162 | }) 163 | t.Run(test.name+" roundtrip", func(t *testing.T) { 164 | w := NewWriteBuf(nil) 165 | c.Write(w, unsafe.Pointer(&test.exp)) 166 | var actual string 167 | r := NewReadBuf(w.Bytes()) 168 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 169 | t.Fatal(err) 170 | } 171 | if actual != test.exp { 172 | t.Fatalf("result %q does not match expected %q", actual, test.exp) 173 | } 174 | }) 175 | t.Run(test.name+" roundtrip omitempty", func(t *testing.T) { 176 | c := unionNullString{ 177 | nonNull: 1, 178 | codec: StringCodec{omitEmpty: true}, 179 | } 180 | 181 | w := NewWriteBuf(nil) 182 | c.Write(w, unsafe.Pointer(&test.exp)) 183 | var actual string 184 | r := NewReadBuf(w.Bytes()) 185 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 186 | t.Fatal(err) 187 | } 188 | if actual != test.exp { 189 | t.Fatalf("result %q does not match expected %q", actual, test.exp) 190 | } 191 | }) 192 | 193 | } 194 | } 195 | 196 | func BenchmarkUnionStringCodec(b *testing.B) { 197 | c := unionNullString{ 198 | nonNull: 1, 199 | } 200 | data := []byte{2, 6, 'f', 'o', 'o'} 201 | 202 | b.Run("read", func(b *testing.B) { 203 | b.ReportAllocs() 204 | b.RunParallel(func(pb *testing.PB) { 205 | r := NewReadBuf(nil) 206 | var actual string 207 | for pb.Next() { 208 | r.Reset(data) 209 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 210 | b.Fatal(err) 211 | } 212 | r.ExtractResourceBank().Close() 213 | } 214 | }) 215 | }) 216 | b.Run("skip", func(b *testing.B) { 217 | b.ReportAllocs() 218 | b.RunParallel(func(pb *testing.PB) { 219 | r := NewReadBuf(nil) 220 | for pb.Next() { 221 | r.Reset(data) 222 | if err := c.Skip(r); err != nil { 223 | b.Fatal(err) 224 | } 225 | } 226 | }) 227 | }) 228 | } 229 | 230 | func BenchmarkUnionOneCodec(b *testing.B) { 231 | c := unionOneAndNullCodec{ 232 | codec: StringCodec{}, 233 | nonNull: 1, 234 | } 235 | data := []byte{2, 6, 'f', 'o', 'o'} 236 | 237 | b.Run("read", func(b *testing.B) { 238 | b.ReportAllocs() 239 | b.RunParallel(func(pb *testing.PB) { 240 | r := NewReadBuf(nil) 241 | var actual string 242 | for pb.Next() { 243 | r.Reset(data) 244 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 245 | b.Fatal(err) 246 | } 247 | r.ExtractResourceBank().Close() 248 | } 249 | }) 250 | }) 251 | b.Run("skip", func(b *testing.B) { 252 | b.ReportAllocs() 253 | b.RunParallel(func(pb *testing.PB) { 254 | r := NewReadBuf(nil) 255 | for pb.Next() { 256 | r.Reset(data) 257 | if err := c.Skip(r); err != nil { 258 | b.Fatal(err) 259 | } 260 | } 261 | }) 262 | }) 263 | } 264 | 265 | func BenchmarkUnionCodec(b *testing.B) { 266 | c := unionCodec{ 267 | codecs: []Codec{nullCodec{}, StringCodec{}}, 268 | } 269 | 
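// The encoded value below is 2 (zig-zag varint selecting union branch 1,
// the string codec), then 6 (zig-zag varint for length 3), then the bytes
// of "foo".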
data := []byte{2, 6, 'f', 'o', 'o'} 270 | 271 | b.Run("read", func(b *testing.B) { 272 | b.ReportAllocs() 273 | b.RunParallel(func(pb *testing.PB) { 274 | r := NewReadBuf(nil) 275 | var actual string 276 | for pb.Next() { 277 | r.Reset(data) 278 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 279 | b.Fatal(err) 280 | } 281 | r.ExtractResourceBank().Close() 282 | } 283 | }) 284 | }) 285 | b.Run("skip", func(b *testing.B) { 286 | b.ReportAllocs() 287 | b.RunParallel(func(pb *testing.PB) { 288 | r := NewReadBuf(nil) 289 | for pb.Next() { 290 | r.Reset(data) 291 | if err := c.Skip(r); err != nil { 292 | b.Fatal(err) 293 | } 294 | } 295 | }) 296 | }) 297 | } 298 | -------------------------------------------------------------------------------- /unsafetricks.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import "unsafe" 4 | 5 | //go:linkname unsafe_New reflect.unsafe_New 6 | func unsafe_New(rtype unsafe.Pointer) unsafe.Pointer 7 | 8 | //go:linkname unsafe_NewArray reflect.unsafe_NewArray 9 | func unsafe_NewArray(rtype unsafe.Pointer, length int) unsafe.Pointer 10 | 11 | // typedslicecopy copies a slice of elemType values from src to dst, 12 | // returning the number of elements copied. 13 | // 14 | //go:linkname typedslicecopy reflect.typedslicecopy 15 | //go:noescape 16 | func typedslicecopy(elemType unsafe.Pointer, dst, src sliceHeader) int 17 | 18 | //go:linkname mapassign reflect.mapassign 19 | //go:noescape 20 | func mapassign(typ unsafe.Pointer, hmap unsafe.Pointer, key, val unsafe.Pointer) 21 | 22 | // typedmemclr zeros the value at ptr of type t. 23 | // 24 | //go:linkname typedmemclr reflect.typedmemclr 25 | //go:noescape 26 | func typedmemclr(typ, ptr unsafe.Pointer) 27 | 28 | // typedarrayclear clears the array at ptr 29 | // 30 | //go:linkname typedarrayclear reflect.typedarrayclear 31 | //go:noescape 32 | func typedarrayclear(typ, ptr unsafe.Pointer, len int) 33 | 34 | // We could use the reflect version of mapiterinit, but that forces a heap 35 | // allocation per map iteration. Instead we can use the runtime version, but 36 | // then we have to allocate a runtime private struct for it to use instead. We 37 | // can do this, and it uses stack memory, so that's less GC pressure and more 38 | // speed. But it isn't excellent from a maintenance point of view. Things will 39 | // break if the struct changes and we won't find out. But let's go for it. 40 | // 41 | // mapiter matches hiter in runtime/map.go. Using matching-ish types rather than 42 | // a big enough array of unsafe.Pointer just in case the GC would run into an 43 | // issue if something it thought was a pointer was not. Don't attempt to access 44 | // any of the fields in this struct directly! On the plus side this hasn't 45 | // changed significantly for 6 years 46 | // 47 | // Hmm, actually, as of Go 1.24 the underlying map has changed and this only 48 | // works as there's an explicit shim in the Go code to allow it to! It costs a 49 | // single heap allocation (I think?) 
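// A hedged sketch of how these linknamed helpers are typically driven (the
// real iteration lives in the map codec in map.go). The names rtype and m
// are illustrative stand-ins for the unpacked map type and the map pointer;
// mapiterkey returns nil once the iterator is exhausted.
//
//	var it mapiter
//	mapiterinit(rtype, m, unsafe.Pointer(&it))
//	for k := mapiterkey(unsafe.Pointer(&it)); k != nil; k = mapiterkey(unsafe.Pointer(&it)) {
//		v := mapiterelem(unsafe.Pointer(&it))
//		_ = v // decode the key at k and the element at v here
//		mapiternext(unsafe.Pointer(&it))
//	}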
50 | type mapiter struct { 51 | key unsafe.Pointer 52 | elem unsafe.Pointer 53 | t unsafe.Pointer 54 | h unsafe.Pointer 55 | buckets unsafe.Pointer 56 | bptr unsafe.Pointer 57 | overflow unsafe.Pointer 58 | oldoverflow unsafe.Pointer 59 | startBucket uintptr 60 | offset uint8 61 | wrapped bool 62 | B uint8 63 | i uint8 64 | bucket uintptr 65 | checkBucket uintptr 66 | } 67 | 68 | //go:linkname mapiterinit runtime.mapiterinit 69 | //go:noescape 70 | func mapiterinit(t unsafe.Pointer, m unsafe.Pointer, hi unsafe.Pointer) 71 | 72 | //go:linkname mapiterkey reflect.mapiterkey 73 | //go:noescape 74 | func mapiterkey(it unsafe.Pointer) (key unsafe.Pointer) 75 | 76 | //go:linkname mapiterelem reflect.mapiterelem 77 | //go:noescape 78 | func mapiterelem(it unsafe.Pointer) (elem unsafe.Pointer) 79 | 80 | //go:linkname mapiternext reflect.mapiternext 81 | //go:noescape 82 | func mapiternext(it unsafe.Pointer) 83 | 84 | //go:linkname maplen reflect.maplen 85 | //go:noescape 86 | func maplen(m unsafe.Pointer) int 87 | --------------------------------------------------------------------------------