├── LICENSE.txt ├── array.go ├── array_test.go ├── bool.go ├── bool_test.go ├── buffer.go ├── build.go ├── build_test.go ├── buildschema.go ├── buildschema_test.go ├── bytes.go ├── bytes_test.go ├── codec.go ├── discard.go ├── encoder.go ├── encoder_test.go ├── file.go ├── file_test.go ├── filewriter.go ├── filewriter_test.go ├── fixed.go ├── fixed_test.go ├── float.go ├── float_test.go ├── go.mod ├── go.sum ├── int.go ├── int_test.go ├── interface.go ├── map.go ├── map_test.go ├── null.go ├── null ├── null.go ├── null_test.go └── testdata │ └── nullavro ├── pointer.go ├── pointer_test.go ├── readme.md ├── record.go ├── record_test.go ├── schema.go ├── schema_test.go ├── string.go ├── string_test.go ├── testdata └── avro1 ├── time ├── parse.go ├── parse_test.go ├── time.go └── time_test.go ├── union.go ├── union_test.go └── unsafetricks.go /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020-2025 Phil Pearl 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /array.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "unsafe" 7 | ) 8 | 9 | type arrayCodec struct { 10 | itemCodec Codec 11 | itemType reflect.Type 12 | omitEmpty bool 13 | } 14 | 15 | func (rc *arrayCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 16 | sh := (*sliceHeader)(p) 17 | 18 | // Blocks can be repeated 19 | for { 20 | count, err := r.Varint() 21 | if err != nil { 22 | return fmt.Errorf("failed to read count for array. %w", err) 23 | } 24 | if count == 0 { 25 | break 26 | } 27 | if count < 0 { 28 | // negative length means there's a block size, which is only really 29 | // useful for skipping. 30 | count = -count 31 | if _, err := r.Varint(); err != nil { 32 | return fmt.Errorf("failed to read block size for array. %w", err) 33 | } 34 | } 35 | 36 | // If our array is nil or undersized then we can fix it up here. 37 | *sh = rc.resizeSlice(r, *sh, int(count)) 38 | 39 | itemSize := rc.itemType.Size() 40 | for i := range count { 41 | cursor := unsafe.Add(sh.Data, uintptr(sh.Len)*itemSize) 42 | if err := rc.itemCodec.Read(r, cursor); err != nil { 43 | return fmt.Errorf("failed to decode array entry %d. 
%w", i, err) 44 | } 45 | sh.Len++ 46 | } 47 | } 48 | 49 | return nil 50 | } 51 | 52 | func (rc *arrayCodec) Skip(r *ReadBuf) error { 53 | for { 54 | count, err := r.Varint() 55 | if err != nil { 56 | return fmt.Errorf("failed to read count for array. %w", err) 57 | } 58 | if count == 0 { 59 | break 60 | } 61 | if count < 0 { 62 | // negative count means there's a block size we can use to skip the 63 | // rest of this block 64 | bs, err := r.Varint() 65 | if err != nil { 66 | return fmt.Errorf("failed to read block size for array. %w", err) 67 | } 68 | if err := skip(r, bs); err != nil { 69 | return err 70 | } 71 | continue 72 | } 73 | 74 | for ; count > 0; count-- { 75 | if err := rc.itemCodec.Skip(r); err != nil { 76 | return fmt.Errorf("failed to skip array entry. %w", err) 77 | } 78 | } 79 | } 80 | 81 | return nil 82 | } 83 | 84 | var sliceType = reflect.TypeFor[sliceHeader]() 85 | 86 | func (rc *arrayCodec) New(r *ReadBuf) unsafe.Pointer { 87 | return r.Alloc(sliceType) 88 | } 89 | 90 | // resizeSlice increases the length of the slice by len entries 91 | func (rc *arrayCodec) resizeSlice(r *ReadBuf, in sliceHeader, len int) sliceHeader { 92 | if in.Len+len <= in.Cap { 93 | return in 94 | } 95 | // Will assume for now that blocks are sensible sizes 96 | out := sliceHeader{ 97 | Cap: in.Len + len, 98 | Len: in.Len, 99 | } 100 | out.Data = r.AllocArray(rc.itemType, out.Cap) 101 | 102 | if in.Data != nil { 103 | elemType := unpackEFace(rc.itemType).data 104 | typedslicecopy(elemType, out, in) 105 | } 106 | return out 107 | } 108 | 109 | func (rc *arrayCodec) Omit(p unsafe.Pointer) bool { 110 | return rc.omitEmpty && len(*(*[]byte)(p)) == 0 111 | } 112 | 113 | func (rc *arrayCodec) Write(w *WriteBuf, p unsafe.Pointer) { 114 | sh := (*sliceHeader)(p) 115 | if sh.Len == 0 { 116 | w.Varint(0) 117 | return 118 | } 119 | 120 | // TODO: you can write negative counts, which are then followed by the size 121 | // of the block, then the data. That makes it easier to skip over data. TBD if we want to do that 122 | w.Varint(int64(sh.Len)) 123 | for i := range sh.Len { 124 | cursor := unsafe.Add(sh.Data, uintptr(i)*rc.itemType.Size()) 125 | rc.itemCodec.Write(w, cursor) 126 | } 127 | 128 | // Write a zero count to indicate the end of the array. This does appear to 129 | // be necessary as you can write multiple blocks. 
130 | w.Varint(0) 131 | } 132 | -------------------------------------------------------------------------------- /array_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | "unsafe" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | "github.com/google/go-cmp/cmp/cmpopts" 10 | ) 11 | 12 | func TestArrayCodec(t *testing.T) { 13 | tests := []struct { 14 | name string 15 | data []byte 16 | exp []string 17 | out []string 18 | }{ 19 | { 20 | name: "empty", 21 | data: []byte{0}, 22 | }, 23 | { 24 | name: "one", 25 | data: []byte{ 26 | 2, 27 | 6, 'o', 'n', 'e', 28 | 0, 29 | }, 30 | exp: []string{"one"}, 31 | }, 32 | { 33 | name: "one append", 34 | data: []byte{ 35 | 2, 36 | 6, 'o', 'n', 'e', 37 | 0, 38 | }, 39 | out: []string{"two"}, 40 | exp: []string{"two", "one"}, 41 | }, 42 | { 43 | name: "two", 44 | data: []byte{ 45 | 4, 46 | 6, 'o', 'n', 'e', 47 | 6, 't', 'w', 'o', 48 | 0, 49 | }, 50 | exp: []string{"one", "two"}, 51 | }, 52 | { 53 | name: "two blocks", 54 | data: []byte{ 55 | 2, 56 | 6, 'o', 'n', 'e', 57 | 2, 58 | 6, 't', 'w', 'o', 59 | 0, 60 | }, 61 | exp: []string{"one", "two"}, 62 | }, 63 | { 64 | name: "two blocks with size", 65 | data: []byte{ 66 | 1, 67 | 8, 68 | 6, 'o', 'n', 'e', 69 | 1, 70 | 8, 71 | 6, 't', 'w', 'o', 72 | 0, 73 | }, 74 | exp: []string{"one", "two"}, 75 | }, 76 | } 77 | 78 | c := arrayCodec{ 79 | itemCodec: StringCodec{}, 80 | itemType: reflect.TypeFor[string](), 81 | } 82 | 83 | for _, test := range tests { 84 | t.Run(test.name, func(t *testing.T) { 85 | buf := NewReadBuf(test.data) 86 | 87 | if err := c.Read(buf, unsafe.Pointer(&test.out)); err != nil { 88 | t.Fatal(err) 89 | } 90 | 91 | if diff := cmp.Diff(test.exp, test.out); diff != "" { 92 | t.Fatalf("output not as expected. %s", diff) 93 | } 94 | if buf.Len() != 0 { 95 | t.Fatalf("unread data (%d)", buf.Len()) 96 | } 97 | }) 98 | t.Run(test.name+"_skip", func(t *testing.T) { 99 | buf := NewReadBuf(test.data) 100 | 101 | if err := c.Skip(buf); err != nil { 102 | t.Fatal(err) 103 | } 104 | 105 | if buf.Len() != 0 { 106 | t.Fatalf("unread data (%d)", buf.Len()) 107 | } 108 | }) 109 | 110 | } 111 | } 112 | 113 | func TestArrayCodecInt(t *testing.T) { 114 | tests := []struct { 115 | name string 116 | data []byte 117 | exp []int32 118 | out []int32 119 | }{ 120 | { 121 | name: "empty", 122 | data: []byte{0}, 123 | }, 124 | { 125 | name: "one", 126 | data: []byte{ 127 | 2, 128 | 2, 129 | 0, 130 | }, 131 | exp: []int32{1}, 132 | }, 133 | { 134 | name: "one append", 135 | data: []byte{ 136 | 2, 137 | 2, 138 | 0, 139 | }, 140 | out: []int32{2}, 141 | exp: []int32{2, 1}, 142 | }, 143 | { 144 | name: "more", 145 | data: []byte{ 146 | 8, 147 | 1, 148 | 2, 149 | 3, 150 | 4, 151 | 0, 152 | }, 153 | exp: []int32{-1, 1, -2, 2}, 154 | }, 155 | { 156 | name: "two blocks", 157 | data: []byte{ 158 | 2, 159 | 2, 160 | 2, 161 | 4, 162 | 0, 163 | }, 164 | exp: []int32{1, 2}, 165 | }, 166 | } 167 | 168 | c := arrayCodec{ 169 | itemCodec: Int32Codec{}, 170 | itemType: reflect.TypeFor[int32](), 171 | } 172 | 173 | for _, test := range tests { 174 | test := test 175 | t.Run(test.name, func(t *testing.T) { 176 | t.Parallel() 177 | buf := NewReadBuf(test.data) 178 | 179 | if err := c.Read(buf, unsafe.Pointer(&test.out)); err != nil { 180 | t.Fatal(err) 181 | } 182 | 183 | if diff := cmp.Diff(test.exp, test.out); diff != "" { 184 | t.Fatalf("output not as expected. 
%s", diff) 185 | } 186 | if buf.Len() != 0 { 187 | t.Fatalf("unread data (%d)", buf.Len()) 188 | } 189 | }) 190 | t.Run(test.name+"_skip", func(t *testing.T) { 191 | t.Parallel() 192 | buf := NewReadBuf(test.data) 193 | 194 | if err := c.Skip(buf); err != nil { 195 | t.Fatal(err) 196 | } 197 | 198 | if buf.Len() != 0 { 199 | t.Fatalf("unread data (%d)", buf.Len()) 200 | } 201 | }) 202 | 203 | } 204 | } 205 | 206 | func TestArrayCodecRoundTrip(t *testing.T) { 207 | tests := []struct { 208 | name string 209 | data []string 210 | }{ 211 | { 212 | name: "empty", 213 | data: []string{}, 214 | }, 215 | { 216 | name: "one", 217 | data: []string{"one"}, 218 | }, 219 | { 220 | name: "two", 221 | data: []string{"one", "two"}, 222 | }, 223 | { 224 | name: "three", 225 | data: []string{"one", "two", "three"}, 226 | }, 227 | } 228 | 229 | c := arrayCodec{ 230 | itemCodec: StringCodec{}, 231 | itemType: reflect.TypeFor[string](), 232 | } 233 | 234 | for _, test := range tests { 235 | t.Run(test.name, func(t *testing.T) { 236 | t.Parallel() 237 | w := NewWriteBuf(nil) 238 | 239 | c.Write(w, unsafe.Pointer(&test.data)) 240 | 241 | var out []string 242 | r := NewReadBuf(w.Bytes()) 243 | if err := c.Read(r, unsafe.Pointer(&out)); err != nil { 244 | t.Fatal(err) 245 | } 246 | 247 | if diff := cmp.Diff(test.data, out, cmpopts.EquateEmpty()); diff != "" { 248 | t.Fatalf("output not as expected. %s", diff) 249 | } 250 | }) 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /bool.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "unsafe" 6 | ) 7 | 8 | type BoolCodec struct{ omitEmpty bool } 9 | 10 | func (BoolCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 11 | b, err := r.ReadByte() 12 | if err != nil { 13 | return err 14 | } 15 | 16 | *(*bool)(p) = b != 0 17 | return nil 18 | } 19 | 20 | func (BoolCodec) Skip(r *ReadBuf) error { 21 | return skip(r, 1) 22 | } 23 | 24 | var boolType = reflect.TypeFor[bool]() 25 | 26 | func (BoolCodec) New(r *ReadBuf) unsafe.Pointer { 27 | return r.Alloc(boolType) 28 | } 29 | 30 | func (rc BoolCodec) Omit(p unsafe.Pointer) bool { 31 | return rc.omitEmpty && !*(*bool)(p) 32 | } 33 | 34 | func (rc BoolCodec) Write(w *WriteBuf, p unsafe.Pointer) { 35 | if *(*bool)(p) { 36 | w.Byte(1) 37 | } else { 38 | w.Byte(0) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /bool_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | "unsafe" 7 | ) 8 | 9 | func TestBoolCodec(t *testing.T) { 10 | tests := []struct { 11 | name string 12 | data []byte 13 | exp bool 14 | }{ 15 | { 16 | name: "true", 17 | data: []byte{1}, 18 | exp: true, 19 | }, 20 | { 21 | name: "false", 22 | data: []byte{0}, 23 | exp: false, 24 | }, 25 | } 26 | 27 | c := BoolCodec{} 28 | 29 | for _, test := range tests { 30 | test := test 31 | t.Run(test.name, func(t *testing.T) { 32 | t.Parallel() 33 | var actual bool 34 | r := NewReadBuf(test.data) 35 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 36 | t.Fatal(err) 37 | } 38 | if actual != test.exp { 39 | t.Fatalf("got %t, expected %t", actual, test.exp) 40 | } 41 | if r.Len() != 0 { 42 | t.Fatalf("%d bytes left", r.Len()) 43 | } 44 | }) 45 | 46 | t.Run(test.name+" skip", func(t *testing.T) { 47 | t.Parallel() 48 | r := NewReadBuf(test.data) 49 | if err := c.Skip(r); err != nil { 
50 | t.Fatal(err) 51 | } 52 | if r.Len() != 0 { 53 | t.Fatalf("%d bytes left", r.Len()) 54 | } 55 | }) 56 | } 57 | } 58 | 59 | func BenchmarkBoolPointer(b *testing.B) { 60 | data := bytes.Repeat([]byte{1}, 1000) 61 | r := NewReadBuf(data) 62 | 63 | c := PointerCodec{BoolCodec{}} 64 | b.ReportAllocs() 65 | 66 | for b.Loop() { 67 | r.Reset(data) 68 | for range 1000 { 69 | var out *bool 70 | if err := c.Read(r, unsafe.Pointer(&out)); err != nil { 71 | b.Fatal(err) 72 | } 73 | if !*out { 74 | b.Fatal("wrong bool") 75 | } 76 | } 77 | r.ExtractResourceBank().Close() 78 | } 79 | } 80 | 81 | func TestBoolCodecRoundTrip(t *testing.T) { 82 | tests := []struct { 83 | name string 84 | data bool 85 | }{ 86 | { 87 | name: "true", 88 | data: true, 89 | }, 90 | { 91 | name: "false", 92 | data: false, 93 | }, 94 | } 95 | 96 | c := BoolCodec{} 97 | 98 | for _, test := range tests { 99 | test := test 100 | t.Run(test.name, func(t *testing.T) { 101 | t.Parallel() 102 | var actual bool 103 | w := NewWriteBuf(nil) 104 | c.Write(w, unsafe.Pointer(&test.data)) 105 | r := NewReadBuf(w.Bytes()) 106 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 107 | t.Fatal(err) 108 | } 109 | if actual != test.data { 110 | t.Fatalf("got %t, expected %t", actual, test.data) 111 | } 112 | }) 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /buffer.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "io" 7 | "reflect" 8 | "sync" 9 | "unsafe" 10 | ) 11 | 12 | // WriteBuf is a simple, append only, replacement for bytes.Buffer. It is used 13 | // by AVRO encoders. It is not safe for concurrent use. 14 | type WriteBuf struct { 15 | buf []byte 16 | } 17 | 18 | // NewWriteBuf returns a new WriteBuf. 19 | func NewWriteBuf(buf []byte) *WriteBuf { 20 | return &WriteBuf{buf: buf} 21 | } 22 | 23 | func (w *WriteBuf) Varint(v int64) { 24 | w.buf = binary.AppendVarint(w.buf, v) 25 | } 26 | 27 | func (w *WriteBuf) Byte(val byte) { 28 | w.buf = append(w.buf, val) 29 | } 30 | 31 | func (w *WriteBuf) Write(val []byte) { 32 | w.buf = append(w.buf, val...) 33 | } 34 | 35 | func (w *WriteBuf) Bytes() []byte { 36 | return w.buf 37 | } 38 | 39 | func (w *WriteBuf) Reset() { 40 | w.buf = w.buf[:0] 41 | } 42 | 43 | func (w *WriteBuf) Len() int { 44 | return len(w.buf) 45 | } 46 | 47 | // ReadBuf is a very simple replacement for bytes.Reader that avoids data copies 48 | type ReadBuf struct { 49 | i int 50 | buf []byte 51 | rb *ResourceBank 52 | } 53 | 54 | // NewReadBuf returns a new Buffer. 55 | func NewReadBuf(data []byte) *ReadBuf { 56 | return &ReadBuf{buf: data, rb: newResourceBank()} 57 | } 58 | 59 | // Reset allows you to reuse a buffer with a new set of data 60 | func (d *ReadBuf) Reset(data []byte) { 61 | d.i = 0 62 | d.buf = data 63 | if d.rb == nil { 64 | d.rb = newResourceBank() 65 | } 66 | } 67 | 68 | // ExtractResourceBank extracts the current ResourceBank from the buffer, and replaces 69 | // it with a fresh one. 70 | func (d *ReadBuf) ExtractResourceBank() *ResourceBank { 71 | rb := d.rb 72 | d.rb = newResourceBank() 73 | return rb 74 | } 75 | 76 | // Next returns the next l bytes from the buffer. It does so without copying, so 77 | // if you hold onto the data you risk holding onto a lot of data. 
If l exceeds 78 | // the remaining space Next returns io.EOF 79 | func (d *ReadBuf) Next(l int) ([]byte, error) { 80 | if l+d.i > len(d.buf) { 81 | return nil, io.EOF 82 | } 83 | d.i += l 84 | return d.buf[d.i-l : d.i], nil 85 | } 86 | 87 | // NextAsString returns the next l bytes from the buffer as a string. The string 88 | // data is held in a StringBank and will be valid only until someone calls Close 89 | // on that bank. If l exceeds the remaining space NextAsString returns io.EOF 90 | func (d *ReadBuf) NextAsString(l int) (string, error) { 91 | if l+d.i > len(d.buf) { 92 | return "", io.EOF 93 | } 94 | d.i += l 95 | return d.rb.ToString(d.buf[d.i-l : d.i]), nil 96 | } 97 | 98 | func (d *ReadBuf) NextAsBytes(l int) ([]byte, error) { 99 | if l+d.i > len(d.buf) { 100 | return nil, io.EOF 101 | } 102 | d.i += l 103 | return d.rb.ToBytes(d.buf[d.i-l : d.i]), nil 104 | } 105 | 106 | // Alloc allocates a pointer to the type rtyp. The data is allocated in a ResourceBank 107 | func (d *ReadBuf) Alloc(rtyp reflect.Type) unsafe.Pointer { 108 | return d.rb.Alloc(rtyp) 109 | } 110 | 111 | func (d *ReadBuf) AllocArray(rtyp reflect.Type, len int) unsafe.Pointer { 112 | return d.rb.AllocArray(rtyp, len) 113 | } 114 | 115 | // ReadByte returns the next byte from the buffer. If no bytes are left it 116 | // returns io.EOF 117 | func (d *ReadBuf) ReadByte() (byte, error) { 118 | if d.i >= len(d.buf) { 119 | return 0, io.EOF 120 | } 121 | d.i++ 122 | return d.buf[d.i-1], nil 123 | } 124 | 125 | // Len returns the length of unread data in the buffer 126 | func (d *ReadBuf) Len() int { 127 | return len(d.buf) - d.i 128 | } 129 | 130 | // Varint reads a varint from the buffer 131 | func (d *ReadBuf) Varint() (int64, error) { 132 | v, err := d.uvarint() // ok to continue in presence of error 133 | return int64(v>>1) ^ -int64(v&1), err 134 | } 135 | 136 | var errOverflow = errors.New("varint overflows a 64-bit integer") 137 | 138 | func (d *ReadBuf) uvarint() (uint64, error) { 139 | var x uint64 140 | var s uint 141 | for i := 0; ; i++ { 142 | b, err := d.ReadByte() 143 | if err != nil { 144 | return x, err 145 | } 146 | if b < 0x80 { 147 | if i > 9 || i == 9 && b > 1 { 148 | return x, errOverflow 149 | } 150 | return x | uint64(b)< 0 { 270 | var opt string 271 | opt, opts, _ = strings.Cut(opts, ",") 272 | if opt == "omitempty" { 273 | return true 274 | } 275 | } 276 | return false 277 | } 278 | 279 | func buildRecordCodec(schema Schema, typ reflect.Type) (Codec, error) { 280 | if schema.Object == nil { 281 | return nil, fmt.Errorf("record schema does not have object") 282 | } 283 | 284 | var ntf map[string]reflect.StructField 285 | if typ != nil { 286 | if typ.Kind() != reflect.Struct { 287 | return nil, fmt.Errorf("type for a record must be struct, not %s", typ.Kind()) 288 | } 289 | 290 | // Build a name to field map 291 | ntf = make(map[string]reflect.StructField, typ.NumField()) 292 | for i := range typ.NumField() { 293 | sf := typ.Field(i) 294 | name := nameForField(sf) 295 | if name == "-" { 296 | continue 297 | } 298 | 299 | ntf[name] = sf 300 | } 301 | } 302 | 303 | var rc recordCodec 304 | rc.rtype = typ 305 | 306 | // The schema is in the driving-seat here 307 | for _, schemaf := range schema.Object.Fields { 308 | offset := uintptr(math.MaxUint64) 309 | var fieldType reflect.Type 310 | sf, ok := ntf[schemaf.Name] 311 | if ok { 312 | offset = sf.Offset 313 | fieldType = sf.Type 314 | } 315 | 316 | codec, err := buildCodec(schemaf.Type, fieldType, omitEmpty(sf)) 317 | if err != nil { 318 | return 
nil, fmt.Errorf("failed to get codec for field %q: %w", schemaf.Name, err) 319 | } 320 | 321 | rc.fields = append(rc.fields, recordCodecField{ 322 | codec: codec, 323 | offset: offset, 324 | name: schemaf.Name, 325 | }) 326 | } 327 | 328 | return &rc, nil 329 | } 330 | -------------------------------------------------------------------------------- /build_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func TestBuildCodec(t *testing.T) { 9 | t.Parallel() 10 | 11 | type some struct { 12 | I []int32 13 | } 14 | 15 | type all struct { 16 | A bool 17 | B int32 18 | C int64 19 | D float32 20 | E float64 21 | F []byte 22 | G string 23 | H some 24 | J map[string]int 25 | K [4]byte 26 | L int16 27 | } 28 | 29 | allSchema := Schema{ 30 | Type: "record", 31 | Object: &SchemaObject{ 32 | Fields: []SchemaRecordField{ 33 | { 34 | Name: "A", 35 | Type: Schema{Type: "boolean"}, 36 | }, 37 | { 38 | Name: "B", 39 | Type: Schema{Type: "int"}, 40 | }, 41 | { 42 | Name: "C", 43 | Type: Schema{Type: "long"}, 44 | }, 45 | { 46 | Name: "D", 47 | Type: Schema{Type: "float"}, 48 | }, 49 | { 50 | Name: "E", 51 | Type: Schema{Type: "double"}, 52 | }, 53 | { 54 | Name: "F", 55 | Type: Schema{Type: "bytes"}, 56 | }, 57 | { 58 | Name: "G", 59 | Type: Schema{Type: "string"}, 60 | }, 61 | { 62 | Name: "H", 63 | Type: Schema{ 64 | Type: "record", 65 | Object: &SchemaObject{ 66 | Name: "some", 67 | Fields: []SchemaRecordField{ 68 | { 69 | Name: "I", 70 | Type: Schema{ 71 | Type: "array", 72 | Object: &SchemaObject{ 73 | Items: Schema{Type: "int"}, 74 | }, 75 | }, 76 | }, 77 | }, 78 | }, 79 | }, 80 | }, 81 | { 82 | Name: "J", 83 | Type: Schema{ 84 | Type: "map", 85 | Object: &SchemaObject{ 86 | Values: Schema{Type: "long"}, 87 | }, 88 | }, 89 | }, 90 | { 91 | Name: "K", 92 | Type: Schema{ 93 | Type: "fixed", 94 | Object: &SchemaObject{ 95 | Size: 4, 96 | }, 97 | }, 98 | }, 99 | { 100 | Name: "L", 101 | Type: Schema{ 102 | Type: "long", 103 | }, 104 | }, 105 | }, 106 | }, 107 | } 108 | 109 | c, err := buildCodec(allSchema, reflect.TypeFor[all](), false) 110 | if err != nil { 111 | t.Fatal(err) 112 | } 113 | 114 | _ = c 115 | } 116 | -------------------------------------------------------------------------------- /buildschema.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "strings" 7 | "sync" 8 | ) 9 | 10 | var ( 11 | schemaRegistryMutex sync.RWMutex 12 | schemaRegistry = make(map[reflect.Type]Schema) 13 | ) 14 | 15 | // Call RegisterSchema to indicate what schema should be used for a given type. 16 | // Use this to register the schema to use for a type for which you write a 17 | // custom codec. 18 | func RegisterSchema(typ reflect.Type, s Schema) { 19 | schemaRegistryMutex.Lock() 20 | defer schemaRegistryMutex.Unlock() 21 | schemaRegistry[typ] = s 22 | } 23 | 24 | // SchemaForType returns a Schema for the given type. It aims to produce a 25 | // Schema that's compatible with BigQuery. 
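//
// A minimal usage sketch (the person type here is illustrative, not part of
// this package):
//
//	type person struct {
//		Name string `json:"name"`
//		Age  int    `json:"age,omitempty"`
//	}
//
//	s, err := avro.SchemaForType(person{})
//	if err != nil {
//		// handle the error
//	}
//	schemaJSON, err := s.Marshal() // JSON-encode the schema, e.g. for a file header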
26 | func SchemaForType(item any) (Schema, error) { 27 | typ := reflect.TypeOf(item) 28 | if typ.Kind() == reflect.Ptr { 29 | typ = typ.Elem() 30 | } 31 | if typ.Kind() != reflect.Struct { 32 | return Schema{}, fmt.Errorf("item must be a struct or pointer to a struct") 33 | } 34 | 35 | return schemaForType(typ) 36 | } 37 | 38 | func isInSchemaRegistry(typ reflect.Type) (Schema, bool) { 39 | schemaRegistryMutex.RLock() 40 | defer schemaRegistryMutex.RUnlock() 41 | s, ok := schemaRegistry[typ] 42 | return s, ok 43 | } 44 | 45 | func schemaForType(typ reflect.Type) (Schema, error) { 46 | if s, ok := isInSchemaRegistry(typ); ok { 47 | return s, nil 48 | } 49 | 50 | // BigQuery makes every basic type nullable. We'll send null for the zero 51 | // value if there's an "omitempty" tag. 52 | switch typ.Kind() { 53 | case reflect.Bool: 54 | return Schema{Type: "boolean"}, nil 55 | case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 56 | return Schema{Type: "long"}, nil 57 | case reflect.Float32, reflect.Float64: 58 | return Schema{Type: "double"}, nil 59 | case reflect.String: 60 | return Schema{Type: "string"}, nil 61 | case reflect.Struct: 62 | return schemaForStruct(typ) 63 | case reflect.Array, reflect.Slice: 64 | return schemaForArray(typ) 65 | case reflect.Map: 66 | return schemaForMap(typ) 67 | case reflect.Pointer: 68 | // If this is a pointer to a basic type then we don't need to wrap in a union as all the basic types are nullable. 69 | underlying, err := schemaForType(typ.Elem()) 70 | if err != nil { 71 | return Schema{}, fmt.Errorf("getting underlying schema for pointer: %w", err) 72 | } 73 | if underlying.Type == "union" || underlying.Type == "array" || underlying.Type == "map" { 74 | return underlying, nil 75 | } 76 | return nullableSchema(underlying), nil 77 | default: 78 | return Schema{}, fmt.Errorf("type %s not supported", typ) 79 | } 80 | } 81 | 82 | func nullableSchema(s Schema) Schema { 83 | return Schema{ 84 | Type: "union", 85 | Union: []Schema{ 86 | {Type: "null"}, 87 | s, 88 | }, 89 | } 90 | } 91 | 92 | func schemaForStruct(typ reflect.Type) (Schema, error) { 93 | fields := make([]SchemaRecordField, 0, typ.NumField()) 94 | for i := range typ.NumField() { 95 | field := typ.Field(i) 96 | name := nameForField(field) 97 | if name == "-" { 98 | continue 99 | } 100 | 101 | s, err := schemaForType(field.Type) 102 | if err != nil { 103 | return Schema{}, fmt.Errorf("getting schema for field %s: %w", name, err) 104 | } 105 | 106 | if omitEmpty(field) && s.Type != "union" { 107 | s = nullableSchema(s) 108 | } 109 | 110 | fields = append(fields, SchemaRecordField{ 111 | Name: name, 112 | Type: s, 113 | }) 114 | } 115 | 116 | return Schema{ 117 | Type: "record", 118 | Object: &SchemaObject{ 119 | Name: typ.Name(), 120 | // namespace must be a valid Avro namespace, which is a 121 | // dot-separated alphanumeric string. 
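// namespaceReplacer (defined below) maps the Go package path onto that
// form: for example, a type defined in "github.com/philpearl/avro" gets the
// namespace "github.com.philpearl.avro".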
122 | Namespace: namespaceReplacer.Replace(typ.PkgPath()), 123 | Fields: fields, 124 | }, 125 | }, nil 126 | } 127 | 128 | var namespaceReplacer = strings.NewReplacer("/", ".", "-", "_") 129 | 130 | func schemaForArray(typ reflect.Type) (Schema, error) { 131 | elem := typ.Elem() 132 | if elem.Kind() == reflect.Uint8 { 133 | return Schema{ 134 | Type: "bytes", 135 | }, nil 136 | } 137 | 138 | s, err := schemaForType(elem) 139 | if err != nil { 140 | return Schema{}, fmt.Errorf("building array schema: %w", err) 141 | } 142 | 143 | return Schema{ 144 | Type: "array", 145 | Object: &SchemaObject{ 146 | Items: s, 147 | }, 148 | }, nil 149 | } 150 | 151 | func schemaForMap(typ reflect.Type) (Schema, error) { 152 | s, err := schemaForType(typ.Elem()) 153 | if err != nil { 154 | return Schema{}, err 155 | } 156 | 157 | return Schema{ 158 | Type: "map", 159 | Object: &SchemaObject{ 160 | Values: s, 161 | }, 162 | }, nil 163 | } 164 | -------------------------------------------------------------------------------- /buildschema_test.go: -------------------------------------------------------------------------------- 1 | package avro_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/google/go-cmp/cmp" 7 | "github.com/philpearl/avro" 8 | ) 9 | 10 | func TestBuildSchema(t *testing.T) { 11 | tests := []struct { 12 | name string 13 | in any 14 | exp avro.Schema 15 | }{ 16 | { 17 | name: "int", 18 | in: struct { 19 | A int `json:"aaa"` 20 | }{}, 21 | exp: avro.Schema{ 22 | Type: "record", 23 | Object: &avro.SchemaObject{ 24 | Fields: []avro.SchemaRecordField{ 25 | { 26 | Name: "aaa", 27 | Type: avro.Schema{ 28 | Type: "long", 29 | }, 30 | }, 31 | }, 32 | }, 33 | }, 34 | }, 35 | { 36 | name: "int omitempty", 37 | in: struct { 38 | A int `json:"aaa,omitempty"` 39 | }{}, 40 | exp: avro.Schema{ 41 | Type: "record", 42 | Object: &avro.SchemaObject{ 43 | Fields: []avro.SchemaRecordField{ 44 | { 45 | Name: "aaa", 46 | Type: avro.Schema{ 47 | Type: "union", 48 | Union: []avro.Schema{ 49 | {Type: "null"}, 50 | {Type: "long"}, 51 | }, 52 | }, 53 | }, 54 | }, 55 | }, 56 | }, 57 | }, 58 | 59 | { 60 | name: "int skip unexported", 61 | in: struct { 62 | A int `json:"aaa"` 63 | b int 64 | }{}, 65 | exp: avro.Schema{ 66 | Type: "record", 67 | Object: &avro.SchemaObject{ 68 | Fields: []avro.SchemaRecordField{ 69 | { 70 | Name: "aaa", 71 | Type: avro.Schema{ 72 | Type: "long", 73 | }, 74 | }, 75 | }, 76 | }, 77 | }, 78 | }, 79 | { 80 | name: "int skip json", 81 | in: struct { 82 | A int `json:"aaa"` 83 | B int `json:"-"` 84 | }{}, 85 | exp: avro.Schema{ 86 | Type: "record", 87 | Object: &avro.SchemaObject{ 88 | Fields: []avro.SchemaRecordField{ 89 | { 90 | Name: "aaa", 91 | Type: avro.Schema{ 92 | Type: "long", 93 | }, 94 | }, 95 | }, 96 | }, 97 | }, 98 | }, 99 | { 100 | name: "int skip bq", 101 | in: struct { 102 | A int `json:"aaa"` 103 | B int `json:"bbb" bq:"-"` 104 | }{}, 105 | exp: avro.Schema{ 106 | Type: "record", 107 | Object: &avro.SchemaObject{ 108 | Fields: []avro.SchemaRecordField{ 109 | { 110 | Name: "aaa", 111 | Type: avro.Schema{ 112 | Type: "long", 113 | }, 114 | }, 115 | }, 116 | }, 117 | }, 118 | }, 119 | { 120 | name: "bool", 121 | in: struct { 122 | A bool `json:"aaa"` 123 | }{}, 124 | exp: avro.Schema{ 125 | Type: "record", 126 | Object: &avro.SchemaObject{ 127 | Fields: []avro.SchemaRecordField{ 128 | { 129 | Name: "aaa", 130 | Type: avro.Schema{ 131 | Type: "boolean", 132 | }, 133 | }, 134 | }, 135 | }, 136 | }, 137 | }, 138 | { 139 | name: "float32", 140 | in: struct { 141 | A float32 
`json:"aaa"` 142 | }{}, 143 | exp: avro.Schema{ 144 | Type: "record", 145 | Object: &avro.SchemaObject{ 146 | Fields: []avro.SchemaRecordField{ 147 | { 148 | Name: "aaa", 149 | Type: avro.Schema{ 150 | Type: "double", 151 | }, 152 | }, 153 | }, 154 | }, 155 | }, 156 | }, 157 | { 158 | name: "float64", 159 | in: struct { 160 | A float64 `json:"aaa"` 161 | }{}, 162 | exp: avro.Schema{ 163 | Type: "record", 164 | Object: &avro.SchemaObject{ 165 | Fields: []avro.SchemaRecordField{ 166 | { 167 | Name: "aaa", 168 | Type: avro.Schema{ 169 | Type: "double", 170 | }, 171 | }, 172 | }, 173 | }, 174 | }, 175 | }, 176 | { 177 | name: "string", 178 | in: struct { 179 | A string `json:"aaa"` 180 | }{}, 181 | exp: avro.Schema{ 182 | Type: "record", 183 | Object: &avro.SchemaObject{ 184 | Fields: []avro.SchemaRecordField{ 185 | { 186 | Name: "aaa", 187 | Type: avro.Schema{ 188 | Type: "string", 189 | }, 190 | }, 191 | }, 192 | }, 193 | }, 194 | }, 195 | { 196 | name: "bytes", 197 | in: struct { 198 | A []byte `json:"aaa"` 199 | }{}, 200 | exp: avro.Schema{ 201 | Type: "record", 202 | Object: &avro.SchemaObject{ 203 | Fields: []avro.SchemaRecordField{ 204 | { 205 | Name: "aaa", 206 | Type: avro.Schema{ 207 | Type: "bytes", 208 | }, 209 | }, 210 | }, 211 | }, 212 | }, 213 | }, 214 | { 215 | name: "map", 216 | in: struct { 217 | A map[string]int `json:"aaa"` 218 | }{}, 219 | exp: avro.Schema{ 220 | Type: "record", 221 | Object: &avro.SchemaObject{ 222 | Fields: []avro.SchemaRecordField{ 223 | { 224 | Name: "aaa", 225 | Type: avro.Schema{ 226 | Type: "map", 227 | Object: &avro.SchemaObject{ 228 | Values: avro.Schema{ 229 | Type: "long", 230 | }, 231 | }, 232 | }, 233 | }, 234 | }, 235 | }, 236 | }, 237 | }, 238 | { 239 | name: "pointer to int", 240 | in: struct { 241 | A *int `json:"aaa"` 242 | }{}, 243 | exp: avro.Schema{ 244 | Type: "record", 245 | Object: &avro.SchemaObject{ 246 | Fields: []avro.SchemaRecordField{ 247 | { 248 | Name: "aaa", 249 | Type: avro.Schema{ 250 | Type: "union", 251 | Union: []avro.Schema{{Type: "null"}, {Type: "long"}}, 252 | }, 253 | }, 254 | }, 255 | }, 256 | }, 257 | }, 258 | { 259 | name: "struct", 260 | in: struct { 261 | A struct { 262 | B int `json:"bbb"` 263 | } `json:"aaa"` 264 | }{}, 265 | exp: avro.Schema{ 266 | Type: "record", 267 | Object: &avro.SchemaObject{ 268 | Fields: []avro.SchemaRecordField{ 269 | { 270 | Name: "aaa", 271 | Type: avro.Schema{ 272 | Type: "record", 273 | Object: &avro.SchemaObject{ 274 | Fields: []avro.SchemaRecordField{ 275 | { 276 | Name: "bbb", 277 | Type: avro.Schema{ 278 | Type: "long", 279 | }, 280 | }, 281 | }, 282 | }, 283 | }, 284 | }, 285 | }, 286 | }, 287 | }, 288 | }, 289 | { 290 | name: "struct ptr", 291 | in: struct { 292 | A *struct { 293 | B int `json:"bbb"` 294 | } `json:"aaa"` 295 | }{}, 296 | exp: avro.Schema{ 297 | Type: "record", 298 | Object: &avro.SchemaObject{ 299 | Fields: []avro.SchemaRecordField{ 300 | { 301 | Name: "aaa", 302 | Type: avro.Schema{ 303 | Type: "union", 304 | Union: []avro.Schema{ 305 | {Type: "null"}, 306 | { 307 | Type: "record", 308 | Object: &avro.SchemaObject{ 309 | Fields: []avro.SchemaRecordField{ 310 | { 311 | Name: "bbb", 312 | Type: avro.Schema{ 313 | Type: "long", 314 | }, 315 | }, 316 | }, 317 | }, 318 | }, 319 | }, 320 | }, 321 | }, 322 | }, 323 | }, 324 | }, 325 | }, 326 | 327 | { 328 | name: "struct slice", 329 | in: struct { 330 | A []struct { 331 | B int `json:"bbb"` 332 | } `json:"aaa"` 333 | }{}, 334 | exp: avro.Schema{ 335 | Type: "record", 336 | Object: &avro.SchemaObject{ 337 | 
Fields: []avro.SchemaRecordField{ 338 | { 339 | Name: "aaa", 340 | Type: avro.Schema{ 341 | Type: "array", 342 | Object: &avro.SchemaObject{ 343 | Items: avro.Schema{ 344 | Type: "record", 345 | Object: &avro.SchemaObject{ 346 | Fields: []avro.SchemaRecordField{ 347 | { 348 | Name: "bbb", 349 | Type: avro.Schema{ 350 | Type: "long", 351 | }, 352 | }, 353 | }, 354 | }, 355 | }, 356 | }, 357 | }, 358 | }, 359 | }, 360 | }, 361 | }, 362 | }, 363 | } 364 | 365 | for _, tt := range tests { 366 | t.Run(tt.name, func(t *testing.T) { 367 | got, err := avro.SchemaForType(tt.in) 368 | if err != nil { 369 | t.Fatal(err) 370 | } 371 | if diff := cmp.Diff(tt.exp, got); diff != "" { 372 | t.Errorf("BuildSchema() mismatch (-want +got):\n%s", diff) 373 | } 374 | }) 375 | } 376 | } 377 | -------------------------------------------------------------------------------- /bytes.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "unsafe" 7 | ) 8 | 9 | type BytesCodec struct{ omitEmpty bool } 10 | 11 | func (BytesCodec) Read(r *ReadBuf, ptr unsafe.Pointer) error { 12 | l, err := r.Varint() 13 | if err != nil { 14 | return fmt.Errorf("failed to read length of bytes. %w", err) 15 | } 16 | if l == 0 { 17 | return nil 18 | } 19 | data, err := r.NextAsBytes(int(l)) 20 | if err != nil { 21 | return fmt.Errorf("failed to read %d bytes of bytes body. %w", l, err) 22 | } 23 | *(*[]byte)(ptr) = data 24 | return nil 25 | } 26 | 27 | func (BytesCodec) Skip(r *ReadBuf) error { 28 | l, err := r.Varint() 29 | if err != nil { 30 | return fmt.Errorf("failed to read length of bytes. %w", err) 31 | } 32 | return skip(r, l) 33 | } 34 | 35 | var bytesType = reflect.TypeFor[[]byte]() 36 | 37 | func (BytesCodec) New(r *ReadBuf) unsafe.Pointer { 38 | return r.Alloc(bytesType) 39 | } 40 | 41 | func (rc BytesCodec) Omit(p unsafe.Pointer) bool { 42 | return rc.omitEmpty && len(*(*[]byte)(p)) == 0 43 | } 44 | 45 | func (rc BytesCodec) Write(w *WriteBuf, p unsafe.Pointer) { 46 | sh := *(*[]byte)(p) 47 | 48 | w.Varint(int64(len(sh))) 49 | w.Write(sh) 50 | } 51 | -------------------------------------------------------------------------------- /bytes_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "testing" 5 | "unsafe" 6 | 7 | "github.com/google/go-cmp/cmp" 8 | "github.com/google/go-cmp/cmp/cmpopts" 9 | ) 10 | 11 | func TestBytesCodec(t *testing.T) { 12 | tests := []struct { 13 | name string 14 | data []byte 15 | exp []byte 16 | }{ 17 | { 18 | name: "empty", 19 | data: []byte{0}, 20 | }, 21 | { 22 | name: "small", // 10 is 5 23 | data: []byte{10, 1, 2, 3, 4, 5}, 24 | exp: []byte{1, 2, 3, 4, 5}, 25 | }, 26 | } 27 | var c BytesCodec 28 | for _, test := range tests { 29 | test := test 30 | t.Run(test.name, func(t *testing.T) { 31 | t.Parallel() 32 | r := NewReadBuf(test.data) 33 | var actual []byte 34 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 35 | t.Fatal(err) 36 | } 37 | 38 | if diff := cmp.Diff(test.exp, actual); diff != "" { 39 | t.Fatalf("result not as expected. 
%s", diff) 40 | } 41 | if r.Len() != 0 { 42 | t.Fatalf("unread data %d", r.Len()) 43 | } 44 | }) 45 | t.Run(test.name+" skip", func(t *testing.T) { 46 | t.Parallel() 47 | r := NewReadBuf(test.data) 48 | if err := c.Skip(r); err != nil { 49 | t.Fatal(err) 50 | } 51 | if r.Len() != 0 { 52 | t.Fatalf("unread data %d", r.Len()) 53 | } 54 | }) 55 | 56 | } 57 | } 58 | 59 | func TestBytesRoundTrip(t *testing.T) { 60 | tests := []struct { 61 | name string 62 | in []byte 63 | }{ 64 | { 65 | name: "empty", 66 | in: []byte{}, 67 | }, 68 | { 69 | name: "zero", 70 | in: []byte{0}, 71 | }, 72 | 73 | { 74 | name: "hello", 75 | in: []byte("hello"), 76 | }, 77 | } 78 | 79 | var c BytesCodec 80 | for _, test := range tests { 81 | t.Run(test.name, func(t *testing.T) { 82 | buf := NewWriteBuf(nil) 83 | c.Write(buf, unsafe.Pointer(&test.in)) 84 | 85 | var actual []byte 86 | if err := c.Read(NewReadBuf(buf.Bytes()), unsafe.Pointer(&actual)); err != nil { 87 | t.Fatal(err) 88 | } 89 | 90 | if diff := cmp.Diff(test.in, actual, cmpopts.EquateEmpty()); diff != "" { 91 | t.Fatalf("output not as expected. %s", diff) 92 | } 93 | }) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /codec.go: -------------------------------------------------------------------------------- 1 | // Package avro is an AVRO encoder and decoder aimed principly at decoding AVRO 2 | // output from Google's Big Query. It encodes directly from Go structs and 3 | // decodes directly into Go structs, and uses json tags as naming hints. 4 | // 5 | // The primary decoding interface is ReadFile. This reads an AVRO file, 6 | // combining the schema in the file with type information from the struct passed 7 | // via the out parameter to decode the records. It then passes an instance of a 8 | // struct of type out to the callback cb for each record in the file. 9 | // 10 | // Use an Encoder to write AVRO files. Create an Encoder using NewEncoderFor, then 11 | // call Encode to write a record, and finally call Flush before closing the file. 12 | // 13 | // You can implement custom decoders for your own types and register them via 14 | // the Register function. github.com/phil/avro/null is an example of custom 15 | // decoders for the types defined in github.com/unravelin/null 16 | package avro 17 | 18 | import ( 19 | "unsafe" 20 | ) 21 | 22 | // Codec defines an encoder / decoder for a type. 23 | // You can write custom Codecs for types. See Register and CodecBuildFunc 24 | type Codec interface { 25 | // Read reads the wire format bytes for the current field from r and sets up 26 | // the value that p points to. The codec can assume that the memory for an 27 | // instance of the type for which the codec is registered is present behind 28 | // p 29 | Read(r *ReadBuf, p unsafe.Pointer) error 30 | // Skip advances the reader over the bytes for the current field. 31 | Skip(r *ReadBuf) error 32 | // New creates a pointer to the type for which the codec is registered. It is 33 | // used if the enclosing record has a field that is a pointer to this type 34 | New(r *ReadBuf) unsafe.Pointer 35 | 36 | // Omit returns true if the value that p points to should be omitted from the 37 | // output. This is used for optional fields in records. 38 | Omit(p unsafe.Pointer) bool 39 | 40 | // Write writes the wire format bytes for the value that p points to to w. 
41 | Write(w *WriteBuf, p unsafe.Pointer) 42 | } 43 | -------------------------------------------------------------------------------- /discard.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | func skip(r *ReadBuf, l int64) error { 4 | _, err := r.Next(int(l)) 5 | return err 6 | } 7 | -------------------------------------------------------------------------------- /encoder.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "reflect" 7 | "unsafe" 8 | ) 9 | 10 | type Encoder[T any] struct { 11 | schema Schema 12 | codec Codec 13 | fw *FileWriter 14 | w io.Writer 15 | 16 | approxBlockSize int 17 | wb *WriteBuf 18 | count int 19 | } 20 | 21 | // NewEncoderFor returns a new Encoder. Data will be written to w in Avro format, 22 | // including a schema header. The data will be compressed using the specified 23 | // compression algorithm. Data is written in blocks of at least approxBlockSize 24 | // bytes. A block is written when it reaches that size, or when Flush is called. 25 | func NewEncoderFor[T any](w io.Writer, compression Compression, approxBlockSize int) (*Encoder[T], error) { 26 | var t T 27 | 28 | typ := reflect.TypeFor[T]() 29 | if typ.Kind() != reflect.Struct { 30 | return nil, fmt.Errorf("only structs are supported, got %v", typ) 31 | } 32 | 33 | s, err := schemaForType(typ) 34 | if err != nil { 35 | return nil, fmt.Errorf("generating schema: %w", err) 36 | } 37 | 38 | c, err := s.Codec(t) 39 | if err != nil { 40 | return nil, fmt.Errorf("generating codec: %w", err) 41 | } 42 | 43 | schemaBytes, err := s.Marshal() 44 | if err != nil { 45 | return nil, fmt.Errorf("marshaling schema: %w", err) 46 | } 47 | 48 | fw, err := NewFileWriter(schemaBytes, compression) 49 | if err != nil { 50 | return nil, fmt.Errorf("creating file writer: %w", err) 51 | } 52 | 53 | if err := fw.WriteHeader(w); err != nil { 54 | return nil, fmt.Errorf("writing file header: %w", err) 55 | } 56 | 57 | return &Encoder[T]{ 58 | schema: s, 59 | codec: c, 60 | fw: fw, 61 | w: w, 62 | 63 | approxBlockSize: approxBlockSize, 64 | wb: NewWriteBuf(make([]byte, 0, approxBlockSize)), 65 | }, nil 66 | } 67 | 68 | // Encode writes a new row to the Avro file. 69 | func (e *Encoder[T]) Encode(v *T) error { 70 | e.codec.Write(e.wb, unsafe.Pointer(v)) 71 | e.count++ 72 | 73 | if e.wb.Len() >= e.approxBlockSize { 74 | if err := e.Flush(); err != nil { 75 | return fmt.Errorf("flushing: %w", err) 76 | } 77 | } 78 | 79 | return nil 80 | } 81 | 82 | // Flush writes any buffered data to the underlying writer. It completes the 83 | // current block. It must be called before closing the underlying file. 
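//
// A typical write sequence looks roughly like this (the row type, the rows
// slice and the file handling are illustrative):
//
//	enc, err := avro.NewEncoderFor[row](f, avro.CompressionSnappy, 64*1024)
//	if err != nil {
//		return err
//	}
//	for i := range rows {
//		if err := enc.Encode(&rows[i]); err != nil {
//			return err
//		}
//	}
//	if err := enc.Flush(); err != nil {
//		return err
//	}
//	return f.Close()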
84 | func (e *Encoder[T]) Flush() error { 85 | if e.count > 0 { 86 | if err := e.fw.WriteBlock(e.w, e.count, e.wb.Bytes()); err != nil { 87 | return fmt.Errorf("writing block: %w", err) 88 | } 89 | e.count = 0 90 | e.wb.Reset() 91 | } 92 | return nil 93 | } 94 | -------------------------------------------------------------------------------- /encoder_test.go: -------------------------------------------------------------------------------- 1 | package avro_test 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | "unsafe" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | "github.com/google/go-cmp/cmp/cmpopts" 10 | "github.com/philpearl/avro" 11 | ) 12 | 13 | func TestEncoder(t *testing.T) { 14 | type myStruct struct { 15 | Name string `json:"name"` 16 | Hat string `json:",omitempty"` 17 | V int 18 | Q float64 19 | Bytes []byte 20 | La []int `json:"la"` 21 | W int32 `json:"w,omitempty"` 22 | Z *int64 `json:"z"` 23 | Mmm map[string]string 24 | } 25 | 26 | buf := bytes.NewBuffer(nil) 27 | 28 | enc, err := avro.NewEncoderFor[myStruct](buf, avro.CompressionSnappy, 10_000) 29 | if err != nil { 30 | t.Fatal(err) 31 | } 32 | 33 | contents := []myStruct{ 34 | { 35 | Name: "jim", 36 | Hat: "cat", 37 | V: 31, 38 | Q: 3.14, 39 | Bytes: []byte{1, 2, 3, 4}, 40 | La: []int{1, 2, 3, 4}, 41 | W: 0, 42 | Z: new(int64), 43 | Mmm: map[string]string{"foo": "bar", "baz": "qux"}, 44 | }, 45 | { 46 | Name: "jim", 47 | Hat: "cat", 48 | V: 31, 49 | Q: 3.14, 50 | Bytes: []byte{1, 2, 3, 4}, 51 | La: []int{1, 2, 3, 4}, 52 | W: 0, 53 | Z: nil, 54 | Mmm: map[string]string{"foo": "bar", "baz": "qux"}, 55 | }, 56 | { 57 | Name: "jim", 58 | Hat: "cat", 59 | V: 31, 60 | Q: 0, 61 | Bytes: []byte{1, 2, 3, 4}, 62 | W: 0, 63 | Z: new(int64), 64 | Mmm: map[string]string{"foo": "bar", "baz": "qux"}, 65 | }, 66 | 67 | { 68 | Name: "jim", 69 | Hat: "cat", 70 | V: 31, 71 | Q: 0, 72 | Bytes: []byte{1, 2, 3, 4}, 73 | W: 0, 74 | Z: new(int64), 75 | }, 76 | {}, 77 | } 78 | 79 | for i := range contents { 80 | if err := enc.Encode(&contents[i]); err != nil { 81 | t.Fatal(err) 82 | } 83 | } 84 | 85 | if err := enc.Flush(); err != nil { 86 | t.Fatal(err) 87 | } 88 | 89 | var actual []myStruct 90 | if err := avro.ReadFile(buf, myStruct{}, func(val unsafe.Pointer, rb *avro.ResourceBank) error { 91 | v := *(*myStruct)(val) 92 | actual = append(actual, v) 93 | return nil 94 | }); err != nil { 95 | t.Fatal(err) 96 | } 97 | 98 | if diff := cmp.Diff(contents, actual, cmpopts.EquateEmpty()); diff != "" { 99 | t.Fatalf("result not as expected. %s", diff) 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /file.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "compress/flate" 7 | "encoding/binary" 8 | "errors" 9 | "fmt" 10 | "hash/crc32" 11 | "io" 12 | "os" 13 | "reflect" 14 | "unsafe" 15 | 16 | "github.com/go-json-experiment/json" 17 | "github.com/golang/snappy" 18 | ) 19 | 20 | // FileHeader represents an AVRO file header 21 | type FileHeader struct { 22 | Magic [4]byte `json:"magic"` 23 | Meta map[string][]byte `json:"meta"` 24 | Sync [16]byte `json:"sync"` 25 | } 26 | 27 | // FileMagic is the magic number for AVRO files. 28 | var FileMagic = [4]byte{'O', 'b', 'j', 1} 29 | 30 | // Note this isn't actually used except in one test of schema encoding. 
31 | var avroFileSchema = Schema{ 32 | Type: "record", 33 | Object: &SchemaObject{ 34 | Name: "org.apache.avro.file.Header", 35 | Fields: []SchemaRecordField{ 36 | { 37 | Name: "magic", 38 | Type: Schema{ 39 | Type: "fixed", 40 | Object: &SchemaObject{ 41 | Name: "Magic", 42 | Size: 4, 43 | }, 44 | }, 45 | }, 46 | { 47 | Name: "meta", 48 | Type: Schema{ 49 | Type: "map", 50 | Object: &SchemaObject{ 51 | Values: Schema{ 52 | Type: "bytes", 53 | }, 54 | }, 55 | }, 56 | }, 57 | { 58 | Name: "sync", 59 | Type: Schema{ 60 | Type: "fixed", 61 | Object: &SchemaObject{ 62 | Name: "Sync", 63 | Size: 16, 64 | }, 65 | }, 66 | }, 67 | }, 68 | }, 69 | } 70 | 71 | // FileSchema reads the Schema from an AVRO file. 72 | func FileSchema(filename string) (Schema, error) { 73 | f, err := os.Open(filename) 74 | if err != nil { 75 | return Schema{}, fmt.Errorf("failed to open file: %w", err) 76 | } 77 | defer f.Close() 78 | 79 | r := bufio.NewReader(f) 80 | 81 | fh, err := readFileHeader(r) 82 | if err != nil { 83 | return Schema{}, fmt.Errorf("failed to read AVRO file header: %w", err) 84 | } 85 | 86 | return fh.schema() 87 | } 88 | 89 | // Reader combines io.ByteReader and io.Reader. It's what we need to read 90 | type Reader interface { 91 | io.Reader 92 | io.ByteReader 93 | } 94 | 95 | // ReadFileFor is a type-safe version of ReadFile. 96 | // 97 | // var records []myrecord 98 | // if err := avro.ReadFileFor(f, func(val *myrecord, rb *avro.ResourceBank) error { 99 | // defer rb.Close() 100 | // records = append(records, *val) 101 | // return nil 102 | // }); err != nil { 103 | // return err 104 | // } 105 | 106 | func ReadFileFor[T any](r Reader, cb func(val *T, rb *ResourceBank) error) error { 107 | var t T 108 | return ReadFile(r, t, func(val unsafe.Pointer, rb *ResourceBank) error { 109 | return cb((*T)(val), rb) 110 | }) 111 | } 112 | 113 | // ReadFile reads from an AVRO file. The records in the file are decoded into 114 | // structs of the type indicated by out. These are fed back to the application 115 | // via the cb callback. ReadFile calls cb with a pointer to the struct and a 116 | // ResourceBank. The pointer is converted to an unsafe.Pointer. The pointer 117 | // should not be retained by the application past the return of cb. 118 | // 119 | // The data that val points to is allocated in a ResourceBank. When the 120 | // ResourceBank is closed the memory backing val is available for re-use. The 121 | // application should ensure data kept after that point is copied (e.g. by 122 | // calling strings.Clone for strings). 
123 | // 124 | // var records []myrecord 125 | // if err := avro.ReadFile(f, myrecord{}, func(val unsafe.Pointer, rb *avro.ResourceBank) error { 126 | // defer rb.Close() 127 | // records = append(records, *(*record)(val)) 128 | // return nil 129 | // }); err != nil { 130 | // return err 131 | // } 132 | func ReadFile(r Reader, out any, cb func(val unsafe.Pointer, rb *ResourceBank) error) error { 133 | fh, err := readFileHeader(r) 134 | if err != nil { 135 | return err 136 | } 137 | 138 | var decoder compressionCodec 139 | if compress, ok := fh.Meta["avro.codec"]; ok { 140 | switch string(compress) { 141 | case "null": 142 | decoder = nullCompression{} 143 | case "deflate": 144 | decoder = &deflate{} 145 | case "snappy": 146 | decoder = &snappyCodec{} 147 | default: 148 | return fmt.Errorf("compression codec %s not supported", string(compress)) 149 | } 150 | } 151 | 152 | schema, err := fh.schema() 153 | if err != nil { 154 | return err 155 | } 156 | 157 | codec, err := schema.Codec(out) 158 | if err != nil { 159 | return fmt.Errorf("failed to build codec. %w", err) 160 | } 161 | 162 | // At this point we know out is either a struct or a pointer to a struct. 163 | // We repeat some work from schema.Codec 164 | typ := reflect.TypeOf(out) 165 | var rtyp, p unsafe.Pointer 166 | 167 | if typ.Kind() == reflect.Ptr { 168 | // Pointer to a struct is what we really want. We can write to this as 169 | // Go semantics would allow us to write to the underlying struct without 170 | // weird unsafe tricks 171 | typ = typ.Elem() 172 | rtyp = unpackEFace(typ).data 173 | p = unpackEFace(out).data 174 | } else { 175 | // We don't try to re-use the memory of the out variable. If Go passes a 176 | // value type in an interface it may use memory that it doesn't expect 177 | // to be changed. Writing to the memory of go value types that can't be 178 | // changed except via unsafe mechanisms is almost certainly dangerous! 179 | // See see https://philpearl.github.io/post/anathema/ for one case 180 | rtyp = unpackEFace(typ).data 181 | p = unsafe_New(rtyp) 182 | } 183 | 184 | var compressed []byte 185 | br := &ReadBuf{} 186 | for { 187 | count, err := binary.ReadVarint(r) 188 | if err != nil { 189 | if errors.Is(err, io.EOF) { 190 | return nil 191 | } 192 | return fmt.Errorf("reading item count. %w", err) 193 | } 194 | dataLength, err := binary.ReadVarint(r) 195 | if err != nil { 196 | return fmt.Errorf("reading data block length. %w", err) 197 | } 198 | if cap(compressed) < int(dataLength) { 199 | compressed = make([]byte, dataLength) 200 | } else { 201 | compressed = compressed[:dataLength] 202 | } 203 | if n, err := io.ReadFull(r, compressed); err != nil { 204 | return fmt.Errorf("reading %d bytes of compressed data: %w after %d bytes", dataLength, err, n) 205 | } 206 | uncompressed, err := decoder.decompress(compressed) 207 | if err != nil { 208 | return fmt.Errorf("decompress failed: %w", err) 209 | } 210 | 211 | br.Reset(uncompressed) 212 | 213 | for i := range count { 214 | // TODO: might be better to allocate vals in blocks 215 | // Zero the data 216 | typedmemclr(rtyp, p) 217 | if err := codec.Read(br, p); err != nil { 218 | return fmt.Errorf("failed to read item %d in file. %w", i, err) 219 | } 220 | 221 | if err := cb(p, br.ExtractResourceBank()); err != nil { 222 | return err 223 | } 224 | } 225 | 226 | // Check the signature. 227 | var sig [16]byte 228 | if _, err := io.ReadFull(r, sig[:]); err != nil { 229 | return fmt.Errorf("failed reading block signature. 
%w", err) 230 | } 231 | if sig != fh.Sync { 232 | return fmt.Errorf("sync block does not match. Have %X, want %X", sig, fh.Sync) 233 | } 234 | } 235 | } 236 | 237 | func readFileHeader(r Reader) (fh FileHeader, err error) { 238 | // It would kind of make sense to use our codecs to read the header, but for 239 | // perf reasons we don't want to use a normal reader there 240 | if _, err := io.ReadFull(r, fh.Magic[:]); err != nil { 241 | return fh, fmt.Errorf("failed to read file magic: %w", err) 242 | } 243 | if fh.Magic != FileMagic { 244 | return fh, fmt.Errorf("file header Magic is not correct") 245 | } 246 | 247 | fh.Meta = make(map[string][]byte) 248 | // Seriously there's only going to be one block 249 | for { 250 | count, err := binary.ReadVarint(r) 251 | if err != nil { 252 | return fh, fmt.Errorf("failed to read count of map block. %w", err) 253 | } 254 | if count == 0 { 255 | break 256 | } 257 | if count < 0 { 258 | return fh, fmt.Errorf("negative block size not supported in file header") 259 | } 260 | 261 | for ; count > 0; count-- { 262 | key, err := readBytes(r) 263 | if err != nil { 264 | return fh, fmt.Errorf("failed to read key for map. %w", err) 265 | } 266 | 267 | val, err := readBytes(r) 268 | if err != nil { 269 | return fh, fmt.Errorf("failed to read value for map. %w", err) 270 | } 271 | // Put the thing in the thing 272 | fh.Meta[string(key)] = val 273 | } 274 | } 275 | 276 | if _, err := io.ReadFull(r, fh.Sync[:]); err != nil { 277 | return fh, fmt.Errorf("failed to read file sync: %w", err) 278 | } 279 | 280 | return fh, nil 281 | } 282 | 283 | func readBytes(r Reader) ([]byte, error) { 284 | l, err := binary.ReadVarint(r) 285 | if err != nil { 286 | return nil, err 287 | } 288 | v := make([]byte, l) 289 | _, err = io.ReadFull(r, v) 290 | return v, err 291 | } 292 | 293 | func (fh FileHeader) schema() (schema Schema, err error) { 294 | schemaJSON, ok := fh.Meta["avro.schema"] 295 | if !ok { 296 | return schema, fmt.Errorf("no schema found in file header") 297 | } 298 | 299 | if err := json.Unmarshal(schemaJSON, &schema); err != nil { 300 | return schema, fmt.Errorf("could not decode schema JSON from file header. 
%w", err) 301 | } 302 | 303 | return schema, nil 304 | } 305 | 306 | type compressionCodec interface { 307 | decompress(compressed []byte) ([]byte, error) 308 | compress(uncompressed []byte) ([]byte, error) 309 | } 310 | 311 | type nullCompression struct{} 312 | 313 | func (nullCompression) decompress(compressed []byte) ([]byte, error) { 314 | return compressed, nil 315 | } 316 | 317 | func (nullCompression) compress(uncompressed []byte) ([]byte, error) { 318 | return uncompressed, nil 319 | } 320 | 321 | type deflate struct { 322 | reader io.Reader 323 | writer *flate.Writer 324 | buf bytes.Reader 325 | out bytes.Buffer 326 | } 327 | 328 | func (d *deflate) decompress(compressed []byte) ([]byte, error) { 329 | d.buf.Reset(compressed) 330 | if d.reader == nil { 331 | d.reader = flate.NewReader(nil) 332 | } 333 | d.reader.(flate.Resetter).Reset(&d.buf, nil) 334 | 335 | d.out.Reset() 336 | d.out.ReadFrom(d.reader) 337 | 338 | return d.out.Bytes(), nil 339 | } 340 | 341 | func (d *deflate) compress(uncompressed []byte) ([]byte, error) { 342 | d.out.Reset() 343 | if d.writer == nil { 344 | d.writer, _ = flate.NewWriter(&d.out, flate.DefaultCompression) 345 | } 346 | d.writer.Reset(&d.out) 347 | if _, err := d.writer.Write(uncompressed); err != nil { 348 | return nil, fmt.Errorf("writing to deflate compressor: %w", err) 349 | } 350 | if err := d.writer.Close(); err != nil { 351 | return nil, fmt.Errorf("flushing deflate compressor: %w", err) 352 | } 353 | 354 | return d.out.Bytes(), nil 355 | } 356 | 357 | type snappyCodec struct { 358 | buf []byte 359 | } 360 | 361 | func (s *snappyCodec) decompress(compressed []byte) ([]byte, error) { 362 | var err error 363 | s.buf, err = snappy.Decode(s.buf[:cap(s.buf)], compressed[:len(compressed)-4]) 364 | if err != nil { 365 | return nil, fmt.Errorf("snappy decode failed: %w", err) 366 | } 367 | 368 | crc := binary.BigEndian.Uint32(compressed[len(compressed)-4:]) 369 | if crc32.ChecksumIEEE(s.buf) != crc { 370 | return nil, errors.New("snappy checksum mismatch") 371 | } 372 | 373 | return s.buf, nil 374 | } 375 | 376 | func (s *snappyCodec) compress(uncompressed []byte) ([]byte, error) { 377 | s.buf = snappy.Encode(s.buf[:cap(s.buf)], uncompressed) 378 | crc := crc32.ChecksumIEEE(uncompressed) 379 | s.buf = binary.BigEndian.AppendUint32(s.buf, crc) 380 | 381 | return s.buf, nil 382 | } 383 | -------------------------------------------------------------------------------- /file_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "bufio" 5 | "os" 6 | "testing" 7 | "unsafe" 8 | 9 | "github.com/google/go-cmp/cmp" 10 | ) 11 | 12 | func TestReadFile(t *testing.T) { 13 | f, err := os.Open("./testdata/avro1") 14 | if err != nil { 15 | t.Fatal(err) 16 | } 17 | defer f.Close() 18 | 19 | type obj struct { 20 | Typ string `json:"typ,omitempty"` 21 | Size float64 `json:"size,omitempty"` 22 | } 23 | type entry struct { 24 | Name string `json:"name,omitempty"` 25 | Number int64 `json:"number"` 26 | Owns []obj `json:"owns,omitempty"` 27 | } 28 | 29 | var actual []entry 30 | if err := ReadFileFor(bufio.NewReader(f), func(val *entry, sb *ResourceBank) error { 31 | actual = append(actual, *val) 32 | return nil 33 | }); err != nil { 34 | t.Fatal(err) 35 | } 36 | 37 | exp := []entry{ 38 | { 39 | Name: "jim", 40 | Number: 1, 41 | Owns: []obj{ 42 | { 43 | Typ: "hat", 44 | Size: 1, 45 | }, 46 | { 47 | Typ: "shoe", 48 | Size: 42, 49 | }, 50 | }, 51 | }, 52 | { 53 | Name: "fred", 54 | Number: 1, 55 | 
Owns: []obj{ 56 | { 57 | Typ: "bag", 58 | Size: 3.7, 59 | }, 60 | }, 61 | }, 62 | } 63 | 64 | if diff := cmp.Diff(exp, actual); diff != "" { 65 | t.Fatalf("result differs. %s", diff) 66 | } 67 | } 68 | 69 | func TestReadFileAlt(t *testing.T) { 70 | f, err := os.Open("./testdata/avro1") 71 | if err != nil { 72 | t.Fatal(err) 73 | } 74 | defer f.Close() 75 | 76 | type obj struct { 77 | Typ string `json:"typ,omitempty"` 78 | Size *float32 `json:"size,omitempty"` 79 | } 80 | type entry struct { 81 | Name *string `json:"name,omitempty"` 82 | Number **int32 `json:"number"` 83 | Owns *[]*obj `json:"owns,omitempty"` 84 | } 85 | 86 | var actual []entry 87 | var sbs []*ResourceBank 88 | if err := ReadFile(bufio.NewReader(f), &entry{}, func(val unsafe.Pointer, sb *ResourceBank) error { 89 | actual = append(actual, *(*entry)(val)) 90 | sbs = append(sbs, sb) 91 | return nil 92 | }); err != nil { 93 | t.Fatal(err) 94 | } 95 | 96 | strptr := func(v string) *string { 97 | return &v 98 | } 99 | floatptr := func(v float32) *float32 { 100 | return &v 101 | } 102 | var one int32 = 1 103 | oneptr := &one 104 | 105 | exp := []entry{ 106 | { 107 | Name: strptr("jim"), 108 | Number: &oneptr, 109 | Owns: &[]*obj{ 110 | { 111 | Typ: "hat", 112 | Size: floatptr(1), 113 | }, 114 | { 115 | Typ: "shoe", 116 | Size: floatptr(42), 117 | }, 118 | }, 119 | }, 120 | { 121 | Name: strptr("fred"), 122 | Number: &oneptr, 123 | Owns: &[]*obj{ 124 | { 125 | Typ: "bag", 126 | Size: floatptr(3.7), 127 | }, 128 | }, 129 | }, 130 | } 131 | 132 | if diff := cmp.Diff(exp, actual); diff != "" { 133 | t.Fatalf("result differs. %s", diff) 134 | } 135 | for _, sb := range sbs { 136 | sb.Close() 137 | } 138 | } 139 | 140 | func TestFileSchema(t *testing.T) { 141 | schema, err := FileSchema("./testdata/avro1") 142 | if err != nil { 143 | t.Fatal(err) 144 | } 145 | if diff := cmp.Diff(Schema{ 146 | Type: "record", 147 | Object: &SchemaObject{ 148 | Name: "Root", 149 | Fields: []SchemaRecordField{ 150 | { 151 | Name: "name", 152 | Type: Schema{Type: "union", Union: []Schema{{Type: "null"}, {Type: "string"}}}, 153 | }, 154 | { 155 | Name: "number", 156 | Type: Schema{Type: "union", Union: []Schema{{Type: "null"}, {Type: "long"}}}, 157 | }, 158 | { 159 | Name: "owns", 160 | Type: Schema{ 161 | Type: "array", 162 | Object: &SchemaObject{ 163 | Items: Schema{ 164 | Type: "record", 165 | Object: &SchemaObject{ 166 | Name: "Owns", 167 | Namespace: "root", 168 | Fields: []SchemaRecordField{ 169 | { 170 | Name: "typ", 171 | Type: Schema{Type: "union", Union: []Schema{{Type: "null"}, {Type: "string"}}}, 172 | }, 173 | { 174 | Name: "size", 175 | Type: Schema{Type: "union", Union: []Schema{{Type: "null"}, {Type: "double"}}}, 176 | }, 177 | }, 178 | }, 179 | }, 180 | }, 181 | }, 182 | }, 183 | }, 184 | }, 185 | }, schema); diff != "" { 186 | t.Fatalf("not as expected: %s", diff) 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /filewriter.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "crypto/rand" 5 | "encoding/binary" 6 | "fmt" 7 | "io" 8 | ) 9 | 10 | type Compression string 11 | 12 | const ( 13 | CompressionNull Compression = "null" 14 | CompressionDeflate Compression = "deflate" 15 | CompressionSnappy Compression = "snappy" 16 | ) 17 | 18 | // FileWriter provides limited support for writing AVRO files. It allows you to 19 | // write blocks of already encoded data. 
Actually encoding data as AVRO is supported 20 | // by the Encoder type. 21 | type FileWriter struct { 22 | sync [16]byte 23 | // It may make sense for the schema to be a Schema object. But we won't use 24 | // that until we have encoding support. 25 | schema []byte 26 | compression Compression 27 | varintBuf [binary.MaxVarintLen64]byte 28 | compressor compressionCodec 29 | } 30 | 31 | // NewFileWriter creates a new FileWriter. The schema is the JSON encoded 32 | // schema. The compression parameter indicates the compression codec to use. 33 | func NewFileWriter(schema []byte, compression Compression) (*FileWriter, error) { 34 | // Generate a random sync value 35 | f := &FileWriter{ 36 | schema: schema, 37 | compression: compression, 38 | } 39 | _, err := rand.Read(f.sync[:]) 40 | if err != nil { 41 | return nil, fmt.Errorf("creating sync value: %w", err) 42 | } 43 | 44 | switch compression { 45 | case CompressionNull: 46 | f.compressor = nullCompression{} 47 | case CompressionDeflate: 48 | f.compressor = &deflate{} 49 | case CompressionSnappy: 50 | f.compressor = &snappyCodec{} 51 | default: 52 | return nil, fmt.Errorf("compression codec %s not supported", compression) 53 | } 54 | 55 | return f, nil 56 | } 57 | 58 | // WriteHeader writes the AVRO file header to the writer. 59 | func (f *FileWriter) WriteHeader(w io.Writer) error { 60 | buf := make([]byte, 0, 1024) 61 | buf = f.AppendHeader(buf) 62 | _, err := w.Write(buf) 63 | return err 64 | } 65 | 66 | // AppendHeader appends the AVRO file header to the provided buffer. 67 | func (f *FileWriter) AppendHeader(buf []byte) []byte { 68 | // Write the magic bytes 69 | buf = append(buf, FileMagic[:]...) 70 | 71 | // Count of how many metadata blocks there are. 72 | buf = binary.AppendVarint(buf, 2) 73 | 74 | // Write the metadata block. There will be an entry for the compression type 75 | // and an entry for the schema. Each entry is a string key followed by a 76 | // string value. Strings are written as a varint encoded length and then the 77 | // bytes of the string. 78 | buf = appendString(buf, "avro.schema") 79 | buf = appendString(buf, f.schema) 80 | buf = appendString(buf, "avro.codec") 81 | buf = appendString(buf, f.compression) 82 | 83 | // Append a zero count to indicate no more header blocks. 84 | buf = binary.AppendVarint(buf, 0) 85 | 86 | // Write the sync bytes. This is just the 16 bytes of the sync field. 87 | buf = append(buf, f.sync[:]...) 88 | return buf 89 | } 90 | 91 | type appendable interface { 92 | ~string | ~[]byte 93 | } 94 | 95 | func appendString[T appendable](buf []byte, s T) []byte { 96 | buf = binary.AppendVarint(buf, int64(len(s))) 97 | buf = append(buf, s...) 98 | return buf 99 | } 100 | 101 | func (f *FileWriter) writeVarInt(w io.Writer, v int) error { 102 | n := binary.PutVarint(f.varintBuf[:], int64(v)) 103 | _, err := w.Write(f.varintBuf[:n]) 104 | return err 105 | } 106 | 107 | // WriteBlock writes a block of data to the writer. The block must be rowCount 108 | // rows of AVRO encoded data. 
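// A minimal usage sketch (illustrative only; w, schemaJSON, rowCount and
// encodedRows are placeholder names, not identifiers from this package). The
// header is written once, then each WriteBlock call appends one compressed
// block of pre-encoded rows, mirroring the flow exercised in filewriter_test.go:
//
//	fw, err := NewFileWriter(schemaJSON, CompressionSnappy)
//	if err != nil {
//		return err
//	}
//	if err := fw.WriteHeader(w); err != nil {
//		return err
//	}
//	if err := fw.WriteBlock(w, rowCount, encodedRows); err != nil {
//		return err
//	}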
109 | func (f *FileWriter) WriteBlock(w io.Writer, rowCount int, block []byte) error { 110 | // Write the count of rows in the block 111 | if err := f.writeVarInt(w, rowCount); err != nil { 112 | return fmt.Errorf("writing row count: %w", err) 113 | } 114 | 115 | compressed, err := f.compressor.compress(block) 116 | if err != nil { 117 | return fmt.Errorf("compressing block: %w", err) 118 | } 119 | 120 | // Write the (compressed) block size 121 | if err := f.writeVarInt(w, len(compressed)); err != nil { 122 | return fmt.Errorf("writing block len: %w", err) 123 | } 124 | 125 | // Write the block data. 126 | if _, err := w.Write(compressed); err != nil { 127 | return fmt.Errorf("writing block: %w", err) 128 | } 129 | 130 | // Write the sync block 131 | if _, err := w.Write(f.sync[:]); err != nil { 132 | return fmt.Errorf("writing sync: %w", err) 133 | } 134 | return nil 135 | } 136 | -------------------------------------------------------------------------------- /filewriter_test.go: -------------------------------------------------------------------------------- 1 | package avro_test 2 | 3 | import ( 4 | "bufio" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | "unsafe" 9 | 10 | "github.com/google/go-cmp/cmp" 11 | "github.com/philpearl/avro" 12 | ) 13 | 14 | func TestWritingFile(t *testing.T) { 15 | type record struct { 16 | Name string `json:"name"` 17 | Hat string `json:"hat"` 18 | } 19 | 20 | schema := avro.Schema{ 21 | Type: "record", 22 | Object: &avro.SchemaObject{ 23 | Name: "Record", 24 | Fields: []avro.SchemaRecordField{ 25 | { 26 | Name: "name", 27 | Type: avro.Schema{ 28 | Type: "string", 29 | }, 30 | }, 31 | { 32 | Name: "hat", 33 | Type: avro.Schema{ 34 | Type: "string", 35 | }, 36 | }, 37 | }, 38 | }, 39 | } 40 | 41 | schemaJSON, err := schema.Marshal() 42 | if err != nil { 43 | t.Fatal(err) 44 | } 45 | 46 | data := []byte{ 47 | 6, 'j', 'i', 'm', 48 | 6, 'c', 'a', 't', 49 | 50 | 6, 's', 'i', 'm', 51 | 6, 'h', 'a', 't', 52 | } 53 | 54 | for _, compression := range []avro.Compression{avro.CompressionDeflate, avro.CompressionSnappy} { 55 | t.Run(string(compression), func(t *testing.T) { 56 | dir := t.TempDir() 57 | filename := filepath.Join(dir, "test.avro") 58 | 59 | f, err := os.Create(filename) 60 | if err != nil { 61 | t.Fatal(err) 62 | } 63 | defer f.Close() 64 | 65 | fw, err := avro.NewFileWriter(schemaJSON, compression) 66 | if err != nil { 67 | t.Fatal(err) 68 | } 69 | 70 | if err := fw.WriteHeader(f); err != nil { 71 | t.Fatal(err) 72 | } 73 | 74 | if err := fw.WriteBlock(f, 2, data); err != nil { 75 | t.Fatal(err) 76 | } 77 | 78 | if err := f.Close(); err != nil { 79 | t.Fatal(err) 80 | } 81 | 82 | r, err := os.Open(filename) 83 | if err != nil { 84 | t.Fatal(err) 85 | } 86 | defer r.Close() 87 | 88 | var records []record 89 | 90 | if err := avro.ReadFile(bufio.NewReader(r), record{}, func(val unsafe.Pointer, rb *avro.ResourceBank) error { 91 | r := (*record)(val) 92 | t.Logf("read record: %+v", r) 93 | records = append(records, *r) 94 | return nil 95 | }); err != nil { 96 | t.Fatal(err) 97 | } 98 | 99 | if diff := cmp.Diff([]record{ 100 | {Name: "jim", Hat: "cat"}, 101 | {Name: "sim", Hat: "hat"}, 102 | }, records); diff != "" { 103 | t.Fatal(diff) 104 | } 105 | }) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /fixed.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "unsafe" 6 | ) 7 | 8 | type fixedCodec struct { 9 | Size int 10 | 
} 11 | 12 | type sliceHeader struct { 13 | Data unsafe.Pointer 14 | Len int 15 | Cap int 16 | } 17 | 18 | func (f fixedCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 19 | // p points to an array of size f.Size 20 | sh := unsafe.Slice((*byte)(p), f.Size) 21 | data, err := r.Next(f.Size) 22 | copy(sh, data) 23 | return err 24 | } 25 | 26 | func (f fixedCodec) Skip(r *ReadBuf) error { 27 | return skip(r, int64(f.Size)) 28 | } 29 | 30 | func (f fixedCodec) New(r *ReadBuf) unsafe.Pointer { 31 | return r.AllocArray(reflect.TypeFor[byte](), f.Size) 32 | } 33 | 34 | func (rc fixedCodec) Omit(p unsafe.Pointer) bool { 35 | return false 36 | } 37 | 38 | func (rc fixedCodec) Write(w *WriteBuf, p unsafe.Pointer) { 39 | sh := unsafe.Slice((*byte)(p), rc.Size) 40 | w.Write(sh) 41 | } 42 | -------------------------------------------------------------------------------- /fixed_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "testing" 5 | "unsafe" 6 | 7 | "github.com/google/go-cmp/cmp" 8 | ) 9 | 10 | func TestFixed(t *testing.T) { 11 | tests := []struct { 12 | name string 13 | data []byte 14 | exp [3]byte 15 | }{ 16 | { 17 | name: "basic", 18 | data: []byte{1, 2, 3}, 19 | exp: [3]byte{1, 2, 3}, 20 | }, 21 | } 22 | 23 | for _, test := range tests { 24 | test := test 25 | t.Run(test.name, func(t *testing.T) { 26 | t.Parallel() 27 | c := fixedCodec{Size: 3} 28 | b := NewReadBuf(test.data) 29 | var actual [3]byte 30 | if err := c.Read(b, unsafe.Pointer(&actual)); err != nil { 31 | t.Fatal(err) 32 | } 33 | if diff := cmp.Diff(test.exp, actual); diff != "" { 34 | t.Fatalf("result differs: %s", diff) 35 | } 36 | if b.Len() != 0 { 37 | t.Fatalf("Not all data read: %d", b.Len()) 38 | } 39 | }) 40 | t.Run(test.name+" skip", func(t *testing.T) { 41 | t.Parallel() 42 | c := fixedCodec{Size: 3} 43 | b := NewReadBuf(test.data) 44 | if err := c.Skip(b); err != nil { 45 | t.Fatal(err) 46 | } 47 | if b.Len() != 0 { 48 | t.Fatalf("Not all data read: %d", b.Len()) 49 | } 50 | }) 51 | 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /float.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "unsafe" 7 | ) 8 | 9 | type floatCodec[t float32 | float64] struct{ omitEmpty bool } 10 | 11 | func (floatCodec[T]) Read(r *ReadBuf, p unsafe.Pointer) error { 12 | // This works for little-endian only (or is it bigendian?) 
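// (Per the AVRO spec, floats and doubles are written as their IEEE 754 bit
// patterns in little-endian byte order, so this byte-for-byte copy is only
// correct on little-endian hosts.)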
13 | return fixedCodec{Size: int(unsafe.Sizeof(T(0)))}.Read(r, p) 14 | } 15 | 16 | func (floatCodec[T]) Skip(r *ReadBuf) error { 17 | return skip(r, int64(unsafe.Sizeof(T(0)))) 18 | } 19 | 20 | var ( 21 | floatType = reflect.TypeFor[float32]() 22 | doubleType = reflect.TypeFor[float64]() 23 | ) 24 | 25 | func (floatCodec[T]) New(r *ReadBuf) unsafe.Pointer { 26 | switch unsafe.Sizeof(T(0)) { 27 | case 4: 28 | return r.Alloc(floatType) 29 | case 8: 30 | return r.Alloc(doubleType) 31 | } 32 | panic(fmt.Sprintf("unexpected float size %d", unsafe.Sizeof(T(0)))) 33 | } 34 | 35 | func (rc floatCodec[T]) Omit(p unsafe.Pointer) bool { 36 | return rc.omitEmpty && *(*T)(p) == 0 37 | } 38 | 39 | func (rc floatCodec[T]) Write(w *WriteBuf, p unsafe.Pointer) { 40 | fixedCodec{Size: int(unsafe.Sizeof(T(0)))}.Write(w, p) 41 | } 42 | 43 | type ( 44 | FloatCodec = floatCodec[float32] 45 | DoubleCodec = floatCodec[float64] 46 | ) 47 | 48 | type Float32DoubleCodec struct { 49 | DoubleCodec 50 | } 51 | 52 | func (c Float32DoubleCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 53 | var f float64 54 | if err := c.DoubleCodec.Read(r, unsafe.Pointer(&f)); err != nil { 55 | return err 56 | } 57 | *(*float32)(p) = float32(f) 58 | return nil 59 | } 60 | 61 | func (Float32DoubleCodec) New(r *ReadBuf) unsafe.Pointer { 62 | return r.Alloc(floatType) 63 | } 64 | 65 | func (rc Float32DoubleCodec) Omit(p unsafe.Pointer) bool { 66 | return rc.omitEmpty && *(*float32)(p) == 0 67 | } 68 | 69 | func (rc Float32DoubleCodec) Write(w *WriteBuf, p unsafe.Pointer) { 70 | q := float64(*(*float32)(p)) 71 | fixedCodec{Size: 8}.Write(w, unsafe.Pointer(&q)) 72 | } 73 | -------------------------------------------------------------------------------- /float_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | "unsafe" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | "github.com/google/go-cmp/cmp/cmpopts" 10 | ) 11 | 12 | func TestFloatCodec(t *testing.T) { 13 | tests := []struct { 14 | name string 15 | data []byte 16 | exp float32 17 | }{ 18 | { 19 | name: "zero", 20 | data: []byte{0, 0, 0, 0}, 21 | }, 22 | { 23 | name: "something", 24 | data: []byte{0, 1, 0, 0}, 25 | exp: 3.587324068671532e-43, 26 | }, 27 | } 28 | var c FloatCodec 29 | for _, test := range tests { 30 | test := test 31 | t.Run(test.name, func(t *testing.T) { 32 | t.Parallel() 33 | r := NewReadBuf(test.data) 34 | var actual float32 35 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 36 | t.Fatal(err) 37 | } 38 | 39 | if diff := cmp.Diff(test.exp, actual); diff != "" { 40 | t.Fatalf("result not as expected. 
%s", diff) 41 | } 42 | if r.Len() != 0 { 43 | t.Fatalf("unread data %d", r.Len()) 44 | } 45 | }) 46 | t.Run(test.name+" skip", func(t *testing.T) { 47 | t.Parallel() 48 | r := NewReadBuf(test.data) 49 | if err := c.Skip(r); err != nil { 50 | t.Fatal(err) 51 | } 52 | if r.Len() != 0 { 53 | t.Fatalf("unread data %d", r.Len()) 54 | } 55 | }) 56 | } 57 | } 58 | 59 | func TestDoubleCodec(t *testing.T) { 60 | tests := []struct { 61 | name string 62 | data []byte 63 | exp float64 64 | }{ 65 | { 66 | name: "zero", 67 | data: []byte{0, 0, 0, 0, 0, 0, 0, 0}, 68 | }, 69 | { 70 | name: "something", 71 | data: []byte{0, 1, 0, 0, 0, 0, 0, 0}, 72 | exp: 1.265e-321, 73 | }, 74 | } 75 | var c DoubleCodec 76 | for _, test := range tests { 77 | test := test 78 | t.Run(test.name, func(t *testing.T) { 79 | t.Parallel() 80 | r := NewReadBuf(test.data) 81 | var actual float64 82 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 83 | t.Fatal(err) 84 | } 85 | 86 | if diff := cmp.Diff(test.exp, actual); diff != "" { 87 | t.Fatalf("result not as expected. %s", diff) 88 | } 89 | if r.Len() != 0 { 90 | t.Fatalf("unread data %d", r.Len()) 91 | } 92 | }) 93 | t.Run(test.name+" skip", func(t *testing.T) { 94 | t.Parallel() 95 | r := NewReadBuf(test.data) 96 | if err := c.Skip(r); err != nil { 97 | t.Fatal(err) 98 | } 99 | if r.Len() != 0 { 100 | t.Fatalf("unread data %d", r.Len()) 101 | } 102 | }) 103 | } 104 | } 105 | 106 | func TestFloat32DoubleCodec(t *testing.T) { 107 | tests := []struct { 108 | name string 109 | data []byte 110 | exp float32 111 | }{ 112 | { 113 | name: "zero", 114 | data: []byte{0, 0, 0, 0, 0, 0, 0, 0}, 115 | }, 116 | { 117 | name: "something", 118 | data: []byte{0, 1, 0, 0, 0, 0, 0, 0}, 119 | exp: 1.265e-321, 120 | }, 121 | } 122 | var c Float32DoubleCodec 123 | for _, test := range tests { 124 | test := test 125 | t.Run(test.name, func(t *testing.T) { 126 | t.Parallel() 127 | r := NewReadBuf(test.data) 128 | var actual float32 129 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 130 | t.Fatal(err) 131 | } 132 | 133 | if diff := cmp.Diff(test.exp, actual); diff != "" { 134 | t.Fatalf("result not as expected. %s", diff) 135 | } 136 | if r.Len() != 0 { 137 | t.Fatalf("unread data %d", r.Len()) 138 | } 139 | }) 140 | t.Run(test.name+" skip", func(t *testing.T) { 141 | t.Parallel() 142 | r := NewReadBuf(test.data) 143 | if err := c.Skip(r); err != nil { 144 | t.Fatal(err) 145 | } 146 | if r.Len() != 0 { 147 | t.Fatalf("unread data %d", r.Len()) 148 | } 149 | }) 150 | } 151 | } 152 | 153 | func TestFloatRoundTrip(t *testing.T) { 154 | tests := []struct { 155 | name string 156 | val float32 157 | }{ 158 | { 159 | name: "zero", 160 | val: 0, 161 | }, 162 | { 163 | name: "something", 164 | val: 3.587324068671532e-43, 165 | }, 166 | { 167 | name: "negative", 168 | val: -3.587324068671532e-43, 169 | }, 170 | 171 | { 172 | name: "max", 173 | val: 3.4028234663852886e+38, 174 | }, 175 | { 176 | name: "NAN", 177 | val: float32(math.NaN()), 178 | }, 179 | } 180 | 181 | for _, test := range tests { 182 | t.Run(test.name, func(t *testing.T) { 183 | t.Parallel() 184 | w := NewWriteBuf(nil) 185 | var c FloatCodec 186 | c.Write(w, unsafe.Pointer(&test.val)) 187 | r := NewReadBuf(w.Bytes()) 188 | var actual float32 189 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 190 | t.Fatal(err) 191 | } 192 | if diff := cmp.Diff(test.val, actual, cmpopts.EquateNaNs()); diff != "" { 193 | t.Fatalf("result not as expected. 
%s", diff) 194 | } 195 | }) 196 | } 197 | } 198 | 199 | func TestDoubleRoundTrip(t *testing.T) { 200 | tests := []struct { 201 | name string 202 | val float64 203 | }{ 204 | { 205 | name: "zero", 206 | val: 0, 207 | }, 208 | { 209 | name: "something", 210 | val: 3.587324068671532e-43, 211 | }, 212 | { 213 | name: "negative", 214 | val: -3.587324068671532e-43, 215 | }, 216 | 217 | { 218 | name: "max", 219 | val: 3.4028234663852886e+38, 220 | }, 221 | { 222 | name: "NAN", 223 | val: math.NaN(), 224 | }, 225 | } 226 | 227 | for _, test := range tests { 228 | t.Run(test.name, func(t *testing.T) { 229 | t.Parallel() 230 | w := NewWriteBuf(nil) 231 | var c DoubleCodec 232 | c.Write(w, unsafe.Pointer(&test.val)) 233 | r := NewReadBuf(w.Bytes()) 234 | var actual float64 235 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 236 | t.Fatal(err) 237 | } 238 | if diff := cmp.Diff(test.val, actual, cmpopts.EquateNaNs()); diff != "" { 239 | t.Fatalf("result not as expected. %s", diff) 240 | } 241 | }) 242 | } 243 | } 244 | 245 | func TestFloat32DoubleRoundTrip(t *testing.T) { 246 | tests := []struct { 247 | name string 248 | val float32 249 | }{ 250 | { 251 | name: "zero", 252 | val: 0, 253 | }, 254 | { 255 | name: "something", 256 | val: 3.587324068671532e-43, 257 | }, 258 | { 259 | name: "negative", 260 | val: -3.587324068671532e-43, 261 | }, 262 | 263 | { 264 | name: "max", 265 | val: 3.4028234663852886e+38, 266 | }, 267 | { 268 | name: "NAN", 269 | val: float32(math.NaN()), 270 | }, 271 | } 272 | 273 | for _, test := range tests { 274 | t.Run(test.name, func(t *testing.T) { 275 | t.Parallel() 276 | w := NewWriteBuf(nil) 277 | var c Float32DoubleCodec 278 | c.Write(w, unsafe.Pointer(&test.val)) 279 | r := NewReadBuf(w.Bytes()) 280 | var actual float32 281 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 282 | t.Fatal(err) 283 | } 284 | if diff := cmp.Diff(test.val, actual, cmpopts.EquateNaNs()); diff != "" { 285 | t.Fatalf("result not as expected. 
%s", diff) 286 | } 287 | }) 288 | } 289 | } 290 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/philpearl/avro 2 | 3 | go 1.24 4 | 5 | toolchain go1.24.0 6 | 7 | replace github.com/unravelin/null => github.com/unravelin/null/v5 v5.0.1 8 | 9 | require ( 10 | github.com/go-json-experiment/json v0.0.0-20250417205406-170dfdcf87d1 11 | github.com/golang/snappy v1.0.0 12 | github.com/google/go-cmp v0.7.0 13 | github.com/unravelin/null v1.0.2 14 | ) 15 | 16 | require ( 17 | github.com/josharian/intern v1.0.0 // indirect 18 | github.com/mailru/easyjson v0.9.0 // indirect 19 | ) 20 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/go-json-experiment/json v0.0.0-20250417205406-170dfdcf87d1 h1:+VexzzkMLb1tnvpuQdGT/DicIRW7MN8ozsXqBMgp0Hk= 4 | github.com/go-json-experiment/json v0.0.0-20250417205406-170dfdcf87d1/go.mod h1:TiCD2a1pcmjd7YnhGH0f/zKNcCD06B029pHhzV23c2M= 5 | github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs= 6 | github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 7 | github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= 8 | github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= 9 | github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= 10 | github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 11 | github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= 12 | github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= 13 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 14 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 15 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 16 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 17 | github.com/unravelin/null/v5 v5.0.1 h1:FFAIq7N231O4CJreN7azzDPdtwIzJ3X+D4N/Gz3kHHE= 18 | github.com/unravelin/null/v5 v5.0.1/go.mod h1:W48ySiXKyk9D4taw9pUl3jYuUjsfWDXEDSu6CEBp1Cw= 19 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 20 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 21 | -------------------------------------------------------------------------------- /int.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "unsafe" 7 | ) 8 | 9 | // Int64Codec is an avro codec for int64 10 | type IntCodec[T int64 | int32 | int16] struct{ omitEmpty bool } 11 | 12 | func (IntCodec[T]) Read(r *ReadBuf, p unsafe.Pointer) error { 13 | i, err := r.Varint() 14 | 15 | if i > int64(uint64(1)<<(unsafe.Sizeof(T(0))*8-1)-1) || 16 | i < -1<<(unsafe.Sizeof(T(0))*8-1) { 17 | return fmt.Errorf("value %d will not fit in %T", i, T(0)) 18 | } 19 | 20 | *(*T)(p) = T(i) 21 | return err 22 | } 23 | 24 | // Skip skips over an int 25 | func (IntCodec[T]) Skip(r *ReadBuf) error { 26 | _, err := 
r.Varint() 27 | return err 28 | } 29 | 30 | var ( 31 | int64Type = reflect.TypeFor[int64]() 32 | int32Type = reflect.TypeFor[int32]() 33 | int16Type = reflect.TypeFor[int16]() 34 | ) 35 | 36 | // New creates a pointer to a new int64 37 | func (IntCodec[T]) New(r *ReadBuf) unsafe.Pointer { 38 | switch unsafe.Sizeof(T(0)) { 39 | case 8: 40 | return r.Alloc(int64Type) 41 | case 4: 42 | return r.Alloc(int32Type) 43 | case 2: 44 | return r.Alloc(int16Type) 45 | } 46 | panic(fmt.Sprintf("unexpected int size %d", unsafe.Sizeof(T(0)))) 47 | } 48 | 49 | func (rc IntCodec[T]) Omit(p unsafe.Pointer) bool { 50 | return rc.omitEmpty && *(*T)(p) == 0 51 | } 52 | 53 | func (rc IntCodec[T]) Write(w *WriteBuf, p unsafe.Pointer) { 54 | w.Varint(int64(*(*T)(p))) 55 | } 56 | 57 | type ( 58 | Int64Codec = IntCodec[int64] 59 | Int32Codec = IntCodec[int32] 60 | Int16Codec = IntCodec[int16] 61 | ) 62 | -------------------------------------------------------------------------------- /int_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | "unsafe" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | ) 10 | 11 | func TestInt64Codec(t *testing.T) { 12 | tests := []struct { 13 | name string 14 | data []byte 15 | exp int64 16 | }{ 17 | { 18 | name: "zero", 19 | data: []byte{0}, 20 | }, 21 | { 22 | name: "something", 23 | data: []byte{46}, 24 | exp: 23, 25 | }, 26 | { 27 | name: "-something", 28 | data: []byte{45}, 29 | exp: -23, 30 | }, 31 | { 32 | name: "max", 33 | data: []byte{254, 255, 255, 255, 255, 255, 255, 255, 255, 1}, 34 | exp: math.MaxInt64, 35 | }, 36 | { 37 | name: "min", 38 | data: []byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 1}, 39 | exp: math.MinInt64, 40 | }, 41 | } 42 | var c Int64Codec 43 | for _, test := range tests { 44 | test := test 45 | t.Run(test.name, func(t *testing.T) { 46 | t.Parallel() 47 | r := NewReadBuf(test.data) 48 | var actual int64 49 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 50 | t.Fatal(err) 51 | } 52 | 53 | if diff := cmp.Diff(test.exp, actual); diff != "" { 54 | t.Fatalf("result not as expected. %s", diff) 55 | } 56 | if r.Len() != 0 { 57 | t.Fatalf("unread data %d", r.Len()) 58 | } 59 | }) 60 | t.Run(test.name+" skip", func(t *testing.T) { 61 | t.Parallel() 62 | r := NewReadBuf(test.data) 63 | if err := c.Skip(r); err != nil { 64 | t.Fatal(err) 65 | } 66 | if r.Len() != 0 { 67 | t.Fatalf("unread data %d", r.Len()) 68 | } 69 | }) 70 | } 71 | } 72 | 73 | func TestInt32Codec(t *testing.T) { 74 | tests := []struct { 75 | name string 76 | data []byte 77 | exp int32 78 | }{ 79 | { 80 | name: "zero", 81 | data: []byte{0}, 82 | }, 83 | { 84 | name: "something", 85 | data: []byte{46}, 86 | exp: 23, 87 | }, 88 | { 89 | name: "-something", 90 | data: []byte{45}, 91 | exp: -23, 92 | }, 93 | { 94 | name: "max", 95 | data: []byte{254, 255, 255, 255, 15}, 96 | exp: math.MaxInt32, 97 | }, 98 | { 99 | name: "min", 100 | data: []byte{255, 255, 255, 255, 15}, 101 | exp: math.MinInt32, 102 | }, 103 | } 104 | var c Int32Codec 105 | for _, test := range tests { 106 | test := test 107 | t.Run(test.name, func(t *testing.T) { 108 | t.Parallel() 109 | r := NewReadBuf(test.data) 110 | var actual int32 111 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 112 | t.Fatal(err) 113 | } 114 | 115 | if diff := cmp.Diff(test.exp, actual); diff != "" { 116 | t.Fatalf("result not as expected. 
%s", diff) 117 | } 118 | if r.Len() != 0 { 119 | t.Fatalf("unread data %d", r.Len()) 120 | } 121 | }) 122 | t.Run(test.name+" skip", func(t *testing.T) { 123 | t.Parallel() 124 | r := NewReadBuf(test.data) 125 | if err := c.Skip(r); err != nil { 126 | t.Fatal(err) 127 | } 128 | if r.Len() != 0 { 129 | t.Fatalf("unread data %d", r.Len()) 130 | } 131 | }) 132 | } 133 | } 134 | 135 | func TestInt16TooBig(t *testing.T) { 136 | var c Int16Codec 137 | r := NewReadBuf([]byte{128, 128, 4}) 138 | var actual int16 139 | err := c.Read(r, unsafe.Pointer(&actual)) 140 | if err == nil { 141 | t.Fatal("expected an error") 142 | } 143 | if s := err.Error(); s != "value 32768 will not fit in int16" { 144 | t.Fatalf("error not as expected: %q", s) 145 | } 146 | } 147 | 148 | func TestInt16Codec(t *testing.T) { 149 | tests := []struct { 150 | name string 151 | data []byte 152 | exp int16 153 | }{ 154 | { 155 | name: "zero", 156 | data: []byte{0}, 157 | }, 158 | { 159 | name: "something", 160 | data: []byte{46}, 161 | exp: 23, 162 | }, 163 | { 164 | name: "-something", 165 | data: []byte{45}, 166 | exp: -23, 167 | }, 168 | { 169 | name: "max", 170 | data: []byte{254, 255, 3}, 171 | exp: math.MaxInt16, 172 | }, 173 | { 174 | name: "min", 175 | data: []byte{255, 255, 3}, 176 | exp: math.MinInt16, 177 | }, 178 | } 179 | var c Int16Codec 180 | for _, test := range tests { 181 | test := test 182 | t.Run(test.name, func(t *testing.T) { 183 | t.Parallel() 184 | r := NewReadBuf(test.data) 185 | var actual int16 186 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 187 | t.Fatal(err) 188 | } 189 | 190 | if diff := cmp.Diff(test.exp, actual); diff != "" { 191 | t.Fatalf("result not as expected. %s", diff) 192 | } 193 | if r.Len() != 0 { 194 | t.Fatalf("unread data %d", r.Len()) 195 | } 196 | }) 197 | t.Run(test.name+" skip", func(t *testing.T) { 198 | t.Parallel() 199 | r := NewReadBuf(test.data) 200 | if err := c.Skip(r); err != nil { 201 | t.Fatal(err) 202 | } 203 | if r.Len() != 0 { 204 | t.Fatalf("unread data %d", r.Len()) 205 | } 206 | }) 207 | } 208 | } 209 | 210 | func TestInt64RoundTrip(t *testing.T) { 211 | tests := []struct { 212 | name string 213 | in int64 214 | }{ 215 | { 216 | name: "zero", 217 | in: 0, 218 | }, 219 | { 220 | name: "something", 221 | in: 23, 222 | }, 223 | { 224 | name: "-something", 225 | in: -23, 226 | }, 227 | { 228 | name: "max", 229 | in: math.MaxInt64, 230 | }, 231 | { 232 | name: "min", 233 | in: math.MinInt64, 234 | }, 235 | } 236 | var c Int64Codec 237 | for _, test := range tests { 238 | t.Run(test.name, func(t *testing.T) { 239 | buf := NewWriteBuf(nil) 240 | c.Write(buf, unsafe.Pointer(&test.in)) 241 | var actual int64 242 | if err := c.Read(NewReadBuf(buf.Bytes()), unsafe.Pointer(&actual)); err != nil { 243 | t.Fatal(err) 244 | } 245 | if actual != test.in { 246 | t.Fatalf("%d does not match expected %d", actual, test.in) 247 | } 248 | }) 249 | } 250 | } 251 | 252 | func TestInt32RoundTrip(t *testing.T) { 253 | tests := []struct { 254 | name string 255 | in int32 256 | }{ 257 | { 258 | name: "zero", 259 | in: 0, 260 | }, 261 | { 262 | name: "something", 263 | in: 23, 264 | }, 265 | { 266 | name: "-something", 267 | in: -23, 268 | }, 269 | { 270 | name: "max", 271 | in: math.MaxInt32, 272 | }, 273 | { 274 | name: "min", 275 | in: math.MinInt32, 276 | }, 277 | } 278 | var c Int64Codec 279 | for _, test := range tests { 280 | t.Run(test.name, func(t *testing.T) { 281 | buf := NewWriteBuf(nil) 282 | c.Write(buf, unsafe.Pointer(&test.in)) 283 | var actual int32 284 | if 
err := c.Read(NewReadBuf(buf.Bytes()), unsafe.Pointer(&actual)); err != nil { 285 | t.Fatal(err) 286 | } 287 | if actual != test.in { 288 | t.Fatalf("%d does not match expected %d", actual, test.in) 289 | } 290 | }) 291 | } 292 | } 293 | 294 | func TestInt16RoundTrip(t *testing.T) { 295 | tests := []struct { 296 | name string 297 | in int16 298 | }{ 299 | { 300 | name: "zero", 301 | in: 0, 302 | }, 303 | { 304 | name: "something", 305 | in: 23, 306 | }, 307 | { 308 | name: "-something", 309 | in: -23, 310 | }, 311 | { 312 | name: "max", 313 | in: math.MaxInt16, 314 | }, 315 | { 316 | name: "min", 317 | in: math.MinInt16, 318 | }, 319 | } 320 | var c Int64Codec 321 | for _, test := range tests { 322 | t.Run(test.name, func(t *testing.T) { 323 | buf := NewWriteBuf(nil) 324 | c.Write(buf, unsafe.Pointer(&test.in)) 325 | var actual int16 326 | if err := c.Read(NewReadBuf(buf.Bytes()), unsafe.Pointer(&actual)); err != nil { 327 | t.Fatal(err) 328 | } 329 | if actual != test.in { 330 | t.Fatalf("%d does not match expected %d", actual, test.in) 331 | } 332 | }) 333 | } 334 | } 335 | -------------------------------------------------------------------------------- /interface.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import "unsafe" 4 | 5 | type eface struct { 6 | rtype unsafe.Pointer 7 | data unsafe.Pointer 8 | } 9 | 10 | func unpackEFace(obj interface{}) *eface { 11 | return (*eface)(unsafe.Pointer(&obj)) 12 | } 13 | -------------------------------------------------------------------------------- /map.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "unsafe" 7 | ) 8 | 9 | // MapCodec is a decoder for map types. The key must always be string 10 | type MapCodec struct { 11 | valueCodec Codec 12 | rtype reflect.Type 13 | omitEmpty bool 14 | } 15 | 16 | func (m *MapCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 17 | // p is a pointer to a map pointer 18 | if *(*unsafe.Pointer)(p) == nil { 19 | *(*unsafe.Pointer)(p) = m.New(r) 20 | } 21 | mp := *(*unsafe.Pointer)(p) 22 | 23 | // Blocks are repeated until there's a zero count block 24 | for { 25 | count, err := r.Varint() 26 | if err != nil { 27 | return fmt.Errorf("failed to read count of map block. %w", err) 28 | } 29 | if count == 0 { 30 | break 31 | } 32 | 33 | if count < 0 { 34 | count = -count 35 | // Block size is more useful if we're skipping over the map 36 | if _, err := r.Varint(); err != nil { 37 | return fmt.Errorf("failed to read block size of map block. %w", err) 38 | } 39 | } 40 | 41 | var sc StringCodec 42 | for ; count > 0; count-- { 43 | var key string 44 | if err := sc.Read(r, unsafe.Pointer(&key)); err != nil { 45 | return fmt.Errorf("failed to read key for map. %w", err) 46 | } 47 | 48 | // TODO: can we just reuse one val? 49 | val := m.valueCodec.New(r) 50 | if err := m.valueCodec.Read(r, val); err != nil { 51 | return fmt.Errorf("failed to read value for map key %s. %w", key, err) 52 | } 53 | // Put the thing in the thing 54 | mapassign(unpackEFace(m.rtype).data, mp, unsafe.Pointer(&key), val) 55 | } 56 | } 57 | 58 | return nil 59 | } 60 | 61 | func (m *MapCodec) Skip(r *ReadBuf) error { 62 | for { 63 | count, err := r.Varint() 64 | if err != nil { 65 | return fmt.Errorf("failed to read count of map block. 
%w", err) 66 | } 67 | 68 | if count == 0 { 69 | break 70 | } 71 | 72 | if count < 0 { 73 | bs, err := r.Varint() 74 | if err != nil { 75 | return fmt.Errorf("failed to read block size of map block. %w", err) 76 | } 77 | if err := skip(r, bs); err != nil { 78 | return fmt.Errorf("failed skipping block of map. %w", err) 79 | } 80 | continue 81 | } 82 | 83 | var sc StringCodec 84 | for ; count > 0; count-- { 85 | if err := sc.Skip(r); err != nil { 86 | return fmt.Errorf("failed to skip key for map. %w", err) 87 | } 88 | 89 | if err := m.valueCodec.Skip(r); err != nil { 90 | return fmt.Errorf("failed to skip value for map. %w", err) 91 | } 92 | } 93 | } 94 | 95 | return nil 96 | } 97 | 98 | func (m *MapCodec) New(r *ReadBuf) unsafe.Pointer { 99 | return unsafe.Pointer(reflect.MakeMap(m.rtype).Pointer()) 100 | } 101 | 102 | func (m *MapCodec) Omit(p unsafe.Pointer) bool { 103 | return m.omitEmpty && maplen(p) == 0 104 | } 105 | 106 | func (m *MapCodec) Write(w *WriteBuf, p unsafe.Pointer) { 107 | // p is a pointer to a map pointer, but maps are already pointery 108 | p = *(*unsafe.Pointer)(p) 109 | 110 | // Start with the count. Note the same ability to use a negative count to 111 | // record a block size exists here too. 112 | l := maplen(p) 113 | w.Varint(int64(l)) 114 | if l == 0 { 115 | return 116 | } 117 | 118 | var iterM mapiter 119 | iter := (unsafe.Pointer)(&iterM) 120 | mapiterinit(unpackEFace(m.rtype).data, p, iter) 121 | 122 | var sc StringCodec 123 | 124 | for { 125 | k := mapiterkey(iter) 126 | if k == nil { 127 | break 128 | } 129 | v := mapiterelem(iter) 130 | 131 | sc.Write(w, k) 132 | m.valueCodec.Write(w, v) 133 | 134 | mapiternext(iter) 135 | } 136 | 137 | // like arrays, theoretically there can be multiple blocks so we need to write a zero count to say there's no more. 
138 | w.Varint(0) 139 | } 140 | -------------------------------------------------------------------------------- /map_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | "unsafe" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | ) 10 | 11 | func TestMapCodec(t *testing.T) { 12 | tests := []struct { 13 | name string 14 | data []byte 15 | exp map[string][]byte 16 | }{ 17 | { 18 | name: "1 simple block", 19 | data: []byte{ 20 | // block count 21 | 2, // meaning 1 22 | // no block size for positive count 23 | // key 24 | 6, 'f', 'o', 'o', 25 | // value 26 | 8, 1, 2, 3, 4, 27 | // zero block 28 | 0, 29 | }, 30 | exp: map[string][]byte{ 31 | "foo": {1, 2, 3, 4}, 32 | }, 33 | }, 34 | { 35 | name: "block with size", 36 | data: []byte{ 37 | // block count 38 | 1, 39 | 18, 40 | // key 41 | 6, 'f', 'o', 'o', 42 | // value 43 | 8, 1, 2, 3, 4, 44 | // zero block 45 | 0, 46 | }, 47 | exp: map[string][]byte{ 48 | "foo": {1, 2, 3, 4}, 49 | }, 50 | }, 51 | 52 | { 53 | name: "1 simple block, 2 vals", 54 | data: []byte{ 55 | // block count 56 | 4, // meaning 2 57 | // no block size for positive count 58 | // key 59 | 6, 'f', 'o', 'o', 60 | // value 61 | 8, 1, 2, 3, 4, 62 | // key 63 | 6, 'b', 'a', 'r', 64 | // value 65 | 8, 4, 3, 2, 1, 66 | // zero block 67 | 0, 68 | }, 69 | exp: map[string][]byte{ 70 | "foo": {1, 2, 3, 4}, 71 | "bar": {4, 3, 2, 1}, 72 | }, 73 | }, 74 | { 75 | name: "2 simple blocks", 76 | data: []byte{ 77 | // block count 78 | 2, // meaning 1 79 | // no block size for positive count 80 | // key 81 | 6, 'f', 'o', 'o', 82 | // value 83 | 8, 1, 2, 3, 4, 84 | // Next block 85 | 2, // meaning 1 86 | // no block size for positive count 87 | // key 88 | 6, 'b', 'a', 'r', 89 | // value 90 | 8, 4, 3, 2, 1, 91 | // zero block 92 | 0, 93 | }, 94 | exp: map[string][]byte{ 95 | "foo": {1, 2, 3, 4}, 96 | "bar": {4, 3, 2, 1}, 97 | }, 98 | }, 99 | } 100 | 101 | for _, test := range tests { 102 | t.Run(test.name, func(t *testing.T) { 103 | var m map[string][]byte 104 | typ := reflect.TypeOf(m) 105 | c := MapCodec{rtype: typ, valueCodec: BytesCodec{}} 106 | 107 | r := NewReadBuf(test.data) 108 | 109 | if err := c.Read(r, unsafe.Pointer(&m)); err != nil { 110 | t.Fatal(err) 111 | } 112 | 113 | if diff := cmp.Diff(test.exp, m); diff != "" { 114 | t.Fatalf("map not as expected. %s", diff) 115 | } 116 | 117 | if r.Len() != 0 { 118 | t.Fatalf("unread bytes. %d", r.Len()) 119 | } 120 | }) 121 | 122 | t.Run(test.name+" skip", func(t *testing.T) { 123 | c := MapCodec{valueCodec: BytesCodec{}} 124 | r := NewReadBuf(test.data) 125 | if err := c.Skip(r); err != nil { 126 | t.Fatal(err) 127 | } 128 | if r.Len() != 0 { 129 | t.Fatalf("unread bytes. 
%d", r.Len()) 130 | } 131 | }) 132 | 133 | t.Run(test.name+" roundtrip", func(t *testing.T) { 134 | typ := reflect.TypeOf(test.exp) 135 | c := MapCodec{rtype: typ, valueCodec: BytesCodec{}} 136 | w := NewWriteBuf(nil) 137 | 138 | c.Write(w, (unsafe.Pointer)(&test.exp)) 139 | var actual map[string][]byte 140 | r := NewReadBuf(w.Bytes()) 141 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 142 | t.Fatal(err) 143 | } 144 | if diff := cmp.Diff(test.exp, actual); diff != "" { 145 | t.Fatal(diff) 146 | } 147 | }) 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /null.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "unsafe" 5 | ) 6 | 7 | type nullCodec struct{} 8 | 9 | func (nullCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 10 | // TODO: could consider nil-ing the pointer 11 | return nil 12 | } 13 | 14 | func (nullCodec) Skip(r *ReadBuf) error { 15 | return nil 16 | } 17 | 18 | func (nullCodec) New(r *ReadBuf) unsafe.Pointer { 19 | return nil 20 | } 21 | 22 | func (rc nullCodec) Omit(p unsafe.Pointer) bool { 23 | return true 24 | } 25 | 26 | func (rc nullCodec) Write(w *WriteBuf, p unsafe.Pointer) { 27 | } 28 | -------------------------------------------------------------------------------- /null/null.go: -------------------------------------------------------------------------------- 1 | // Package null contains avro decoders for the types in github.com/unravelin/null. 2 | // Call RegisterCodecs to make these codecs available to avro 3 | package null 4 | 5 | import ( 6 | "fmt" 7 | "reflect" 8 | "unsafe" 9 | 10 | "github.com/philpearl/avro" 11 | avrotime "github.com/philpearl/avro/time" 12 | "github.com/unravelin/null" 13 | ) 14 | 15 | // RegisterCodecs registers the codecs from this package and makes them 16 | // available to avro. 17 | func RegisterCodecs() { 18 | avro.Register(reflect.TypeFor[null.Int](), buildNullIntCodec) 19 | avro.Register(reflect.TypeFor[null.Bool](), buildNullBoolCodec) 20 | avro.Register(reflect.TypeFor[null.Float](), buildNullFloatCodec) 21 | avro.Register(reflect.TypeFor[null.String](), buildNullStringCodec) 22 | avro.Register(reflect.TypeFor[null.Time](), buildNullTimeCodec) 23 | 24 | avro.RegisterSchema(reflect.TypeFor[null.Int](), nullableSchema(avro.Schema{Type: "long"})) 25 | avro.RegisterSchema(reflect.TypeFor[null.Bool](), nullableSchema(avro.Schema{Type: "boolean"})) 26 | avro.RegisterSchema(reflect.TypeFor[null.Float](), nullableSchema(avro.Schema{Type: "double"})) 27 | avro.RegisterSchema(reflect.TypeFor[null.String](), nullableSchema(avro.Schema{Type: "string"})) 28 | 29 | // This reflects the common use of null.Time within Ravelin, the owner of the null package. 
30 | avro.RegisterSchema(reflect.TypeFor[null.Time](), nullableSchema(avro.Schema{Type: "string"})) 31 | } 32 | 33 | func nullableSchema(s avro.Schema) avro.Schema { 34 | return avro.Schema{ 35 | Type: "union", 36 | Union: []avro.Schema{ 37 | {Type: "null"}, 38 | s, 39 | }, 40 | } 41 | } 42 | 43 | func buildNullIntCodec(schema avro.Schema, typ reflect.Type, omit bool) (avro.Codec, error) { 44 | if schema.Type != "long" && schema.Type != "int" { 45 | return nil, fmt.Errorf("null.Int can only be used with long and int schema types") 46 | } 47 | return nullIntCodec{}, nil 48 | } 49 | 50 | type nullIntCodec struct { 51 | avro.Int64Codec 52 | } 53 | 54 | func (c nullIntCodec) Read(data *avro.ReadBuf, p unsafe.Pointer) error { 55 | ni := (*null.Int)(p) 56 | ni.Valid = true 57 | 58 | return c.Int64Codec.Read(data, unsafe.Pointer(&ni.Int64)) 59 | } 60 | 61 | var intType = reflect.TypeFor[null.Int]() 62 | 63 | func (c nullIntCodec) New(r *avro.ReadBuf) unsafe.Pointer { 64 | return r.Alloc(intType) 65 | } 66 | 67 | func (c nullIntCodec) Omit(p unsafe.Pointer) bool { 68 | ni := (*null.Int)(p) 69 | return !ni.Valid 70 | } 71 | 72 | func (c nullIntCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 73 | // I think we'll expect this codec to always be wrapped by a null union 74 | // codec, so checking for empty would be done elsewhere. 75 | ni := *(*null.Int)(p) 76 | c.Int64Codec.Write(w, unsafe.Pointer(&ni.Int64)) 77 | } 78 | 79 | func buildNullBoolCodec(schema avro.Schema, typ reflect.Type, omit bool) (avro.Codec, error) { 80 | if schema.Type != "boolean" { 81 | return nil, fmt.Errorf("null.Bool can only be used with boolean schema types") 82 | } 83 | return nullBoolCodec{}, nil 84 | } 85 | 86 | type nullBoolCodec struct { 87 | avro.BoolCodec 88 | } 89 | 90 | func (c nullBoolCodec) Read(data *avro.ReadBuf, ptr unsafe.Pointer) error { 91 | nb := (*null.Bool)(ptr) 92 | nb.Valid = true 93 | return c.BoolCodec.Read(data, unsafe.Pointer(&nb.Bool)) 94 | } 95 | 96 | var boolType = reflect.TypeFor[null.Bool]() 97 | 98 | func (c nullBoolCodec) New(r *avro.ReadBuf) unsafe.Pointer { 99 | return r.Alloc(boolType) 100 | } 101 | 102 | func (c nullBoolCodec) Omit(p unsafe.Pointer) bool { 103 | ni := (*null.Bool)(p) 104 | return !ni.Valid 105 | } 106 | 107 | func (c nullBoolCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 108 | // I think we'll expect this codec to always be wrapped by a null union 109 | // codec, so checking for empty would be done elsewhere. 
110 | ni := *(*null.Bool)(p) 111 | c.BoolCodec.Write(w, unsafe.Pointer(&ni.Bool)) 112 | } 113 | 114 | func buildNullFloatCodec(schema avro.Schema, typ reflect.Type, omit bool) (avro.Codec, error) { 115 | if schema.Type == "double" { 116 | return nullDoubleCodec{}, nil 117 | } 118 | 119 | if schema.Type == "float" { 120 | return nullFloatCodec{}, nil 121 | } 122 | 123 | return nil, fmt.Errorf("null.Float can only be used with double & float schema types") 124 | } 125 | 126 | type nullDoubleCodec struct { 127 | avro.DoubleCodec 128 | } 129 | 130 | func (c nullDoubleCodec) Read(data *avro.ReadBuf, ptr unsafe.Pointer) error { 131 | nf := (*null.Float)(ptr) 132 | nf.Valid = true 133 | return c.DoubleCodec.Read(data, unsafe.Pointer(&nf.Float64)) 134 | } 135 | 136 | func (c nullDoubleCodec) Omit(p unsafe.Pointer) bool { 137 | ni := (*null.Float)(p) 138 | return !ni.Valid 139 | } 140 | 141 | var floatType = reflect.TypeFor[null.Float]() 142 | 143 | func (c nullDoubleCodec) New(r *avro.ReadBuf) unsafe.Pointer { 144 | return r.Alloc(floatType) 145 | } 146 | 147 | func (c nullDoubleCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 148 | // I think we'll expect this codec to always be wrapped by a null union 149 | // codec, so checking for empty would be done elsewhere. 150 | ni := *(*null.Float)(p) 151 | c.DoubleCodec.Write(w, unsafe.Pointer(&ni.Float64)) 152 | } 153 | 154 | type nullFloatCodec struct { 155 | avro.FloatCodec 156 | } 157 | 158 | func (c nullFloatCodec) Read(data *avro.ReadBuf, ptr unsafe.Pointer) error { 159 | var f float32 160 | if err := c.FloatCodec.Read(data, unsafe.Pointer(&f)); err != nil { 161 | return err 162 | } 163 | nf := (*null.Float)(ptr) 164 | nf.Valid = true 165 | nf.Float64 = float64(f) 166 | return nil 167 | } 168 | 169 | func (c nullFloatCodec) New(r *avro.ReadBuf) unsafe.Pointer { 170 | return r.Alloc(floatType) 171 | } 172 | 173 | func (c nullFloatCodec) Omit(p unsafe.Pointer) bool { 174 | ni := (*null.Float)(p) 175 | return !ni.Valid 176 | } 177 | 178 | func (c nullFloatCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 179 | // I think we'll expect this codec to always be wrapped by a null union 180 | // codec, so checking for empty would be done elsewhere. 181 | ni := *(*null.Float)(p) 182 | c.FloatCodec.Write(w, unsafe.Pointer(&ni.Float64)) 183 | } 184 | 185 | func buildNullStringCodec(schema avro.Schema, typ reflect.Type, omit bool) (avro.Codec, error) { 186 | if schema.Type != "string" { 187 | return nil, fmt.Errorf("null.String can only be used with string schema type, not %s", schema.Type) 188 | } 189 | return nullStringCodec{}, nil 190 | } 191 | 192 | type nullStringCodec struct { 193 | avro.StringCodec 194 | } 195 | 196 | func (c nullStringCodec) Read(data *avro.ReadBuf, ptr unsafe.Pointer) error { 197 | ns := (*null.String)(ptr) 198 | ns.Valid = true 199 | return c.StringCodec.Read(data, unsafe.Pointer(&ns.String)) 200 | } 201 | 202 | var stringType = reflect.TypeFor[null.String]() 203 | 204 | func (c nullStringCodec) New(r *avro.ReadBuf) unsafe.Pointer { 205 | return r.Alloc(stringType) 206 | } 207 | 208 | func (c nullStringCodec) Omit(p unsafe.Pointer) bool { 209 | ni := (*null.String)(p) 210 | return !ni.Valid 211 | } 212 | 213 | func (c nullStringCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 214 | // I think we'll expect this codec to always be wrapped by a null union 215 | // codec, so checking for empty would be done elsewhere. 
216 | ni := *(*null.String)(p) 217 | c.StringCodec.Write(w, unsafe.Pointer(&ni.String)) 218 | } 219 | 220 | func buildNullTimeCodec(schema avro.Schema, typ reflect.Type, omit bool) (avro.Codec, error) { 221 | if schema.Type != "string" { 222 | return nil, fmt.Errorf("null.Time is only supported for string, not for %s", schema.Type) 223 | } 224 | return nullTimeCodec{}, nil 225 | } 226 | 227 | type nullTimeCodec struct { 228 | avrotime.StringCodec 229 | } 230 | 231 | func (c nullTimeCodec) Read(data *avro.ReadBuf, ptr unsafe.Pointer) error { 232 | nt := (*null.Time)(ptr) 233 | nt.Valid = true 234 | return c.StringCodec.Read(data, unsafe.Pointer(&nt.Time)) 235 | } 236 | 237 | var timeType = reflect.TypeFor[null.Time]() 238 | 239 | func (c nullTimeCodec) New(r *avro.ReadBuf) unsafe.Pointer { 240 | return r.Alloc(timeType) 241 | } 242 | 243 | func (c nullTimeCodec) Omit(p unsafe.Pointer) bool { 244 | ni := (*null.Time)(p) 245 | return !ni.Valid 246 | } 247 | 248 | func (c nullTimeCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 249 | // I think we'll expect this codec to always be wrapped by a null union 250 | // codec, so checking for empty would be done elsewhere. 251 | ni := *(*null.Time)(p) 252 | c.StringCodec.Write(w, unsafe.Pointer(&ni.Time)) 253 | } 254 | -------------------------------------------------------------------------------- /null/null_test.go: -------------------------------------------------------------------------------- 1 | package null 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "os" 7 | "testing" 8 | "time" 9 | "unsafe" 10 | 11 | "github.com/google/go-cmp/cmp" 12 | "github.com/philpearl/avro" 13 | "github.com/unravelin/null" 14 | ) 15 | 16 | func TestNullThings(t *testing.T) { 17 | RegisterCodecs() 18 | 19 | type mystruct struct { 20 | String null.String `json:"string,omitempty"` 21 | Int null.Int `json:"int,omitempty"` 22 | Bool null.Bool `json:"bool,omitempty"` 23 | Float null.Float `json:"float,omitempty"` 24 | } 25 | 26 | f, err := os.Open("./testdata/nullavro") 27 | if err != nil { 28 | t.Fatal(err) 29 | } 30 | defer f.Close() 31 | 32 | var actual []mystruct 33 | var sbs []*avro.ResourceBank 34 | if err := avro.ReadFile(bufio.NewReader(f), mystruct{}, func(val unsafe.Pointer, sb *avro.ResourceBank) error { 35 | actual = append(actual, *(*mystruct)(val)) 36 | sbs = append(sbs, sb) 37 | return nil 38 | }); err != nil { 39 | t.Fatal(err) 40 | } 41 | 42 | exp := []mystruct{ 43 | { 44 | String: null.StringFrom("String"), 45 | Int: null.IntFrom(42), 46 | Bool: null.BoolFrom(false), 47 | Float: null.FloatFrom(13.37), 48 | }, 49 | {}, 50 | } 51 | 52 | if diff := cmp.Diff(exp, actual); diff != "" { 53 | t.Fatalf("result differs. 
%s", diff) 54 | } 55 | for _, sb := range sbs { 56 | sb.Close() 57 | } 58 | } 59 | 60 | func TestNullRoundTrip(t *testing.T) { 61 | RegisterCodecs() 62 | 63 | type mystruct struct { 64 | String null.String `json:"string,omitempty"` 65 | Int null.Int `json:"int,omitempty"` 66 | Bool null.Bool `json:"bool,omitempty"` 67 | Float null.Float `json:"float,omitempty"` 68 | Time null.Time `json:"time,omitempty"` 69 | } 70 | 71 | var buf bytes.Buffer 72 | 73 | enc, err := avro.NewEncoderFor[mystruct](&buf, avro.CompressionSnappy, 1024) 74 | if err != nil { 75 | t.Fatal(err) 76 | } 77 | 78 | if err := enc.Encode(&mystruct{ 79 | String: null.StringFrom("String"), 80 | Int: null.IntFrom(42), 81 | Bool: null.BoolFrom(true), 82 | Float: null.FloatFrom(13.37), 83 | Time: null.TimeFrom(time.Date(1970, 3, 15, 13, 37, 42, 0, time.UTC)), 84 | }); err != nil { 85 | t.Fatal(err) 86 | } 87 | 88 | if err := enc.Encode(&mystruct{}); err != nil { 89 | t.Fatal(err) 90 | } 91 | 92 | if err := enc.Encode(&mystruct{ 93 | String: null.StringFrom(""), 94 | Int: null.IntFrom(0), 95 | Bool: null.BoolFrom(false), 96 | Float: null.FloatFrom(0.0), 97 | Time: null.TimeFrom(time.Time{}), 98 | }); err != nil { 99 | t.Fatal(err) 100 | } 101 | 102 | if err := enc.Flush(); err != nil { 103 | t.Fatal(err) 104 | } 105 | 106 | var actual []mystruct 107 | var sbs []*avro.ResourceBank 108 | if err := avro.ReadFile(&buf, mystruct{}, func(val unsafe.Pointer, sb *avro.ResourceBank) error { 109 | actual = append(actual, *(*mystruct)(val)) 110 | sbs = append(sbs, sb) 111 | return nil 112 | }); err != nil { 113 | t.Fatal(err) 114 | } 115 | 116 | exp := []mystruct{ 117 | { 118 | String: null.StringFrom("String"), 119 | Int: null.IntFrom(42), 120 | Bool: null.BoolFrom(true), 121 | Float: null.FloatFrom(13.37), 122 | Time: null.TimeFrom(time.Date(1970, 3, 15, 13, 37, 42, 0, time.UTC)), 123 | }, 124 | {}, 125 | { 126 | String: null.StringFrom(""), 127 | Int: null.IntFrom(0), 128 | Bool: null.BoolFrom(false), 129 | Float: null.FloatFrom(0.0), 130 | Time: null.TimeFrom(time.Time{}), 131 | }, 132 | } 133 | 134 | if diff := cmp.Diff(exp, actual); diff != "" { 135 | t.Fatalf("result differs. %s", diff) 136 | } 137 | for _, sb := range sbs { 138 | sb.Close() 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /null/testdata/nullavro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philpearl/avro/bd3141c9da8fcf1cc4d374434d25f2a019fb2d68/null/testdata/nullavro -------------------------------------------------------------------------------- /pointer.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "unsafe" 6 | ) 7 | 8 | type PointerCodec struct { 9 | Codec 10 | } 11 | 12 | func (c *PointerCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 13 | pp := (*unsafe.Pointer)(p) 14 | if *pp == nil { 15 | *pp = c.Codec.New(r) 16 | } 17 | return c.Codec.Read(r, *pp) 18 | } 19 | 20 | var pointerType = reflect.TypeFor[unsafe.Pointer]() 21 | 22 | func (c *PointerCodec) New(r *ReadBuf) unsafe.Pointer { 23 | return r.Alloc(pointerType) 24 | } 25 | 26 | func (c *PointerCodec) Omit(p unsafe.Pointer) bool { 27 | return *(*unsafe.Pointer)(p) == nil 28 | } 29 | 30 | func (c *PointerCodec) Write(w *WriteBuf, p unsafe.Pointer) { 31 | // Note this codec will normally be wrapped by a union codec, so we don't 32 | // need to worry about writing the union selector. 
33 | pp := *(*unsafe.Pointer)(p) 34 | if pp == nil { 35 | return 36 | } 37 | c.Codec.Write(w, pp) 38 | } 39 | -------------------------------------------------------------------------------- /pointer_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "testing" 5 | "unsafe" 6 | 7 | "github.com/google/go-cmp/cmp" 8 | ) 9 | 10 | func TestPointerCodec(t *testing.T) { 11 | type inStruct struct { 12 | A string 13 | B int 14 | } 15 | type myStruct struct { 16 | P *inStruct `json:",omitempty"` 17 | B int 18 | } 19 | 20 | s, err := SchemaForType(myStruct{}) 21 | if err != nil { 22 | t.Fatal(err) 23 | } 24 | 25 | if diff := cmp.Diff(Schema{ 26 | Type: "record", 27 | Object: &SchemaObject{ 28 | Name: "myStruct", 29 | Namespace: "github.com.philpearl.avro", 30 | Fields: []SchemaRecordField{ 31 | { 32 | Name: "P", 33 | Type: Schema{ 34 | Type: "union", 35 | Union: []Schema{ 36 | {Type: "null"}, 37 | { 38 | Type: "record", 39 | Object: &SchemaObject{ 40 | Name: "inStruct", 41 | Namespace: "github.com.philpearl.avro", 42 | Fields: []SchemaRecordField{ 43 | {Name: "A", Type: Schema{Type: "string"}}, 44 | {Name: "B", Type: Schema{Type: "long"}}, 45 | }, 46 | }, 47 | }, 48 | }, 49 | }, 50 | }, 51 | { 52 | Name: "B", 53 | Type: Schema{Type: "long"}, 54 | }, 55 | }, 56 | }, 57 | }, s); diff != "" { 58 | t.Fatal(diff) 59 | } 60 | 61 | c, err := s.Codec(myStruct{}) 62 | if err != nil { 63 | t.Fatal(err) 64 | } 65 | 66 | w := NewWriteBuf(nil) 67 | c.Write(w, unsafe.Pointer(&myStruct{})) 68 | 69 | if diff := cmp.Diff([]byte{0x00, 0x00}, w.Bytes()); diff != "" { 70 | t.Fatal(diff) 71 | } 72 | 73 | var out myStruct 74 | if err := c.Read(NewReadBuf(w.Bytes()), unsafe.Pointer(&out)); err != nil { 75 | t.Fatal(err) 76 | } 77 | 78 | if diff := cmp.Diff(myStruct{}, out); diff != "" { 79 | t.Fatal(diff) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # avro 2 | 3 | [![GoDoc](https://godoc.org/github.com/philpearl/avro?status.svg)](https://godoc.org/github.com/philpearl/avro) 4 | 5 | avro is an encoder & decoder for Apache AVRO that decodes directly into Go structs and follows naming from JSON tags. It is intended primarily for decoding output from Google's Big Query. 6 | 7 | https://avro.apache.org/docs/1.8.1/spec.html 8 | -------------------------------------------------------------------------------- /record.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "reflect" 7 | "unsafe" 8 | ) 9 | 10 | type recordCodecField struct { 11 | // Codec for this field 12 | codec Codec 13 | // offset of this field within the struct representing the record. -1 if this 14 | // field is not in the struct and therefore should be skipped 15 | offset uintptr 16 | name string 17 | } 18 | 19 | type recordCodec struct { 20 | rtype reflect.Type 21 | fields []recordCodecField 22 | } 23 | 24 | func (rc *recordCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 25 | for i, f := range rc.fields { 26 | if f.offset == math.MaxUint64 { 27 | if err := f.codec.Skip(r); err != nil { 28 | return fmt.Errorf("failed to skip field %d %q of record. %w", i, f.name, err) 29 | } 30 | } else { 31 | if err := f.codec.Read(r, unsafe.Add(p, f.offset)); err != nil { 32 | return fmt.Errorf("failed reading field %d %q of record. 
%w", i, f.name, err) 33 | } 34 | } 35 | } 36 | return nil 37 | } 38 | 39 | func (rc *recordCodec) Skip(r *ReadBuf) error { 40 | for i, f := range rc.fields { 41 | if err := f.codec.Skip(r); err != nil { 42 | return fmt.Errorf("failed to skip field %d %q of record. %w", i, f.name, err) 43 | } 44 | } 45 | return nil 46 | } 47 | 48 | func (rc *recordCodec) New(r *ReadBuf) unsafe.Pointer { 49 | return r.Alloc(rc.rtype) 50 | } 51 | 52 | func (rc *recordCodec) Omit(p unsafe.Pointer) bool { 53 | return false 54 | } 55 | 56 | func (rc *recordCodec) Write(w *WriteBuf, p unsafe.Pointer) { 57 | for _, rf := range rc.fields { 58 | fp := unsafe.Add(p, rf.offset) 59 | rf.codec.Write(w, fp) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /record_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | "unsafe" 7 | 8 | "github.com/google/go-cmp/cmp" 9 | "github.com/google/go-cmp/cmp/cmpopts" 10 | ) 11 | 12 | func TestRecordCodec(t *testing.T) { 13 | type record struct { 14 | Name string `json:"name"` 15 | Hat string `json:"-"` 16 | } 17 | 18 | schema := Schema{ 19 | Type: "record", 20 | Object: &SchemaObject{ 21 | Name: "Record", 22 | Fields: []SchemaRecordField{ 23 | { 24 | Name: "name", 25 | Type: Schema{ 26 | Type: "string", 27 | }, 28 | }, 29 | { 30 | Name: "Hat", 31 | Type: Schema{ 32 | Type: "string", 33 | }, 34 | }, 35 | }, 36 | }, 37 | } 38 | 39 | data := []byte{ 40 | 6, 'j', 'i', 'm', 41 | 6, 'c', 'a', 't', 42 | } 43 | 44 | var r record 45 | c, err := buildRecordCodec(schema, reflect.TypeOf(r)) 46 | if err != nil { 47 | t.Fatal(err) 48 | } 49 | 50 | buf := NewReadBuf(data) 51 | if err := c.Read(buf, unsafe.Pointer(&r)); err != nil { 52 | t.Fatal(err) 53 | } 54 | 55 | if diff := cmp.Diff(record{Name: "jim"}, r); diff != "" { 56 | t.Fatalf("record differs. 
%s", diff) 57 | } 58 | 59 | if buf.Len() != 0 { 60 | t.Fatalf("unread data (%d)", buf.Len()) 61 | } 62 | 63 | // Now test skip 64 | buf.Reset(data) 65 | if err := c.Skip(buf); err != nil { 66 | t.Fatal(err) 67 | } 68 | if buf.Len() != 0 { 69 | t.Fatalf("unread data (%d)", buf.Len()) 70 | } 71 | } 72 | 73 | func TestRecordRoundTrip(t *testing.T) { 74 | type mustruct struct { 75 | Name string `json:"name"` 76 | Hat string `json:",omitempty"` 77 | V int 78 | Q float64 79 | Bytes []byte 80 | La []int `json:"la"` 81 | W int32 `json:"w,omitempty"` 82 | Z *int64 `json:"z"` 83 | Mmm map[string]string 84 | } 85 | 86 | var zval int64 = 1020202 87 | 88 | tests := []struct { 89 | name string 90 | data mustruct 91 | }{ 92 | { 93 | name: "basic", 94 | data: mustruct{ 95 | Name: "jim", 96 | Hat: "cat", 97 | V: 31, 98 | Q: 3.14, 99 | Bytes: []byte{1, 2, 3, 4}, 100 | La: []int{1, 2, 3, 4}, 101 | W: 0, 102 | Z: &zval, 103 | Mmm: map[string]string{"foo": "bar", "baz": "qux"}, 104 | }, 105 | }, 106 | { 107 | name: "empty", 108 | data: mustruct{}, 109 | }, 110 | } 111 | 112 | for _, test := range tests { 113 | t.Run(test.name, func(t *testing.T) { 114 | s, err := SchemaForType(&test.data) 115 | if err != nil { 116 | t.Fatal(err) 117 | } 118 | 119 | c, err := s.Codec(&test.data) 120 | if err != nil { 121 | t.Fatal(err) 122 | } 123 | 124 | buf := NewWriteBuf(nil) 125 | 126 | c.Write(buf, unsafe.Pointer(&test.data)) 127 | 128 | var actual mustruct 129 | r := NewReadBuf(buf.Bytes()) 130 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 131 | t.Fatal(err) 132 | } 133 | 134 | if diff := cmp.Diff(test.data, actual, cmpopts.EquateEmpty()); diff != "" { 135 | t.Fatalf("record differs. %s", diff) 136 | } 137 | }) 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /schema.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | 7 | "github.com/go-json-experiment/json" 8 | "github.com/go-json-experiment/json/jsontext" 9 | ) 10 | 11 | // Schema is a representation of AVRO schema JSON. Primitive types populate Type 12 | // only. UnionTypes populate Type and Union fields. All other types populate 13 | // Type and a subset of Object fields. 14 | type Schema struct { 15 | Type string 16 | Object *SchemaObject 17 | Union []Schema 18 | } 19 | 20 | // Codec creates a codec for the given schema and output type 21 | func (s Schema) Codec(out any) (Codec, error) { 22 | typ := reflect.TypeOf(out) 23 | if typ.Kind() == reflect.Ptr { 24 | typ = typ.Elem() 25 | } 26 | if typ.Kind() != reflect.Struct { 27 | return nil, fmt.Errorf("out must be a struct or pointer to a struct") 28 | } 29 | 30 | return buildCodec(s, typ, false) 31 | } 32 | 33 | func (s *Schema) Marshal() ([]byte, error) { 34 | return json.Marshal(s) 35 | } 36 | 37 | // SchemaFromString decodes a JSON string into a Schema 38 | func SchemaFromString(in string) (Schema, error) { 39 | var schema Schema 40 | if err := json.Unmarshal([]byte(in), &schema); err != nil { 41 | return schema, fmt.Errorf("could not decode schema JSON. 
%w", err) 42 | } 43 | return schema, nil 44 | } 45 | 46 | // SchemaObject contains all the fields of more complex schema types 47 | type SchemaObject struct { 48 | Type string `json:"type"` 49 | LogicalType string `json:"logicalType,omitempty"` 50 | Name string `json:"name,omitempty"` 51 | Namespace string `json:"namespace,omitempty"` 52 | // Fields in a record 53 | Fields []SchemaRecordField `json:"fields,omitempty"` 54 | // The type of each item in an array 55 | Items Schema `json:"items,omitempty"` 56 | // The value types of a map (keys are strings) 57 | Values Schema `json:"values,omitempty"` 58 | // The size of a fixed type 59 | Size int `json:"size,omitempty"` 60 | // The values of an enum 61 | Symbols []string `json:"symbols,omitempty"` 62 | } 63 | 64 | // SchemaRecordField represents one field of a Record schema 65 | type SchemaRecordField struct { 66 | Name string `json:"name,omitempty"` 67 | Type Schema `json:"type,omitempty"` 68 | } 69 | 70 | func (s *Schema) UnmarshalJSONFrom(dec *jsontext.Decoder) error { 71 | switch dec.PeekKind() { 72 | case '"': 73 | token, err := dec.ReadToken() 74 | if err != nil { 75 | return fmt.Errorf("reading string: %w", err) 76 | } 77 | s.Type = token.String() 78 | case '[': 79 | // This is an array of Schemas 80 | s.Type = "union" 81 | if err := json.UnmarshalDecode(dec, &s.Union); err != nil { 82 | return fmt.Errorf("decoding union: %w", err) 83 | } 84 | case '{': 85 | s.Object = &SchemaObject{} 86 | // do we need to isolate these decoders? 87 | if err := json.UnmarshalDecode(dec, s.Object); err != nil { 88 | return fmt.Errorf("decoding union: %w", err) 89 | } 90 | 91 | s.Type = s.Object.Type 92 | s.Object.Type = "" 93 | 94 | default: 95 | return fmt.Errorf("unexpected token unmarshalling schema: %s", dec.PeekKind()) 96 | } 97 | return nil 98 | } 99 | 100 | func (s *Schema) MarshalJSONTo(enc *jsontext.Encoder) error { 101 | switch { 102 | case s.Object != nil: 103 | if err := enc.WriteToken(jsontext.BeginObject); err != nil { 104 | return fmt.Errorf("writing begin object: %w", err) 105 | } 106 | if err := enc.WriteToken(jsontext.String("type")); err != nil { 107 | return fmt.Errorf("writing type key: %w", err) 108 | } 109 | if err := enc.WriteToken(jsontext.String(s.Type)); err != nil { 110 | return fmt.Errorf("writing type value: %w", err) 111 | } 112 | if s.Object.LogicalType != "" { 113 | if err := enc.WriteToken(jsontext.String("logicalType")); err != nil { 114 | return fmt.Errorf("writing logicalType key: %w", err) 115 | } 116 | if err := enc.WriteToken(jsontext.String(s.Object.LogicalType)); err != nil { 117 | return fmt.Errorf("writing logicalType value: %w", err) 118 | } 119 | } 120 | if s.Object.Name != "" { 121 | if err := enc.WriteToken(jsontext.String("name")); err != nil { 122 | return fmt.Errorf("writing name key: %w", err) 123 | } 124 | if err := enc.WriteToken(jsontext.String(s.Object.Name)); err != nil { 125 | return fmt.Errorf("writing name value: %w", err) 126 | } 127 | } 128 | if s.Object.Namespace != "" { 129 | if err := enc.WriteToken(jsontext.String("namespace")); err != nil { 130 | return fmt.Errorf("writing namespace key: %w", err) 131 | } 132 | if err := enc.WriteToken(jsontext.String(s.Object.Namespace)); err != nil { 133 | return fmt.Errorf("writing namespace value: %w", err) 134 | } 135 | } 136 | switch s.Type { 137 | case "record": 138 | if err := enc.WriteToken(jsontext.String("fields")); err != nil { 139 | return fmt.Errorf("writing fields key: %w", err) 140 | } 141 | if err := json.MarshalEncode(enc, 
s.Object.Fields); err != nil { 142 | return fmt.Errorf("encoding record fields: %w", err) 143 | } 144 | case "enum": 145 | if err := enc.WriteToken(jsontext.String("symbols")); err != nil { 146 | return fmt.Errorf("writing symbols key: %w", err) 147 | } 148 | if err := json.MarshalEncode(enc, s.Object.Symbols); err != nil { 149 | return fmt.Errorf("encoding enum symbols: %w", err) 150 | } 151 | case "array": 152 | if err := enc.WriteToken(jsontext.String("items")); err != nil { 153 | return fmt.Errorf("writing items key: %w", err) 154 | } 155 | if err := json.MarshalEncode(enc, s.Object.Items); err != nil { 156 | return fmt.Errorf("encoding items: %w", err) 157 | } 158 | case "map": 159 | if err := enc.WriteToken(jsontext.String("values")); err != nil { 160 | return fmt.Errorf("writing values key: %w", err) 161 | } 162 | if err := json.MarshalEncode(enc, s.Object.Values); err != nil { 163 | return fmt.Errorf("encoding values: %w", err) 164 | } 165 | case "fixed": 166 | if err := enc.WriteToken(jsontext.String("size")); err != nil { 167 | return fmt.Errorf("writing size key: %w", err) 168 | } 169 | if err := enc.WriteToken(jsontext.Int(int64(s.Object.Size))); err != nil { 170 | return fmt.Errorf("writing size value: %w", err) 171 | } 172 | } 173 | if err := enc.WriteToken(jsontext.EndObject); err != nil { 174 | return fmt.Errorf("writing end object: %w", err) 175 | } 176 | 177 | case len(s.Union) != 0: 178 | if err := json.MarshalEncode(enc, s.Union); err != nil { 179 | return fmt.Errorf("encoding union: %w", err) 180 | } 181 | default: 182 | enc.WriteToken(jsontext.String(s.Type)) 183 | } 184 | return nil 185 | } 186 | -------------------------------------------------------------------------------- /schema_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/go-json-experiment/json" 7 | "github.com/google/go-cmp/cmp" 8 | ) 9 | 10 | func TestSchemaEncoding(t *testing.T) { 11 | data, err := avroFileSchema.Marshal() 12 | if err != nil { 13 | t.Fatal(err) 14 | } 15 | 16 | if diff := cmp.Diff(`{"type":"record","name":"org.apache.avro.file.Header","fields":[{"name":"magic","type":{"type":"fixed","name":"Magic","size":4}},{"name":"meta","type":{"type":"map","values":"bytes"}},{"name":"sync","type":{"type":"fixed","name":"Sync","size":16}}]}`, string(data)); diff != "" { 17 | t.Fatalf("results differ. %s", diff) 18 | } 19 | 20 | var out Schema 21 | if err := json.Unmarshal(data, &out); err != nil { 22 | t.Fatal(err) 23 | } 24 | if diff := cmp.Diff(avroFileSchema, out); diff != "" { 25 | t.Fatalf("results differ. %s", diff) 26 | } 27 | 28 | out2, err := SchemaFromString(string(data)) 29 | if err != nil { 30 | t.Fatal(err) 31 | } 32 | if diff := cmp.Diff(avroFileSchema, out2); diff != "" { 33 | t.Fatalf("results differ. %s", diff) 34 | } 35 | } 36 | 37 | func TestUnmarshal(t *testing.T) { 38 | // This tests we can unmarshal loads of different schemas correctly. 
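// Each case is unmarshalled and diffed against want, and want is then
// marshalled again and compared byte-for-byte with the input JSON, so the
// table doubles as a marshalling round-trip test.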
39 | tests := []struct { 40 | schema string 41 | want Schema 42 | }{ 43 | { 44 | schema: `{"type":"record","name":"test","fields":[{"name":"a","type":"int"}]}`, 45 | want: Schema{ 46 | Type: "record", 47 | Object: &SchemaObject{ 48 | Name: "test", 49 | Fields: []SchemaRecordField{ 50 | { 51 | Name: "a", 52 | Type: Schema{ 53 | Type: "int", 54 | }, 55 | }, 56 | }, 57 | }, 58 | }, 59 | }, 60 | { 61 | schema: `{"type":"enum","name":"test","symbols":["a","b"]}`, 62 | want: Schema{ 63 | Type: "enum", 64 | Object: &SchemaObject{ 65 | Name: "test", 66 | Symbols: []string{"a", "b"}, 67 | }, 68 | }, 69 | }, 70 | { 71 | schema: `{"type":"fixed","name":"test","size":4}`, 72 | want: Schema{ 73 | Type: "fixed", 74 | Object: &SchemaObject{ 75 | Name: "test", 76 | Size: 4, 77 | }, 78 | }, 79 | }, 80 | { 81 | schema: `{"type":"array","items":"int"}`, 82 | want: Schema{ 83 | Type: "array", 84 | Object: &SchemaObject{ 85 | Items: Schema{ 86 | Type: "int", 87 | }, 88 | }, 89 | }, 90 | }, 91 | { 92 | schema: `{"type":"map","values":"int"}`, 93 | want: Schema{ 94 | Type: "map", 95 | Object: &SchemaObject{ 96 | Values: Schema{ 97 | Type: "int", 98 | }, 99 | }, 100 | }, 101 | }, 102 | { 103 | schema: `"null"`, 104 | want: Schema{ 105 | Type: "null", 106 | }, 107 | }, 108 | { 109 | schema: `"boolean"`, 110 | want: Schema{ 111 | Type: "boolean", 112 | }, 113 | }, 114 | { 115 | schema: `"int"`, 116 | want: Schema{ 117 | Type: "int", 118 | }, 119 | }, 120 | { 121 | schema: `"long"`, 122 | want: Schema{ 123 | Type: "long", 124 | }, 125 | }, 126 | { 127 | schema: `"float"`, 128 | want: Schema{ 129 | Type: "float", 130 | }, 131 | }, 132 | { 133 | schema: `"double"`, 134 | want: Schema{ 135 | Type: "double", 136 | }, 137 | }, 138 | { 139 | schema: `"bytes"`, 140 | want: Schema{ 141 | Type: "bytes", 142 | }, 143 | }, 144 | { 145 | schema: `"string"`, 146 | want: Schema{ 147 | Type: "string", 148 | }, 149 | }, 150 | 151 | { 152 | schema: `["null","int"]`, 153 | want: Schema{ 154 | Type: "union", 155 | Union: []Schema{ 156 | { 157 | Type: "null", 158 | }, 159 | { 160 | Type: "int", 161 | }, 162 | }, 163 | }, 164 | }, 165 | } 166 | 167 | for _, test := range tests { 168 | var got Schema 169 | if err := json.Unmarshal([]byte(test.schema), &got); err != nil { 170 | t.Fatalf("failed to unmarshal %s. %v", test.schema, err) 171 | } 172 | if diff := cmp.Diff(test.want, got); diff != "" { 173 | t.Fatalf("results differ. %s", diff) 174 | } 175 | 176 | data, err := json.Marshal(&test.want) 177 | if err != nil { 178 | t.Fatalf("failed to marshal. %v", err) 179 | } 180 | if string(data) != test.schema { 181 | t.Fatalf("expected %s got %s", test.schema, string(data)) 182 | } 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /string.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "unsafe" 7 | ) 8 | 9 | // StringCodec is a decoder for strings 10 | type StringCodec struct{ omitEmpty bool } 11 | 12 | func (StringCodec) Read(r *ReadBuf, ptr unsafe.Pointer) error { 13 | // ptr is a *string 14 | l, err := r.Varint() 15 | if err != nil { 16 | return fmt.Errorf("failed to read length of string. %w", err) 17 | } 18 | if l < 0 { 19 | return fmt.Errorf("cannot make string with length %d", l) 20 | } 21 | data, err := r.NextAsString(int(l)) 22 | if err != nil { 23 | return fmt.Errorf("failed to read %d bytes of string body. 
%w", l, err) 24 | } 25 | *(*string)(ptr) = data 26 | return nil 27 | } 28 | 29 | func (StringCodec) Skip(r *ReadBuf) error { 30 | l, err := r.Varint() 31 | if err != nil { 32 | return fmt.Errorf("failed to read length of string. %w", err) 33 | } 34 | return skip(r, l) 35 | } 36 | 37 | var stringType = reflect.TypeFor[string]() 38 | 39 | func (StringCodec) New(r *ReadBuf) unsafe.Pointer { 40 | return r.Alloc(stringType) 41 | } 42 | 43 | func (sc StringCodec) Omit(p unsafe.Pointer) bool { 44 | return sc.omitEmpty && len(*(*string)(p)) == 0 45 | } 46 | 47 | func (StringCodec) Write(w *WriteBuf, p unsafe.Pointer) { 48 | s := *(*string)(p) 49 | w.Varint(int64(len(s))) 50 | w.Write([]byte(s)) 51 | } 52 | -------------------------------------------------------------------------------- /string_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "testing" 5 | "unsafe" 6 | ) 7 | 8 | func TestStringCodec(t *testing.T) { 9 | tests := []struct { 10 | name string 11 | data []byte 12 | exp string 13 | }{ 14 | { 15 | name: "empty", 16 | data: []byte{0}, 17 | exp: "", 18 | }, 19 | { 20 | name: "hello", 21 | 22 | data: []byte{10, 'h', 'e', 'l', 'l', 'o'}, 23 | exp: "hello", 24 | }, 25 | } 26 | c := StringCodec{} 27 | for _, test := range tests { 28 | t.Run(test.name, func(t *testing.T) { 29 | r := NewReadBuf(test.data) 30 | var actual string 31 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 32 | t.Fatal(err) 33 | } 34 | if test.exp != actual { 35 | t.Fatalf("%q does not match expected %q", actual, test.exp) 36 | } 37 | if r.Len() != 0 { 38 | t.Fatalf("%d bytes left", r.Len()) 39 | } 40 | }) 41 | 42 | t.Run(test.name+" skip", func(t *testing.T) { 43 | r := NewReadBuf(test.data) 44 | 45 | if err := c.Skip(r); err != nil { 46 | t.Fatal(err) 47 | } 48 | if r.Len() != 0 { 49 | t.Fatalf("%d bytes left", r.Len()) 50 | } 51 | }) 52 | 53 | } 54 | } 55 | 56 | func TestStringRoundTrip(t *testing.T) { 57 | tests := []struct { 58 | name string 59 | in string 60 | }{ 61 | { 62 | name: "empty", 63 | in: "", 64 | }, 65 | { 66 | name: "hello", 67 | in: "hello", 68 | }, 69 | { 70 | name: "unicode", 71 | in: "こんにちは", 72 | }, 73 | 74 | { 75 | name: "emoji", 76 | in: "👋", 77 | }, 78 | } 79 | 80 | c := StringCodec{} 81 | for _, test := range tests { 82 | t.Run(test.name, func(t *testing.T) { 83 | w := NewWriteBuf(nil) 84 | c.Write(w, unsafe.Pointer(&test.in)) 85 | var actual string 86 | r := NewReadBuf(w.Bytes()) 87 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 88 | t.Fatal(err) 89 | } 90 | if test.in != actual { 91 | t.Fatalf("%q does not match expected %q", actual, test.in) 92 | } 93 | }) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /testdata/avro1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philpearl/avro/bd3141c9da8fcf1cc4d374434d25f2a019fb2d68/testdata/avro1 -------------------------------------------------------------------------------- /time/parse.go: -------------------------------------------------------------------------------- 1 | package time 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | // parseTime parses an RFC3339 timestamp. 
It exists because custom parsing of 11 | // this particular timezone is faster than using time.Parse, and parsing string 12 | // timestamps comes up rather more often than is ideal 13 | func parseTime(in string) (time.Time, error) { 14 | if len(in) < 10 { 15 | return time.Time{}, fmt.Errorf("expect time string to be at least 10 characters long %q", in) 16 | } 17 | if in[4] != '-' || in[7] != '-' { 18 | return time.Time{}, fmt.Errorf("date not formatted as expected, missing -") 19 | } 20 | 21 | // "2006-01-02T15:04:05Z07:00" 22 | y, err := atoi4(in[:4]) 23 | if err != nil { 24 | return time.Time{}, fmt.Errorf("could not parse year %q: %w", in[:4], err) 25 | } 26 | m, err := atoi2(in[5:7]) 27 | if err != nil { 28 | return time.Time{}, fmt.Errorf("could not parse month %q: %w", in[5:7], err) 29 | } 30 | d, err := atoi2(in[8:10]) 31 | if err != nil { 32 | return time.Time{}, fmt.Errorf("could not parse day %q: %w", in[8:10], err) 33 | } 34 | 35 | if len(in) == 10 { 36 | return time.Date(y, time.Month(m), d, 0, 0, 0, 0, time.UTC), nil 37 | } 38 | 39 | if len(in) < 20 { 40 | return time.Time{}, fmt.Errorf("expect time string to be at least 20 characters long if greater than 10 characters %q", in) 41 | } 42 | 43 | if in[10] != 'T' { 44 | return time.Time{}, fmt.Errorf("time not formatted as expected, missing T") 45 | } 46 | 47 | if in[13] != ':' || in[16] != ':' { 48 | return time.Time{}, fmt.Errorf("time not formatted as expected, missing ':': %q", in) 49 | } 50 | 51 | h, err := atoi2(in[11:13]) 52 | if err != nil { 53 | return time.Time{}, fmt.Errorf("could not parse hour %q: %w", in[11:13], err) 54 | } 55 | min, err := atoi2(in[14:16]) 56 | if err != nil { 57 | return time.Time{}, fmt.Errorf("could not parse minute %q: %w", in[14:16], err) 58 | } 59 | s, err := atoi2(in[17:19]) 60 | if err != nil { 61 | return time.Time{}, fmt.Errorf("could not parse seconds %q: %w", in[17:19], err) 62 | } 63 | 64 | remaining := in[19:] 65 | c := remaining[0] 66 | 67 | var nsec int 68 | if c == '.' || c == ',' { 69 | remaining = remaining[1:] 70 | // Fractional seconds! 
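// Accumulate decimal digits into val while dividing mult (starting at 1e9)
// by ten for each digit, so val*mult is the fraction in nanoseconds. For
// example ".326" leaves val=326 and mult=1e6, i.e. 326ms. The loop stops at
// the first non-digit, which must introduce the timezone.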
71 | var val, i int 72 | var c rune 73 | var mult int = 1e9 74 | for i, c = range remaining { 75 | if c >= '0' && c <= '9' { 76 | val = val*10 + int(c-'0') 77 | mult /= 10 78 | } else { 79 | i -= 1 80 | break 81 | } 82 | } 83 | nsec = val * mult 84 | remaining = remaining[i+1:] 85 | if len(remaining) == 0 { 86 | return time.Time{}, fmt.Errorf("too short to contain timezone") 87 | } 88 | } 89 | 90 | c = remaining[0] 91 | remaining = remaining[1:] 92 | var tz *time.Location 93 | if c == 'Z' { 94 | tz = time.UTC 95 | } else { 96 | var sign int 97 | switch c { 98 | case '+': 99 | sign = 1 100 | case '-': 101 | sign = -1 102 | default: 103 | return time.Time{}, fmt.Errorf("TZ must start with + or -, not %c", c) 104 | } 105 | if len(remaining) < 5 { 106 | return time.Time{}, fmt.Errorf("TZ info wrong length") 107 | } 108 | if remaining[2] != ':' { 109 | return time.Time{}, fmt.Errorf("TZ info does not include ':'") 110 | } 111 | tzh, err := atoi2(remaining[:2]) 112 | if err != nil { 113 | return time.Time{}, fmt.Errorf("could not parse timezone offset hours %q: %w", remaining[:2], err) 114 | } 115 | tzm, err := atoi2(remaining[3:5]) 116 | if err != nil { 117 | return time.Time{}, fmt.Errorf("could not parse timezone offset minutes %q: %w", remaining[3:5], err) 118 | } 119 | 120 | tz = getTimezone(sign * (tzh*60*60 + tzm*60)) 121 | 122 | remaining = remaining[5:] 123 | } 124 | 125 | if len(remaining) != 0 { 126 | return time.Time{}, fmt.Errorf("unparsed data remains after parsing complete (%q)", remaining) 127 | } 128 | 129 | return time.Date(y, time.Month(m), d, h, min, s, nsec, tz), nil 130 | } 131 | 132 | var ( 133 | tzLock sync.Mutex 134 | tzMap = make(map[int]*time.Location) 135 | ) 136 | 137 | func getTimezone(offset int) *time.Location { 138 | tzLock.Lock() 139 | defer tzLock.Unlock() 140 | tz, ok := tzMap[offset] 141 | if !ok { 142 | tz = time.FixedZone("", offset) 143 | tzMap[offset] = tz 144 | } 145 | return tz 146 | } 147 | 148 | var errCannotParseNumber = errors.New("couldn't parse number") 149 | 150 | func atoi2(in string) (int, error) { 151 | _ = in[1] 152 | a, b := int(in[0]-'0'), int(in[1]-'0') 153 | if a < 0 || a > 9 || b < 0 || b > 9 { 154 | return 0, errCannotParseNumber 155 | } 156 | return a*10 + b, nil 157 | } 158 | 159 | func atoi4(in string) (int, error) { 160 | _ = in[3] 161 | a, b, c, d := int(in[0]-'0'), int(in[1]-'0'), int(in[2]-'0'), int(in[3]-'0') 162 | if a < 0 || a > 9 || b < 0 || b > 9 || c < 0 || c > 9 || d < 0 || d > 9 { 163 | return 0, errCannotParseNumber 164 | } 165 | return a*1000 + b*100 + c*10 + d, nil 166 | } 167 | -------------------------------------------------------------------------------- /time/parse_test.go: -------------------------------------------------------------------------------- 1 | package time 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func TestParse(t *testing.T) { 9 | tests := []string{ 10 | "2006-01-02T13:37:42Z", 11 | "2006-01-02T13:37:42,326Z", 12 | "2006-01-02T13:37:42.0Z", 13 | "2006-01-02T13:37:42.0000Z", 14 | "2006-01-02T13:37:42,326876Z", 15 | "2006-01-02T13:37:42,326876123Z", 16 | "2006-01-02T13:37:42.326876123Z", 17 | "2006-01-02T13:37:42,326+08:00", 18 | "2006-01-02T13:37:42.326-08:00", 19 | "2006-01-02T13:37:42.326-08:21", 20 | "2006-01-02T13:37:42.326+08:21", 21 | "2021-09-30T08:28:33.137578Z", 22 | } 23 | 24 | for _, test := range tests { 25 | t.Run(test, func(t *testing.T) { 26 | exp, err := time.Parse(time.RFC3339, test) 27 | if err != nil { 28 | t.Fatal(err) 29 | } 30 | 31 | actual, err := parseTime(test) 
32 | if err != nil { 33 | t.Fatal(err) 34 | } 35 | 36 | if !exp.Equal(actual) { 37 | t.Fatalf("parsed time incorrect. Got %s for %s", actual, test) 38 | } 39 | }) 40 | } 41 | } 42 | 43 | func TestParseDate(t *testing.T) { 44 | tests := []string{ 45 | "2021-09-30", 46 | "1970-01-01", 47 | } 48 | 49 | for _, test := range tests { 50 | t.Run(test, func(t *testing.T) { 51 | exp, err := time.Parse(time.DateOnly, test) 52 | if err != nil { 53 | t.Fatal(err) 54 | } 55 | 56 | actual, err := parseTime(test) 57 | if err != nil { 58 | t.Fatal(err) 59 | } 60 | 61 | if !exp.Equal(actual) { 62 | t.Fatalf("parsed time incorrect. Got %s for %s", actual, test) 63 | } 64 | }) 65 | } 66 | } 67 | 68 | func TestParseFails(t *testing.T) { 69 | tests := []string{ 70 | "", 71 | "2006", 72 | "2006-01-02T13:37:42", 73 | "2006-01-02T13:37:42,326", 74 | // "2006-01-02T13:37:42.Z", 75 | "2006:01-02T13:37:42Z", 76 | "2006-01:02T13:37:42Z", 77 | "2006-01-02 13:37:42Z", 78 | "2006-01-02T13-37:42Z", 79 | "2006-01-02T13:37-42Z", 80 | "200a-01-02T13:37:42Z", 81 | "2006-0b-02T13:37:42Z", 82 | "2006-01-0cT13:37:42Z", 83 | "2006-01-02T1d:37:42Z", 84 | "2006-01-02T13:3e:42Z", 85 | "2006-01-02T13:37:4fZ", 86 | "2006-01-02T13:37:42.727", 87 | "2006-01-02T13:37:42x08:00", 88 | "2006-01-02T13:37:42+08x00", 89 | "2006-01-02T13:37:42+0a:00", 90 | "2006-01-02T13:37:42+08:0a", 91 | "2006-01-02T13:37:42+08:0", 92 | "2006-01-02T13:37:42+08:00hello", 93 | "2006-01-02§13:37:42+08:00", 94 | } 95 | 96 | for _, test := range tests { 97 | t.Run(test, func(t *testing.T) { 98 | if _, err := time.Parse(time.RFC3339, test); err == nil { 99 | t.Errorf("%q parsed OK for the standard library", test) 100 | } 101 | 102 | if _, err := parseTime(test); err == nil { 103 | t.Errorf("%q parsed OK", test) 104 | } 105 | }) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /time/time.go: -------------------------------------------------------------------------------- 1 | // Package time contains avro decoders for time.Time. 2 | package time 3 | 4 | import ( 5 | "fmt" 6 | "reflect" 7 | "time" 8 | "unsafe" 9 | 10 | "github.com/philpearl/avro" 11 | ) 12 | 13 | // RegisterCodecs makes the codecs in this package available to avro 14 | func RegisterCodecs() { 15 | avro.Register(reflect.TypeFor[time.Time](), buildTimeCodec) 16 | avro.RegisterSchema(reflect.TypeFor[time.Time](), avro.Schema{ 17 | Type: "union", 18 | Union: []avro.Schema{ 19 | {Type: "null"}, 20 | {Type: "string"}, 21 | }, 22 | }) 23 | } 24 | 25 | func buildTimeCodec(schema avro.Schema, typ reflect.Type, omit bool) (avro.Codec, error) { 26 | // If in future we want to decode an integer unix epoc time we can add a 27 | // switch here 28 | switch schema.Type { 29 | case "string": 30 | return StringCodec{}, nil 31 | case "long": 32 | var c LongCodec 33 | c.mult = 1 34 | if schema.Object != nil { 35 | switch schema.Object.LogicalType { 36 | case "timestamp-micros": 37 | c.mult = 1000 38 | case "timestamp-millis": 39 | c.mult = 1e6 40 | } 41 | } 42 | return c, nil 43 | case "int": 44 | if schema.Object != nil { 45 | switch schema.Object.LogicalType { 46 | // BigQuery claims to use this for it's DATE type but doesn't. We've 47 | // seen DATEs as strings with no logical type. 
Format is 2006-01-02 48 | case "date": 49 | return DateCodec{}, nil 50 | } 51 | } 52 | } 53 | 54 | return nil, fmt.Errorf("time.Time codec works only with string and long schema, not %q", schema.Type) 55 | } 56 | 57 | // DateCodec is a decoder from an AVRO date logical type, which is a number of 58 | // days since 1 Jan 1970 59 | type DateCodec struct{ avro.Int32Codec } 60 | 61 | func (c DateCodec) Read(r *avro.ReadBuf, p unsafe.Pointer) error { 62 | var l int64 63 | if err := c.Int32Codec.Read(r, unsafe.Pointer(&l)); err != nil { 64 | return err 65 | } 66 | 67 | *(*time.Time)(p) = time.Date(1970, 1, 1+int(l), 0, 0, 0, 0, time.UTC) 68 | return nil 69 | } 70 | 71 | // New create a pointer to a new time.Time 72 | func (c DateCodec) New(r *avro.ReadBuf) unsafe.Pointer { 73 | return r.Alloc(timeType) 74 | } 75 | 76 | func (c DateCodec) Omit(p unsafe.Pointer) bool { 77 | t := (*time.Time)(p) 78 | return t.IsZero() 79 | } 80 | 81 | func (c DateCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 82 | t := *(*time.Time)(p) 83 | // TODO: wrangle this into Time.AppendFormat? 84 | day := int32(t.Unix() / (60 * 60 * 24)) 85 | 86 | c.Int32Codec.Write(w, unsafe.Pointer(&day)) 87 | } 88 | 89 | // StringCodec is a decoder from an AVRO string with RFC3339 encoding to a time.Time 90 | type StringCodec struct{ avro.StringCodec } 91 | 92 | func (c StringCodec) Read(r *avro.ReadBuf, p unsafe.Pointer) error { 93 | // Can we do better than using the underlying string codec? 94 | l, err := r.Varint() 95 | if err != nil { 96 | return fmt.Errorf("failed to read length of time: %w", err) 97 | } 98 | 99 | if l == 0 { 100 | // pragmatically better to just leave the time alone if there's no 101 | // content to parse. 102 | return nil 103 | } 104 | 105 | data, err := r.Next(int(l)) 106 | if err != nil { 107 | return fmt.Errorf("failed to read %d bytes of time string body: %w", l, err) 108 | } 109 | 110 | s := *(*string)(unsafe.Pointer(&data)) 111 | t, err := parseTime(s) 112 | if err != nil { 113 | return fmt.Errorf("failed to parse time: %w", err) 114 | } 115 | *(*time.Time)(p) = t 116 | return nil 117 | } 118 | 119 | var timeType = reflect.TypeFor[time.Time]() 120 | 121 | // New create a pointer to a new time.Time 122 | func (c StringCodec) New(r *avro.ReadBuf) unsafe.Pointer { 123 | return r.Alloc(timeType) 124 | } 125 | 126 | func (c StringCodec) Omit(p unsafe.Pointer) bool { 127 | t := (*time.Time)(p) 128 | return t.IsZero() 129 | } 130 | 131 | func (c StringCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 132 | t := *(*time.Time)(p) 133 | // TODO: wrangle this into Time.AppendFormat? 
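// RFC3339Nano trims trailing zeros from the fractional seconds; parseTime
// above copes with any fraction length, and the embedded string codec then
// writes the usual length varint followed by the bytes.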
134 | s := t.Format(time.RFC3339Nano) 135 | 136 | c.StringCodec.Write(w, unsafe.Pointer(&s)) 137 | } 138 | 139 | // LongCodec is a decoder from an AVRO long where the time is encoded as 140 | // nanoseconds since the UNIX epoch 141 | type LongCodec struct { 142 | avro.Int64Codec 143 | mult int64 144 | } 145 | 146 | func (c LongCodec) Read(r *avro.ReadBuf, p unsafe.Pointer) error { 147 | var l int64 148 | if err := c.Int64Codec.Read(r, unsafe.Pointer(&l)); err != nil { 149 | return err 150 | } 151 | 152 | *(*time.Time)(p) = time.Unix(0, l*c.mult).UTC() 153 | return nil 154 | } 155 | 156 | // New create a pointer to a new time.Time 157 | func (c LongCodec) New(r *avro.ReadBuf) unsafe.Pointer { 158 | return r.Alloc(timeType) 159 | } 160 | 161 | func (c LongCodec) Omit(p unsafe.Pointer) bool { 162 | t := (*time.Time)(p) 163 | return t.IsZero() 164 | } 165 | 166 | func (c LongCodec) Write(w *avro.WriteBuf, p unsafe.Pointer) { 167 | t := *(*time.Time)(p) 168 | l := t.UnixMicro() 169 | 170 | c.Int64Codec.Write(w, unsafe.Pointer(&l)) 171 | } 172 | -------------------------------------------------------------------------------- /time/time_test.go: -------------------------------------------------------------------------------- 1 | package time 2 | 3 | import ( 4 | "encoding/binary" 5 | "strconv" 6 | "testing" 7 | "time" 8 | "unsafe" 9 | 10 | "github.com/google/go-cmp/cmp" 11 | "github.com/philpearl/avro" 12 | ) 13 | 14 | func TestTime(t *testing.T) { 15 | now := time.Now() 16 | ts := now.Format(time.RFC3339Nano) 17 | data := []byte{byte(len(ts) << 1)} 18 | data = append(data, ts...) 19 | 20 | b := avro.NewReadBuf(data) 21 | c := StringCodec{} 22 | 23 | var out time.Time 24 | if err := c.Read(b, unsafe.Pointer(&out)); err != nil { 25 | t.Fatal(err) 26 | } 27 | 28 | if !out.Equal(now) { 29 | t.Fatalf("times %s & %s differ by %s", now, out, now.Sub(out)) 30 | } 31 | } 32 | 33 | func TestTimeEmpty(t *testing.T) { 34 | b := avro.NewReadBuf([]byte{0}) 35 | c := StringCodec{} 36 | 37 | var out time.Time 38 | if err := c.Read(b, unsafe.Pointer(&out)); err != nil { 39 | t.Fatal(err) 40 | } 41 | 42 | if !out.IsZero() { 43 | t.Fatalf("times %s but expected zero", out) 44 | } 45 | } 46 | 47 | func TestTimePtr(t *testing.T) { 48 | now := time.Now() 49 | ts := now.Format(time.RFC3339Nano) 50 | data := []byte{byte(len(ts) << 1)} 51 | data = append(data, ts...) 
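// The first byte, byte(len(ts)<<1), is the zig-zag varint encoding of the
// string length; a single byte is enough while len(ts) < 64, which easily
// covers an RFC3339Nano timestamp.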
52 | 53 | b := avro.NewReadBuf(data) 54 | 55 | c := avro.PointerCodec{ 56 | Codec: StringCodec{}, 57 | } 58 | 59 | var out *time.Time 60 | if err := c.Read(b, unsafe.Pointer(&out)); err != nil { 61 | t.Fatal(err) 62 | } 63 | 64 | if !out.Equal(now) { 65 | t.Fatalf("times %s & %s differ by %s", now, out, now.Sub(*out)) 66 | } 67 | } 68 | 69 | func TestTimeLong(t *testing.T) { 70 | now := time.Now() 71 | data := make([]byte, binary.MaxVarintLen64) 72 | l := binary.PutVarint(data, now.UnixNano()) 73 | data = data[:l] 74 | 75 | b := avro.NewReadBuf(data) 76 | c := LongCodec{mult: 1} 77 | 78 | var out time.Time 79 | if err := c.Read(b, unsafe.Pointer(&out)); err != nil { 80 | t.Fatal(err) 81 | } 82 | 83 | if !out.Equal(now) { 84 | t.Fatalf("times %s & %s differ by %s", now, out, now.Sub(out)) 85 | } 86 | } 87 | 88 | func TestDate(t *testing.T) { 89 | t0 := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) 90 | for _, l := range []int{0, 1, 573} { 91 | t.Run(strconv.Itoa(l), func(t *testing.T) { 92 | exp := t0.AddDate(0, 0, l) 93 | data := make([]byte, binary.MaxVarintLen64) 94 | l := binary.PutVarint(data, int64(l)) 95 | data = data[:l] 96 | 97 | b := avro.NewReadBuf(data) 98 | c := DateCodec{} 99 | 100 | var out time.Time 101 | if err := c.Read(b, unsafe.Pointer(&out)); err != nil { 102 | t.Fatal(err) 103 | } 104 | 105 | if !out.Equal(exp) { 106 | t.Fatalf("times %s & %s differ by %s", exp, out, exp.Sub(out)) 107 | } 108 | }) 109 | } 110 | } 111 | 112 | func TestTimeLongPtr(t *testing.T) { 113 | now := time.Now() 114 | data := make([]byte, binary.MaxVarintLen64) 115 | l := binary.PutVarint(data, now.UnixNano()) 116 | data = data[:l] 117 | 118 | b := avro.NewReadBuf(data) 119 | 120 | c := avro.PointerCodec{ 121 | Codec: LongCodec{mult: 1}, 122 | } 123 | 124 | var out *time.Time 125 | if err := c.Read(b, unsafe.Pointer(&out)); err != nil { 126 | t.Fatal(err) 127 | } 128 | 129 | if !out.Equal(now) { 130 | t.Fatalf("times %s & %s differ by %s", now, out, now.Sub(*out)) 131 | } 132 | } 133 | 134 | func BenchmarkTime(b *testing.B) { 135 | now := time.Now().UTC() 136 | ts := now.Format(time.RFC3339Nano) 137 | data := []byte{byte(len(ts) << 1)} 138 | data = append(data, ts...) 
139 | 140 | buf := avro.NewReadBuf(data) 141 | c := StringCodec{} 142 | 143 | b.ReportAllocs() 144 | b.ResetTimer() 145 | 146 | for b.Loop() { 147 | buf.Reset(data) 148 | 149 | var out time.Time 150 | if err := c.Read(buf, unsafe.Pointer(&out)); err != nil { 151 | b.Fatal(err) 152 | } 153 | } 154 | } 155 | 156 | func BenchmarkLongTime(b *testing.B) { 157 | now := time.Now() 158 | data := make([]byte, binary.MaxVarintLen64) 159 | l := binary.PutVarint(data, now.UnixNano()) 160 | data = data[:l] 161 | 162 | buf := avro.NewReadBuf(data) 163 | c := LongCodec{mult: 1} 164 | 165 | b.ReportAllocs() 166 | b.ResetTimer() 167 | 168 | for b.Loop() { 169 | buf.Reset(data) 170 | 171 | var out time.Time 172 | if err := c.Read(buf, unsafe.Pointer(&out)); err != nil { 173 | b.Fatal(err) 174 | } 175 | } 176 | } 177 | 178 | func BenchmarkParseTime(b *testing.B) { 179 | ts := time.Now().UTC().Format(time.RFC3339Nano) 180 | b.ResetTimer() 181 | b.ReportAllocs() 182 | 183 | for b.Loop() { 184 | _, err := time.Parse(time.RFC3339, ts) 185 | if err != nil { 186 | b.Fatal(err) 187 | } 188 | } 189 | } 190 | 191 | func BenchmarkParseTimeOurselves(b *testing.B) { 192 | ts := time.Now().UTC().Format(time.RFC3339Nano) 193 | b.ResetTimer() 194 | b.ReportAllocs() 195 | 196 | for b.Loop() { 197 | _, err := parseTime(ts) 198 | if err != nil { 199 | b.Fatal(err) 200 | } 201 | } 202 | } 203 | 204 | func TestASchema(t *testing.T) { 205 | RegisterCodecs() 206 | 207 | s, err := avro.SchemaFromString(`{ 208 | "type": "record", 209 | "name": "Root", 210 | "fields": [ 211 | { 212 | "name": "timestamp", 213 | "type": [ 214 | "null", 215 | { 216 | "type": "long", 217 | "logicalType": "timestamp-micros" 218 | } 219 | ], 220 | "default": null 221 | } 222 | ] 223 | } 224 | `) 225 | if err != nil { 226 | t.Fatal(err) 227 | } 228 | 229 | if diff := cmp.Diff(avro.Schema{ 230 | Type: "record", 231 | Object: &avro.SchemaObject{ 232 | Name: "Root", 233 | Fields: []avro.SchemaRecordField{ 234 | { 235 | Name: "timestamp", 236 | Type: avro.Schema{ 237 | Type: "union", 238 | Union: []avro.Schema{ 239 | {Type: "null"}, 240 | { 241 | Type: "long", 242 | Object: &avro.SchemaObject{LogicalType: "timestamp-micros"}, 243 | }, 244 | }, 245 | }, 246 | }, 247 | }, 248 | }, 249 | }, s); diff != "" { 250 | t.Fatal(diff) 251 | } 252 | 253 | type Thing struct { 254 | Timestamp time.Time `json:"timestamp"` 255 | } 256 | 257 | if _, err := s.Codec(Thing{}); err != nil { 258 | t.Fatal(err) 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /union.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "fmt" 5 | "unsafe" 6 | ) 7 | 8 | type unionCodec struct { 9 | codecs []Codec 10 | } 11 | 12 | func (u *unionCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 13 | index, err := r.Varint() 14 | if err != nil { 15 | return fmt.Errorf("failed reading union selector. %w", err) 16 | } 17 | if index < 0 || index >= int64(len(u.codecs)) { 18 | return fmt.Errorf("union selector %d out of range (%d types)", index, len(u.codecs)) 19 | } 20 | 21 | c := u.codecs[index] 22 | return c.Read(r, p) 23 | } 24 | 25 | func (u *unionCodec) Skip(r *ReadBuf) error { 26 | index, err := r.Varint() 27 | if err != nil { 28 | return fmt.Errorf("failed reading union selector. 
%w", err) 29 | } 30 | if index < 0 || index >= int64(len(u.codecs)) { 31 | return fmt.Errorf("union selector %d out of range (%d types)", index, len(u.codecs)) 32 | } 33 | 34 | c := u.codecs[index] 35 | return c.Skip(r) 36 | } 37 | 38 | func (u *unionCodec) New(r *ReadBuf) unsafe.Pointer { 39 | return nil 40 | } 41 | 42 | func (u *unionCodec) Omit(p unsafe.Pointer) bool { 43 | return false 44 | } 45 | 46 | func (u *unionCodec) Write(w *WriteBuf, p unsafe.Pointer) { 47 | // TODO: Need a way to determine which type! 48 | panic("union codec not implemented!") 49 | } 50 | 51 | type unionOneAndNullCodec struct { 52 | codec Codec 53 | nonNull uint8 54 | } 55 | 56 | func (u *unionOneAndNullCodec) Read(r *ReadBuf, p unsafe.Pointer) error { 57 | // index must be less than 1 byte in this case. 58 | // The result should be 2 or 4 59 | index, err := r.ReadByte() 60 | if err != nil { 61 | return fmt.Errorf("failed reading union selector. %w", err) 62 | } 63 | index /= 2 64 | if (index)&0xFE != 0 { 65 | return fmt.Errorf("union selector %d out of range (2 types)", index) 66 | } 67 | 68 | if index == u.nonNull { 69 | return u.codec.Read(r, p) 70 | } 71 | return nil 72 | } 73 | 74 | func (u *unionOneAndNullCodec) Skip(r *ReadBuf) error { 75 | // index must be less than 1 byte in this case 76 | index, err := r.ReadByte() 77 | if err != nil { 78 | return fmt.Errorf("failed reading union selector. %w", err) 79 | } 80 | index /= 2 81 | if (index)&0xFE != 0 { 82 | return fmt.Errorf("union selector %d out of range (2 types)", index) 83 | } 84 | 85 | if index == u.nonNull { 86 | return u.codec.Skip(r) 87 | } 88 | return nil 89 | } 90 | 91 | func (u *unionOneAndNullCodec) New(r *ReadBuf) unsafe.Pointer { 92 | return nil 93 | } 94 | 95 | func (u *unionOneAndNullCodec) Omit(p unsafe.Pointer) bool { 96 | // The union codec itself is never omitted 97 | return false 98 | } 99 | 100 | func (u *unionOneAndNullCodec) Write(w *WriteBuf, p unsafe.Pointer) { 101 | if u.codec.Omit(p) { 102 | // TODO: this assumes the null type is always first. 103 | w.Varint(0) 104 | return 105 | } 106 | w.Varint(int64(u.nonNull)) 107 | u.codec.Write(w, p) 108 | } 109 | 110 | type unionNullString struct { 111 | codec StringCodec 112 | nonNull byte 113 | } 114 | 115 | func (u *unionNullString) Read(r *ReadBuf, p unsafe.Pointer) error { 116 | // index must be less than 1 byte in this case 117 | index, err := r.ReadByte() 118 | if err != nil { 119 | return fmt.Errorf("failed reading union selector. %w", err) 120 | } 121 | index /= 2 122 | if (index)&0xFE != 0 { 123 | return fmt.Errorf("union selector %d out of range (2 types)", index) 124 | } 125 | 126 | if index == u.nonNull { 127 | return u.codec.Read(r, p) 128 | } 129 | return nil 130 | } 131 | 132 | func (u *unionNullString) Skip(r *ReadBuf) error { 133 | // index must be less than 1 byte in this case 134 | index, err := r.ReadByte() 135 | if err != nil { 136 | return fmt.Errorf("failed reading union selector. 
%w", err) 137 | } 138 | index /= 2 139 | if (index)&0xFE != 0 { 140 | return fmt.Errorf("union selector %d out of range (2 types)", index) 141 | } 142 | 143 | if index == u.nonNull { 144 | return u.codec.Skip(r) 145 | } 146 | return nil 147 | } 148 | 149 | func (u *unionNullString) New(r *ReadBuf) unsafe.Pointer { 150 | return nil 151 | } 152 | 153 | func (u *unionNullString) Omit(p unsafe.Pointer) bool { 154 | return false 155 | } 156 | 157 | func (u *unionNullString) Write(w *WriteBuf, p unsafe.Pointer) { 158 | if u.codec.Omit(p) { 159 | w.Varint(0) 160 | } 161 | 162 | w.Varint(1) 163 | u.codec.Write(w, p) 164 | } 165 | -------------------------------------------------------------------------------- /union_test.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import ( 4 | "testing" 5 | "unsafe" 6 | ) 7 | 8 | func TestUnionCodec(t *testing.T) { 9 | c := unionCodec{ 10 | codecs: []Codec{nullCodec{}, StringCodec{}}, 11 | } 12 | 13 | tests := []struct { 14 | name string 15 | data []byte 16 | exp string 17 | }{ 18 | { 19 | name: "null", 20 | data: []byte{0}, 21 | exp: "", 22 | }, 23 | { 24 | name: "string", 25 | data: []byte{2, 6, 'f', 'o', 'o'}, 26 | exp: "foo", 27 | }, 28 | } 29 | 30 | for _, test := range tests { 31 | t.Run(test.name, func(t *testing.T) { 32 | r := NewReadBuf(test.data) 33 | var actual string 34 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 35 | t.Fatal(err) 36 | } 37 | if actual != test.exp { 38 | t.Fatalf("result %q does not match expected %q", actual, test.exp) 39 | } 40 | if r.Len() != 0 { 41 | t.Fatalf("%d bytes unread", r.Len()) 42 | } 43 | }) 44 | t.Run(test.name+" skip", func(t *testing.T) { 45 | r := NewReadBuf(test.data) 46 | if err := c.Skip(r); err != nil { 47 | t.Fatal(err) 48 | } 49 | if r.Len() != 0 { 50 | t.Fatalf("%d bytes unread", r.Len()) 51 | } 52 | }) 53 | 54 | } 55 | } 56 | 57 | func TestUnionOneCodec(t *testing.T) { 58 | c := unionOneAndNullCodec{ 59 | codec: StringCodec{}, 60 | nonNull: 1, 61 | } 62 | 63 | tests := []struct { 64 | name string 65 | data []byte 66 | exp string 67 | }{ 68 | { 69 | name: "null", 70 | data: []byte{0}, 71 | exp: "", 72 | }, 73 | { 74 | name: "string", 75 | data: []byte{2, 6, 'f', 'o', 'o'}, 76 | exp: "foo", 77 | }, 78 | } 79 | 80 | for _, test := range tests { 81 | t.Run(test.name, func(t *testing.T) { 82 | r := NewReadBuf(test.data) 83 | var actual string 84 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 85 | t.Fatal(err) 86 | } 87 | if actual != test.exp { 88 | t.Fatalf("result %q does not match expected %q", actual, test.exp) 89 | } 90 | if r.Len() != 0 { 91 | t.Fatalf("%d bytes unread", r.Len()) 92 | } 93 | }) 94 | t.Run(test.name+" skip", func(t *testing.T) { 95 | r := NewReadBuf(test.data) 96 | if err := c.Skip(r); err != nil { 97 | t.Fatal(err) 98 | } 99 | if r.Len() != 0 { 100 | t.Fatalf("%d bytes unread", r.Len()) 101 | } 102 | }) 103 | t.Run(test.name+" roundtrip", func(t *testing.T) { 104 | w := NewWriteBuf(nil) 105 | c.Write(w, unsafe.Pointer(&test.exp)) 106 | var actual string 107 | r := NewReadBuf(w.Bytes()) 108 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 109 | t.Fatal(err) 110 | } 111 | if actual != test.exp { 112 | t.Fatalf("result %q does not match expected %q", actual, test.exp) 113 | } 114 | }) 115 | } 116 | } 117 | 118 | func TestUnionStringCodec(t *testing.T) { 119 | c := unionNullString{ 120 | nonNull: 1, 121 | } 122 | 123 | tests := []struct { 124 | name string 125 | data []byte 126 | exp 
string 127 | }{ 128 | { 129 | name: "null", 130 | data: []byte{0}, 131 | exp: "", 132 | }, 133 | { 134 | name: "string", 135 | data: []byte{2, 6, 'f', 'o', 'o'}, 136 | exp: "foo", 137 | }, 138 | } 139 | 140 | for _, test := range tests { 141 | t.Run(test.name, func(t *testing.T) { 142 | r := NewReadBuf(test.data) 143 | var actual string 144 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 145 | t.Fatal(err) 146 | } 147 | if actual != test.exp { 148 | t.Fatalf("result %q does not match expected %q", actual, test.exp) 149 | } 150 | if r.Len() != 0 { 151 | t.Fatalf("%d bytes unread", r.Len()) 152 | } 153 | }) 154 | t.Run(test.name+" skip", func(t *testing.T) { 155 | r := NewReadBuf(test.data) 156 | if err := c.Skip(r); err != nil { 157 | t.Fatal(err) 158 | } 159 | if r.Len() != 0 { 160 | t.Fatalf("%d bytes unread", r.Len()) 161 | } 162 | }) 163 | t.Run(test.name+" roundtrip", func(t *testing.T) { 164 | w := NewWriteBuf(nil) 165 | c.Write(w, unsafe.Pointer(&test.exp)) 166 | var actual string 167 | r := NewReadBuf(w.Bytes()) 168 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 169 | t.Fatal(err) 170 | } 171 | if actual != test.exp { 172 | t.Fatalf("result %q does not match expected %q", actual, test.exp) 173 | } 174 | }) 175 | t.Run(test.name+" roundtrip omitempty", func(t *testing.T) { 176 | c := unionNullString{ 177 | nonNull: 1, 178 | codec: StringCodec{omitEmpty: true}, 179 | } 180 | 181 | w := NewWriteBuf(nil) 182 | c.Write(w, unsafe.Pointer(&test.exp)) 183 | var actual string 184 | r := NewReadBuf(w.Bytes()) 185 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 186 | t.Fatal(err) 187 | } 188 | if actual != test.exp { 189 | t.Fatalf("result %q does not match expected %q", actual, test.exp) 190 | } 191 | }) 192 | 193 | } 194 | } 195 | 196 | func BenchmarkUnionStringCodec(b *testing.B) { 197 | c := unionNullString{ 198 | nonNull: 1, 199 | } 200 | data := []byte{2, 6, 'f', 'o', 'o'} 201 | 202 | b.Run("read", func(b *testing.B) { 203 | b.ReportAllocs() 204 | b.RunParallel(func(pb *testing.PB) { 205 | r := NewReadBuf(nil) 206 | var actual string 207 | for pb.Next() { 208 | r.Reset(data) 209 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 210 | b.Fatal(err) 211 | } 212 | r.ExtractResourceBank().Close() 213 | } 214 | }) 215 | }) 216 | b.Run("skip", func(b *testing.B) { 217 | b.ReportAllocs() 218 | b.RunParallel(func(pb *testing.PB) { 219 | r := NewReadBuf(nil) 220 | for pb.Next() { 221 | r.Reset(data) 222 | if err := c.Skip(r); err != nil { 223 | b.Fatal(err) 224 | } 225 | } 226 | }) 227 | }) 228 | } 229 | 230 | func BenchmarkUnionOneCodec(b *testing.B) { 231 | c := unionOneAndNullCodec{ 232 | codec: StringCodec{}, 233 | nonNull: 1, 234 | } 235 | data := []byte{2, 6, 'f', 'o', 'o'} 236 | 237 | b.Run("read", func(b *testing.B) { 238 | b.ReportAllocs() 239 | b.RunParallel(func(pb *testing.PB) { 240 | r := NewReadBuf(nil) 241 | var actual string 242 | for pb.Next() { 243 | r.Reset(data) 244 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 245 | b.Fatal(err) 246 | } 247 | r.ExtractResourceBank().Close() 248 | } 249 | }) 250 | }) 251 | b.Run("skip", func(b *testing.B) { 252 | b.ReportAllocs() 253 | b.RunParallel(func(pb *testing.PB) { 254 | r := NewReadBuf(nil) 255 | for pb.Next() { 256 | r.Reset(data) 257 | if err := c.Skip(r); err != nil { 258 | b.Fatal(err) 259 | } 260 | } 261 | }) 262 | }) 263 | } 264 | 265 | func BenchmarkUnionCodec(b *testing.B) { 266 | c := unionCodec{ 267 | codecs: []Codec{nullCodec{}, StringCodec{}}, 268 | } 269 | 
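// The encoded value below is 2 (zig-zag varint selecting union branch 1,
// the string codec), then 6 (zig-zag varint for length 3), then the bytes
// of "foo".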
data := []byte{2, 6, 'f', 'o', 'o'} 270 | 271 | b.Run("read", func(b *testing.B) { 272 | b.ReportAllocs() 273 | b.RunParallel(func(pb *testing.PB) { 274 | r := NewReadBuf(nil) 275 | var actual string 276 | for pb.Next() { 277 | r.Reset(data) 278 | if err := c.Read(r, unsafe.Pointer(&actual)); err != nil { 279 | b.Fatal(err) 280 | } 281 | r.ExtractResourceBank().Close() 282 | } 283 | }) 284 | }) 285 | b.Run("skip", func(b *testing.B) { 286 | b.ReportAllocs() 287 | b.RunParallel(func(pb *testing.PB) { 288 | r := NewReadBuf(nil) 289 | for pb.Next() { 290 | r.Reset(data) 291 | if err := c.Skip(r); err != nil { 292 | b.Fatal(err) 293 | } 294 | } 295 | }) 296 | }) 297 | } 298 | -------------------------------------------------------------------------------- /unsafetricks.go: -------------------------------------------------------------------------------- 1 | package avro 2 | 3 | import "unsafe" 4 | 5 | //go:linkname unsafe_New reflect.unsafe_New 6 | func unsafe_New(rtype unsafe.Pointer) unsafe.Pointer 7 | 8 | //go:linkname unsafe_NewArray reflect.unsafe_NewArray 9 | func unsafe_NewArray(rtype unsafe.Pointer, length int) unsafe.Pointer 10 | 11 | // typedslicecopy copies a slice of elemType values from src to dst, 12 | // returning the number of elements copied. 13 | // 14 | //go:linkname typedslicecopy reflect.typedslicecopy 15 | //go:noescape 16 | func typedslicecopy(elemType unsafe.Pointer, dst, src sliceHeader) int 17 | 18 | //go:linkname mapassign reflect.mapassign 19 | //go:noescape 20 | func mapassign(typ unsafe.Pointer, hmap unsafe.Pointer, key, val unsafe.Pointer) 21 | 22 | // typedmemclr zeros the value at ptr of type t. 23 | // 24 | //go:linkname typedmemclr reflect.typedmemclr 25 | //go:noescape 26 | func typedmemclr(typ, ptr unsafe.Pointer) 27 | 28 | // typedarrayclear clears the array at ptr 29 | // 30 | //go:linkname typedarrayclear reflect.typedarrayclear 31 | //go:noescape 32 | func typedarrayclear(typ, ptr unsafe.Pointer, len int) 33 | 34 | // We could use the reflect version of mapiterinit, but that forces a heap 35 | // allocation per map iteration. Instead we can use the runtime version, but 36 | // then we have to allocate a runtime private struct for it to use instead. We 37 | // can do this, and it uses stack memory, so that's less GC pressure and more 38 | // speed. But it isn't excellent from a maintenance point of view. Things will 39 | // break if the struct changes and we won't find out. But let's go for it. 40 | // 41 | // mapiter matches hiter in runtime/map.go. Using matching-ish types rather than 42 | // a big enough array of unsafe.Pointer just in case the GC would run into an 43 | // issue if something it thought was a pointer was not. Don't attempt to access 44 | // any of the fields in this struct directly! On the plus side this hasn't 45 | // changed significantly for 6 years 46 | // 47 | // Hmm, actually, as of Go 1.24 the underlying map has changed and this only 48 | // works as there's an explicit shim in the Go code to allow it to! It costs a 49 | // single heap allocation (I think?) 
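// A hedged sketch of how these linknamed helpers are typically driven (the
// real iteration lives in the map codec in map.go). The names rtype and m
// are illustrative stand-ins for the unpacked map type and the map pointer;
// mapiterkey returns nil once the iterator is exhausted.
//
//	var it mapiter
//	mapiterinit(rtype, m, unsafe.Pointer(&it))
//	for k := mapiterkey(unsafe.Pointer(&it)); k != nil; k = mapiterkey(unsafe.Pointer(&it)) {
//		v := mapiterelem(unsafe.Pointer(&it))
//		_ = v // decode the key at k and the element at v here
//		mapiternext(unsafe.Pointer(&it))
//	}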
50 | type mapiter struct { 51 | key unsafe.Pointer 52 | elem unsafe.Pointer 53 | t unsafe.Pointer 54 | h unsafe.Pointer 55 | buckets unsafe.Pointer 56 | bptr unsafe.Pointer 57 | overflow unsafe.Pointer 58 | oldoverflow unsafe.Pointer 59 | startBucket uintptr 60 | offset uint8 61 | wrapped bool 62 | B uint8 63 | i uint8 64 | bucket uintptr 65 | checkBucket uintptr 66 | } 67 | 68 | //go:linkname mapiterinit runtime.mapiterinit 69 | //go:noescape 70 | func mapiterinit(t unsafe.Pointer, m unsafe.Pointer, hi unsafe.Pointer) 71 | 72 | //go:linkname mapiterkey reflect.mapiterkey 73 | //go:noescape 74 | func mapiterkey(it unsafe.Pointer) (key unsafe.Pointer) 75 | 76 | //go:linkname mapiterelem reflect.mapiterelem 77 | //go:noescape 78 | func mapiterelem(it unsafe.Pointer) (elem unsafe.Pointer) 79 | 80 | //go:linkname mapiternext reflect.mapiternext 81 | //go:noescape 82 | func mapiternext(it unsafe.Pointer) 83 | 84 | //go:linkname maplen reflect.maplen 85 | //go:noescape 86 | func maplen(m unsafe.Pointer) int 87 | --------------------------------------------------------------------------------