├── .gitignore ├── raw.go ├── LICENSE ├── raw_test.go ├── README.md └── cmd └── bolt-rawgen └── main.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | -------------------------------------------------------------------------------- /raw.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package raw provides utilities for mapping raw Go structs to byte slices. 3 | */ 4 | package raw 5 | 6 | import ( 7 | "unsafe" 8 | ) 9 | 10 | // String represents an offset and pointer to a string in a byte slice. 11 | type String struct { 12 | Offset uint16 13 | Length uint16 14 | } 15 | 16 | // Encode writes a string to a byte slice and updates the offset/length. 17 | func (s *String) Encode(str string, value *[]byte) { 18 | s.Offset = uint16(len(*value)) 19 | s.Length = uint16(len(str)) 20 | *value = append(*value, []byte(str)...) 21 | } 22 | 23 | // Bytes returns a byte slice pointing to the string's contents. 24 | func (s *String) Bytes(value []byte) []byte { 25 | return (*[0xFFFF]byte)(unsafe.Pointer(&value[s.Offset]))[:s.Length] 26 | } 27 | 28 | // String returns a Go string of the string value from an encoded byte slice. 29 | func (s *String) String(value []byte) string { 30 | return string(s.Bytes(value)) 31 | } 32 | 33 | // Time is a marker type for time.Time. 34 | type Time int64 35 | 36 | // Duration is a marker type for time.Duration. 
37 | type Duration int64 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 BoltDB 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /raw_test.go: -------------------------------------------------------------------------------- 1 | package raw_test 2 | 3 | import ( 4 | "testing" 5 | "unsafe" 6 | 7 | . "github.com/boltdb/raw" 8 | ) 9 | 10 | // Ensure that an event can be correctly encoded. 11 | func TestString_Encode(t *testing.T) { 12 | // Encode to a byte slice. 13 | o := &O{MyString1: "foo", MyInt: 1000, MyString2: "bar"} 14 | v := o.Encode() 15 | 16 | // Map to a raw event and verify. 
17 | r := ((*R)(unsafe.Pointer(&v[0]))) 18 | if s := r.MyString1.String(v); s != "foo" { 19 | t.Fatalf("invalid string decode(1): %q", s) 20 | } 21 | if i := r.MyInt; i != 1000 { 22 | t.Fatalf("invalid int decode: %q", i) 23 | } 24 | if s := r.MyString2.String(v); s != "bar" { 25 | t.Fatalf("invalid string decode(1): %q", s) 26 | } 27 | } 28 | 29 | func BenchmarkStringEncode(b *testing.B) { 30 | o := &O{MyString1: "foo", MyInt: 1000, MyString2: "bar"} 31 | for i := 0; i < b.N; i++ { 32 | v := o.Encode() 33 | if len(v) == 0 { 34 | b.Fatalf("invalid string length: %d", len(v)) 35 | } 36 | } 37 | } 38 | 39 | func BenchmarkStringDecode(b *testing.B) { 40 | o := &O{MyString1: "foo", MyInt: 1000, MyString2: "bar"} 41 | v := o.Encode() 42 | 43 | for i := 0; i < b.N; i++ { 44 | r := ((*R)(unsafe.Pointer(&v[0]))) 45 | if len(r.MyString1.Bytes(v)) == 0 { 46 | b.Fatalf("invalid string length") 47 | } 48 | } 49 | } 50 | 51 | // O represents a test struct that will encode into R. 52 | type O struct { 53 | MyString1 string 54 | MyInt int 55 | MyString2 string 56 | } 57 | 58 | // Encode encodes an Event into a byte slice that can be read by a RawEvent. 59 | func (o *O) Encode() []byte { 60 | var r R 61 | b := make([]byte, unsafe.Sizeof(r), int(unsafe.Sizeof(r))+len(o.MyString1)+len(o.MyString2)) 62 | r.MyString1.Encode(o.MyString1, &b) 63 | r.MyInt = int64(o.MyInt) 64 | r.MyString2.Encode(o.MyString2, &b) 65 | copy(b, (*[unsafe.Sizeof(r)]byte)(unsafe.Pointer(&r))[:]) 66 | return b 67 | } 68 | 69 | // R represents a raw struct. 
70 | type R struct { 71 | MyString1 String 72 | MyInt int64 73 | MyString2 String 74 | } 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | raw [![Build Status](https://drone.io/github.com/boltdb/raw/status.png)](https://drone.io/github.com/boltdb/raw/latest) [![Coverage Status](https://img.shields.io/coveralls/boltdb/raw.svg)](https://coveralls.io/r/boltdb/raw?branch=master) [![GoDoc](https://godoc.org/github.com/boltdb/raw?status.png)](https://godoc.org/github.com/boltdb/raw) ![Project status](http://img.shields.io/status/experimental.png?color=red) 2 | === 3 | 4 | This is a simple library for working with raw Go struct data. Most of the time 5 | it's good to serialize your data to a common encoding format (e.g. JSON, 6 | MessagePack, Protocol Buffers) when saving and retrieving data to disk or 7 | sending over a network. These encodings can provide a common interface for data 8 | and support versioning and other useful features. 9 | 10 | However, serialization comes at a cost. Converting between types and copying 11 | memory both have overhead, so when you need to go really fast, sometimes you need 12 | to skip serialization altogether. 13 | 14 | 15 | ## Usage 16 | 17 | ### Basics 18 | 19 | Go provides the ability to perform type conversion on byte slices to convert 20 | them into pointers of Go types. You can do this using the `unsafe` package. 21 | As the name suggests, it's not safe. You need to know what you're doing. 22 | 23 | ```go 24 | // Create a byte slice with 4 bytes. 25 | b := make([]byte, 4) 26 | 27 | // Create a 32-bit int pointer to the first byte of the slice and set a value. 28 | x := (*int32)(unsafe.Pointer(&b[0])) 29 | *x = 1000 30 | 31 | // Verify that the underlying byte slice changed. 32 | fmt.Printf("%x\n", b) 33 | ``` 34 | 35 | This will print out the value: `e8030000` which is the little-endian hex representation of `1000`.
36 | 37 | 38 | ### Using Raw 39 | 40 | The primitive integer and float types in Go map directly to byte slices. However, 41 | the string type does not. Its internal representation is neither publicly accessible 42 | nor guaranteed to stay the same between Go versions. So to map variable-length 43 | Go strings to byte slices in our code we can use the `raw.String` type: 44 | 45 | ```go 46 | var s raw.String 47 | b := make([]byte, unsafe.Sizeof(s)) 48 | s.Encode("foo", &b) 49 | copy(b, (*[unsafe.Sizeof(s)]byte)(unsafe.Pointer(&s))[:]) 50 | ``` 51 | 52 | That will encode the string offset and length followed by the bytes, `"foo"`. 53 | Then when you want to use the string, type convert the byte slice to your 54 | `raw.String` and extract the data: 55 | 56 | ```go 57 | s := ((*raw.String)(unsafe.Pointer(&b[0]))) 58 | fmt.Print(s.String(b)) 59 | 60 | // Prints: foo 61 | ``` 62 | 63 | If this seems like a lot of work just to encode a string, you'd be correct. 64 | However, it's fast and when multiple strings are combined in a struct it allows 65 | us to only deserialize the fields we need. 66 | 67 | 68 | ## Performance 69 | 70 | To get an idea of the performance of this approach, please see the benchmarks 71 | inside the test suite. 72 | 73 | On my Intel Core i7 2.9GHz Macbook Pro, I see the following stats: 74 | 75 | ```sh 76 | $ go test -bench=. -benchmem 77 | PASS 78 | BenchmarkString_Encode 10000000 214 ns/op 64 B/op 3 allocs/op 79 | BenchmarkString_Decode 500000000 3.77 ns/op 0 B/op 0 allocs/op 80 | ok github.com/boltdb/raw 4.635s 81 | ``` 82 | 83 | YMMV.
84 | -------------------------------------------------------------------------------- /cmd/bolt-rawgen/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "flag" 6 | "fmt" 7 | "go/ast" 8 | "go/parser" 9 | "go/token" 10 | "io" 11 | "io/ioutil" 12 | "log" 13 | "os" 14 | "path/filepath" 15 | "regexp" 16 | "strings" 17 | "unicode" 18 | ) 19 | 20 | // verbose turns on trace-level debugging. 21 | var verbose = flag.Bool("v", false, "verbose") 22 | 23 | func main() { 24 | log.SetFlags(0) 25 | 26 | // Parse command line arguments. 27 | flag.Parse() 28 | root := flag.Arg(0) 29 | if root == "" { 30 | log.Fatal("path required") 31 | } 32 | 33 | // Iterate over the tree and process files importing boltdb/raw. 34 | if err := filepath.Walk(root, walk); err != nil { 35 | log.Fatal(err) 36 | } 37 | } 38 | 39 | // Walk recursively iterates over all files in a directory and processes any 40 | // file that imports "github.com/boltdb/raw". 41 | func walk(path string, info os.FileInfo, err error) error { 42 | traceln("walk:", path) 43 | 44 | if info == nil { 45 | return fmt.Errorf("file not found: %s", err) 46 | } else if info.IsDir() { 47 | traceln("skipping: is directory") 48 | return nil 49 | } else if filepath.Ext(path) != ".go" { 50 | traceln("skipping: is not a go file") 51 | return nil 52 | } 53 | 54 | // Check if file imports boltdb/raw. 55 | if v, err := importsRaw(path); err != nil { 56 | return err 57 | } else if !v { 58 | traceln("skipping: does not import raw") 59 | return nil 60 | } 61 | 62 | // Process each file. 63 | if err := process(path); err != nil { 64 | return err 65 | } 66 | 67 | return nil 68 | } 69 | 70 | // importsRaw returns true if a given path imports boltdb/raw. 
71 | func importsRaw(path string) (bool, error) { 72 | f, err := parser.ParseFile(token.NewFileSet(), path, nil, parser.ImportsOnly) 73 | if err != nil { 74 | return false, err 75 | } 76 | for _, i := range f.Imports { 77 | traceln("✓ imports", i.Path.Value) 78 | if i.Path.Value == `"github.com/boltdb/raw"` { 79 | return true, nil 80 | } 81 | } 82 | return false, nil 83 | } 84 | 85 | // process parses and rewrites a file by generating the appropriate exported 86 | // types for raw types. 87 | func process(path string) error { 88 | b, err := ioutil.ReadFile(path) 89 | if err != nil { 90 | return err 91 | } 92 | 93 | // Remove code between begin/end pragma comments. 94 | b = regexp.MustCompile(`(?is)//raw:codegen:begin.+?//raw:codegen:end`).ReplaceAll(b, []byte{}) 95 | b = []byte(strings.TrimRight(string(b), " \n\r")) 96 | 97 | // Re-parse the file without the pragmas. 98 | f, err := parser.ParseFile(token.NewFileSet(), path, b, 0) 99 | if err != nil { 100 | return err 101 | } 102 | 103 | // Iterate over all the nodes and add exported types where appropriate. 104 | var g generator 105 | g.w.Write(b) 106 | g.w.WriteString("\n\n") 107 | 108 | ast.Walk(&g, f) 109 | if g.err != nil { 110 | return g.err 111 | } 112 | 113 | // Rewrite original file. 114 | ioutil.WriteFile(path, g.w.Bytes(), 0600) 115 | 116 | log.Println("OK", path) 117 | 118 | return nil 119 | } 120 | 121 | // generator iterates over every AST node and generates code as appropriate. 122 | type generator struct { 123 | w bytes.Buffer 124 | err error 125 | } 126 | 127 | // Visit implements the ast.Visitor interface. It is called once for every AST node. 
128 | func (g *generator) Visit(node ast.Node) ast.Visitor { 129 | if g.err != nil || node == nil { 130 | return nil 131 | } 132 | 133 | switch node := node.(type) { 134 | case *ast.TypeSpec: 135 | if err := g.visitTypeSpec(node); err != nil { 136 | g.err = err 137 | } 138 | } 139 | return g 140 | } 141 | 142 | // visitTypeSpec is called for every type declaration. Each declaration is 143 | // checked for raw usage and an exported type is generated if appropriate. 144 | func (g *generator) visitTypeSpec(node *ast.TypeSpec) error { 145 | // Only process struct types. 146 | s, ok := node.Type.(*ast.StructType) 147 | if !ok { 148 | return nil 149 | } 150 | 151 | // Check if this struct type contains only raw fields. 152 | if !isRawStructType(s) { 153 | traceln("not raw:", node.Name.Name) 154 | return nil 155 | } 156 | 157 | // Disallow raw structs that are exported. 158 | if unicode.IsUpper(rune(node.Name.Name[0])) { 159 | return fmt.Errorf("raw struct cannot be exported: %s", node.Name.Name) 160 | } 161 | 162 | // Generate an exported name. 163 | unexp := node.Name.Name 164 | exp := tocamelcase(node.Name.Name) 165 | 166 | tracef("• processing: %s -> %s", unexp, exp) 167 | 168 | // Generate exported struct and functions. 
169 | fmt.Fprint(&g.w, "//raw:codegen:begin\n\n") 170 | fmt.Fprint(&g.w, "//\n") 171 | fmt.Fprint(&g.w, "// DO NOT CHANGE\n") 172 | fmt.Fprint(&g.w, "// This section has been generated by bolt-rawgen.\n") 173 | fmt.Fprint(&g.w, "//\n\n") 174 | if err := writeExportedType(exp, s, &g.w); err != nil { 175 | return fmt.Errorf("generate exported type: %s", s) 176 | } 177 | if err := writeEncodeFunc(unexp, exp, s, &g.w); err != nil { 178 | return fmt.Errorf("generate encode func: %s", s) 179 | } 180 | if err := writeDecodeFunc(unexp, exp, s, &g.w); err != nil { 181 | return fmt.Errorf("generate decode func: %s", s) 182 | } 183 | if err := writeAccessorFuncs(unexp, s, &g.w); err != nil { 184 | return fmt.Errorf("generate accessor funcs: %s", s) 185 | } 186 | fmt.Fprint(&g.w, "//raw:codegen:end\n\n") 187 | 188 | return nil 189 | } 190 | 191 | // writeExportedType writes a generated exported type for a raw struct type. 192 | func writeExportedType(name string, node *ast.StructType, w io.Writer) error { 193 | fmt.Fprintf(w, "type %s struct {\n", name) 194 | 195 | for _, f := range node.Fields.List { 196 | var typ string 197 | switch tostr(f.Type) { 198 | case "bool": 199 | typ = "bool" 200 | case "int8", "int16", "int32", "int64": 201 | typ = "int" 202 | case "uint8", "uint16", "uint32", "uint64": 203 | typ = "uint" 204 | case "float32": 205 | typ = "float32" 206 | case "float64": 207 | typ = "float64" 208 | case "raw.Time": 209 | typ = "time.Time" 210 | case "raw.Duration": 211 | typ = "time.Duration" 212 | case "raw.String": 213 | typ = "string" 214 | default: 215 | return fmt.Errorf("invalid raw type: %s", tostr(f.Type)) 216 | } 217 | 218 | for _, n := range f.Names { 219 | fmt.Fprintf(w, "\t%s %s\n", tocamelcase(n.Name), typ) 220 | } 221 | } 222 | 223 | fmt.Fprintf(w, "}\n\n") 224 | return nil 225 | } 226 | 227 | // writeEncodeFunc writes a generated encoding function for a raw struct type. 
228 | func writeEncodeFunc(unexp, exp string, node *ast.StructType, w io.Writer) error { 229 | fmt.Fprintf(w, "func (o *%s) Encode() []byte {\n", exp) 230 | fmt.Fprintf(w, "\tvar r %s\n", unexp) 231 | fmt.Fprintf(w, "\tb := make([]byte, unsafe.Sizeof(r), int(unsafe.Sizeof(r)))\n") 232 | 233 | for _, f := range node.Fields.List { 234 | typ := tostr(f.Type) 235 | for _, n := range f.Names { 236 | switch typ { 237 | case "bool": 238 | fmt.Fprintf(w, "\tr.%s = o.%s\n", n.Name, tocamelcase(n.Name)) 239 | case "int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64", "float32", "float64": 240 | fmt.Fprintf(w, "\tr.%s = %s(o.%s)\n", n.Name, typ, tocamelcase(n.Name)) 241 | typ = "uint" 242 | case "raw.Time": 243 | fmt.Fprintf(w, "\tr.%s = raw.Time(o.%s.UnixNano())\n", n.Name, tocamelcase(n.Name)) 244 | case "raw.Duration": 245 | fmt.Fprintf(w, "\tr.%s = raw.Duration(o.%s)\n", n.Name, tocamelcase(n.Name)) 246 | case "raw.String": 247 | fmt.Fprintf(w, "\tr.%s.Encode(o.%s, &b)\n", n.Name, tocamelcase(n.Name)) 248 | default: 249 | return fmt.Errorf("invalid raw type: %s", tostr(f.Type)) 250 | } 251 | } 252 | } 253 | 254 | fmt.Fprintf(w, "\tcopy(b, (*[unsafe.Sizeof(r)]byte)(unsafe.Pointer(&r))[:])\n") 255 | fmt.Fprintf(w, "\treturn b\n") 256 | fmt.Fprintf(w, "}\n\n") 257 | return nil 258 | } 259 | 260 | // writeDecodeFunc writes a generated decoding function for a raw struct type. 261 | func writeDecodeFunc(unexp, exp string, node *ast.StructType, w io.Writer) error { 262 | fmt.Fprintf(w, "func (o *%s) Decode(b []byte) {\n", exp) 263 | fmt.Fprintf(w, "\tr := (*%s)(unsafe.Pointer(&b[0]))\n", unexp) 264 | 265 | for _, f := range node.Fields.List { 266 | for _, n := range f.Names { 267 | fmt.Fprintf(w, "\to.%s = r.%s()\n", tocamelcase(n.Name), tocamelcase(n.Name)) 268 | } 269 | } 270 | 271 | fmt.Fprintf(w, "}\n\n") 272 | return nil 273 | } 274 | 275 | // writeAccessorFuncs writes a accessor functions for a raw struct type. 
276 | func writeAccessorFuncs(name string, node *ast.StructType, w io.Writer) error { 277 | for _, f := range node.Fields.List { 278 | typ := tostr(f.Type) 279 | for _, n := range f.Names { 280 | switch typ { 281 | case "bool": 282 | fmt.Fprintf(w, "func (r *%s) %s() bool { return r.%s }\n\n", name, tocamelcase(n.Name), n.Name) 283 | case "int8", "int16", "int32", "int64": 284 | fmt.Fprintf(w, "func (r *%s) %s() int { return int(r.%s) }\n\n", name, tocamelcase(n.Name), n.Name) 285 | case "uint8", "uint16", "uint32", "uint64": 286 | fmt.Fprintf(w, "func (r *%s) %s() uint { return uint(r.%s) }\n\n", name, tocamelcase(n.Name), n.Name) 287 | case "float32", "float64": 288 | fmt.Fprintf(w, "func (r *%s) %s() %s { return r.%s }\n\n", name, tocamelcase(n.Name), typ, n.Name) 289 | case "raw.Time": 290 | fmt.Fprintf(w, "func (r *%s) %s() time.Time { return time.Unix(0, int64(r.%s)).UTC() }\n\n", name, tocamelcase(n.Name), n.Name) 291 | case "raw.Duration": 292 | fmt.Fprintf(w, "func (r *%s) %s() time.Duration { return time.Duration(r.%s) }\n\n", name, tocamelcase(n.Name), n.Name) 293 | case "raw.String": 294 | fmt.Fprintf(w, "func (r *%s) %s() string { return r.%s.String(((*[0xFFFF]byte)(unsafe.Pointer(r)))[:]) }\n", name, tocamelcase(n.Name), n.Name) 295 | fmt.Fprintf(w, "func (r *%s) %sBytes() []byte { return r.%s.Bytes(((*[0xFFFF]byte)(unsafe.Pointer(r)))[:]) }\n\n", name, tocamelcase(n.Name), n.Name) 296 | default: 297 | return fmt.Errorf("invalid raw type: %s", tostr(f.Type)) 298 | } 299 | } 300 | } 301 | return nil 302 | } 303 | 304 | // isRawStructType returns true when a type declaration uses all raw types. 
305 | func isRawStructType(node *ast.StructType) bool { 306 | for _, f := range node.Fields.List { 307 | switch tostr(f.Type) { 308 | case "bool": 309 | case "int8", "int16", "int32", "int64": 310 | case "uint8", "uint16", "uint32", "uint64": 311 | case "float32", "float64": 312 | case "raw.Time", "raw.Duration": 313 | case "raw.String": 314 | default: 315 | return false 316 | } 317 | } 318 | return true 319 | } 320 | 321 | // tostr converts a node to a string. 322 | func tostr(node ast.Node) string { 323 | switch node := node.(type) { 324 | case *ast.Ident: 325 | return node.Name 326 | case *ast.SelectorExpr: 327 | return tostr(node.X) + "." + tostr(node.Sel) 328 | } 329 | return "" 330 | } 331 | 332 | func tocamelcase(s string) string { 333 | if s == "" { 334 | return s 335 | } 336 | return string(unicode.ToUpper(rune(s[0]))) + string(s[1:]) 337 | } 338 | 339 | func trace(v ...interface{}) { 340 | if *verbose { 341 | log.Print(v...) 342 | } 343 | } 344 | 345 | func tracef(format string, v ...interface{}) { 346 | if *verbose { 347 | log.Printf(format, v...) 348 | } 349 | } 350 | 351 | func traceln(v ...interface{}) { 352 | if *verbose { 353 | log.Println(v...) 354 | } 355 | } 356 | --------------------------------------------------------------------------------