2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is furnished
8 | to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/cmd/sno/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "flag"
5 | )
6 |
7 | const (
8 | cmdGenerate = "generate"
9 | cmdInspect = "inspect"
10 | cmdVersion = "version"
11 | cmdHelp = "help"
12 | )
13 |
14 | var (
15 | meta string
16 | part string
17 | )
18 |
19 | func init() {
20 | flag.StringVar(&meta, "meta", "", "The metabyte to set on generated IDs, given in decimal (base10)")
21 | flag.StringVar(&part, "partition", "", "The partition to set on generated IDs, given in decimal (base10)")
22 | flag.Parse()
23 | }
24 |
25 | func main() {
26 | var (
27 | args = flag.Args()
28 | argsN = len(args)
29 | )
30 |
31 | if argsN < 2 {
32 | // No args at all, or "generate" without a count, simply generates a single sno.
33 | // Opts still get passed through if they were given.
34 | if argsN == 0 || args[0] == cmdGenerate {
35 | generate("1")
36 | }
37 |
38 | switch args[0] {
39 | case cmdVersion:
40 | version()
41 | case cmdHelp:
42 | usage()
43 | }
44 | } else if argsN == 2 {
45 | switch args[0] {
46 | case cmdGenerate:
47 | generate(args[1])
48 | case cmdInspect:
49 | inspect(args[1])
50 | }
51 | }
52 |
53 | usage()
54 | }
55 |
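56 | // Illustrative invocations of the CLI (a sketch only - it assumes the binary is built and
57 | // installed as `sno`, and relies on flag.Parse() stopping at the first non-flag argument,
58 | // so the options must precede the command):
59 | //
60 | // sno                           // generate a single ID
61 | // sno generate 10               // generate 10 IDs, one per line
62 | // sno -meta 255 generate 10     // generate 10 IDs with the metabyte set to 255
63 | // sno -partition 7 generate 10  // generate 10 IDs on partition 7
64 | // sno inspect brpk4q72xwf2m63l  // inspect a previously generated ID
65 | // sno version                   // print the version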
--------------------------------------------------------------------------------
/internal/encoding_test.go:
--------------------------------------------------------------------------------
1 | package internal
2 |
3 | import (
4 | "bytes"
5 | "testing"
6 | )
7 |
8 | func testEncoding(t *testing.T) {
9 | runEncodingWithFallback("encode", t, testEncodingEncode)
10 | runEncodingWithFallback("decode", t, testEncodingDecode)
11 | }
12 |
13 | var encdec = [...]struct {
14 | dec string
15 | enc [10]byte
16 | }{
17 | {"2222222222222222", [10]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
18 | {"brpk4q72xwf2m63l", [10]byte{78, 111, 33, 96, 160, 255, 154, 10, 16, 51}},
19 | {"xxxxxxxxxxxxxxxx", [10]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
20 | }
21 |
22 | func testEncodingEncode(t *testing.T) {
23 | for _, c := range encdec {
24 | var (
25 | actual = Encode(&c.enc)
26 | expected = []byte(c.dec)
27 | )
28 |
29 | if !bytes.Equal(actual[:], expected) {
30 | t.Errorf("expected [%s], got [%s]", expected, actual)
31 | }
32 | }
33 | }
34 |
35 | func testEncodingDecode(t *testing.T) {
36 | for _, c := range encdec {
37 | var (
38 | actual = Decode([]byte(c.dec))
39 | expected = c.enc
40 | )
41 |
42 | if actual != expected {
43 | t.Errorf("expected [%v], got [%v]", expected, actual)
44 | }
45 | }
46 | }
47 |
48 | func runEncodingWithFallback(name string, t *testing.T, f func(t *testing.T)) {
49 | t.Run(name, func(t *testing.T) {
50 | var actualVectorSupport = hasVectorSupport
51 | if actualVectorSupport {
52 | t.Run("vectorized", f)
53 | }
54 |
55 | hasVectorSupport = false
56 | t.Run("fallback", f)
57 | hasVectorSupport = actualVectorSupport
58 | })
59 | }
60 |
--------------------------------------------------------------------------------
/cmd/sno/generate.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "os"
5 |
6 | "github.com/muyo/rush/chars"
7 | "github.com/muyo/sno"
8 | )
9 |
10 | func generate(in string) {
11 | c, ok := chars.ParseUint64(in)
12 | if !ok {
13 | _, _ = os.Stderr.Write([]byte("Need a valid number of IDs to generate.\n"))
14 | os.Exit(1)
15 | }
16 |
17 | metabyte, snapshot := parseGenerateOpts()
18 |
19 | g, err := sno.NewGenerator(snapshot, nil)
20 | if err != nil {
21 | _, _ = os.Stderr.Write([]byte("Failed to create a generator.\n"))
22 | os.Exit(1)
23 | }
24 |
25 | ids := make([]sno.ID, c)
26 | for i := 0; i < int(c); i++ {
27 | ids[i] = g.New(metabyte)
28 | }
29 |
30 | buf := make([]byte, sno.SizeEncoded+1)
31 | buf[sno.SizeEncoded] = '\n'
32 |
33 | for i := 0; i < int(c); i++ {
34 | enc, _ := ids[i].MarshalText()
35 | copy(buf, enc)
36 | if _, err := os.Stdout.Write(buf); err != nil {
37 | os.Exit(1)
38 | }
39 | }
40 |
41 | os.Exit(0)
42 | }
43 |
44 | func parseGenerateOpts() (metabyte byte, snapshot *sno.GeneratorSnapshot) {
45 | var ok bool
46 |
47 | if meta != "" {
48 | if metabyte, ok = chars.ParseUint8(meta); !ok {
49 | _, _ = os.Stderr.Write([]byte("-meta must be a valid base10 number smaller than 256\n"))
50 | os.Exit(1)
51 | }
52 | }
53 |
54 | if part != "" {
55 | pu16, ok := chars.ParseUint16(part)
56 | if !ok {
57 | _, _ = os.Stderr.Write([]byte("-partition must be a valid base10 number smaller than 65536\n"))
58 | os.Exit(1)
59 | }
60 |
61 | var partition sno.Partition
62 | partition.PutUint16(pu16)
63 |
64 | snapshot = &sno.GeneratorSnapshot{
65 | Partition: partition,
66 | }
67 | }
68 |
69 | return
70 | }
71 |
--------------------------------------------------------------------------------
/internal/time.go:
--------------------------------------------------------------------------------
1 | //go:build !(windows && amd64) && !(linux && amd64 && go1.17)
2 | // +build !windows !amd64
3 | // +build !linux !amd64 !go1.17
4 |
5 | package internal
6 |
7 | import _ "unsafe" // Required for go:linkname
8 |
9 | // ostime returns the current wall clock time reported by the OS.
10 | //
11 | // The function is linked against runtime.walltime() directly, which is only available since the
12 | // introduction of faketime in Go 1.14 (which is the version sno depends on at minimum). This being
13 | // linked to an internal function instead of a semi-stable one like time.now() is somewhat brittle,
14 | // but the rationale is explained below.
15 | //
16 | // POSIXy arch/OS combinations use some form of clock_gettime with CLOCK_REALTIME, either through
17 | // a syscall, libc call (Darwin) or vDSO (Linux).
18 | // These calls are relatively slow, even using vDSO. Not using time.Now() allows us to bypass getting
19 | // the monotonic clock reading, which is a separate invocation of the underlying kernel facility and
20 | // roughly doubles the execution time.
21 | //
22 | // As a result, doing sno.New(0).Time() tends to be actually faster on those platforms than time.Now(),
23 | // despite an entire ID being generated alongside. That is, if you're fine with the precision reduced to 4ms.
24 | //
25 | // On Windows/amd64 we use an even more efficient implementation which allows us to also bypass
26 | // some unnecessary unit conversions, which isn't as trivially possible on POSIXy systems (as their
27 | // kernels keep track of time and provide secs and fractional secs instead of a singular higher
28 | // resolution source).
29 | //
30 | // See https://lore.kernel.org/linux-arm-kernel/20190621095252.32307-1-vincenzo.frascino@arm.com
31 | // to get an overview of the perf numbers involved on Linux-based distros.
32 | //
33 | //go:linkname ostime runtime.walltime
34 | func ostime() (sec int64, nsec int32)
35 |
36 | // Snotime returns the current wall clock time reported by the OS as adjusted to our internal epoch.
37 | func Snotime() uint64 {
38 | wallSec, wallNsec := ostime()
39 |
40 | return (uint64(wallSec)*1e9 + uint64(wallNsec) - epochNsec) / timeUnit
41 | }
42 |
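43 | // As a rough illustration of the conversion above (a sketch only, assuming the internal
44 | // epochNsec and timeUnit mirror the package-level Epoch of 2010-01-01 UTC and the 4ms TimeUnit):
45 | // if ostime() reported sec = 1262304001 and nsec = 8000000 - one second and 8ms past the
46 | // sno epoch - then Snotime() yields
47 | //
48 | // (1262304001*1e9 + 8e6 - 1262304000*1e9) / 4e6 = 252
49 | //
50 | // i.e. 252 ticks of 4ms each since 2010-01-01 00:00:00 UTC.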
--------------------------------------------------------------------------------
/internal/time_windows_amd64.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | #include "funcdata.h"
3 |
4 | // Uses the same approach as Go's runtime to get the current OS time as documented on:
5 | // https://www.dcl.hpi.uni-potsdam.de/research/WRK/2007/08/getting-os-information-the-kuser_shared_data-structure
6 | // https://github.com/golang/go/blob/450d0b2f30e820f402a638799de0b886c1da8dbe/src/runtime/sys_windows_amd64.s#L499
7 | //
8 | // However, we skip a few things the runtime does to provide the facility to time.Now():
9 | // - There is no fallback to QPC, which means this won't work on Wine except in its most recent versions;
10 | // - We offset the time straight into the sno epoch instead of into Unix first;
11 | // - We do not perform a unit conversion from 100nsec (as returned by the OS) into 1nsec. Instead we
12 | // return this as is and the unit conversion is done in the wrapping snotime() function, where the
13 | // division gets optimized by the compiler;
14 | // - There is no split into seconds and fractional nsecs, since - unlike time.Now() - this is the opposite
15 | // of what we want;
16 | //
17 | // All in all this lets us shave off about a dozen instructions - including a fairly expensive back-and-forth
18 | // conversion between time units.
19 | //
20 | // func ostime() uint64
21 | TEXT ·ostime(SB), NOSPLIT, $0-8
22 | MOVQ $2147352596, DI // 0x7ffe0014 -> 2147352596
23 | time:
24 | MOVL 4(DI), AX // time_hi1
25 | MOVL 0(DI), BX // time_lo
26 | MOVL 8(DI), CX // time_hi2
27 | CMPL AX, CX
28 | JNE time
29 |
30 | SHLQ $32, AX
31 | ORQ BX, AX
32 |
33 | // Windows time as stored within _KUSER_SHARED_DATA starts at Jan 1st 1601.
34 | // The offset in the Windows units (100ns) to Unix epoch is a SUBQ by 116 444 736 000 000 000.
35 | //
36 | // Our internal epoch is:
37 | // 1 262 304 000 seconds on top of Unix.
38 | // 12 623 040 000 000 000 in units of 100nsec (secs * 1e7)
39 | //
40 | // As such we SUBQ 116444736000000000 (Windows to Unix diff) + 12623040000000000 (Sno to Unix diff)
41 | // 116 444 736 000 000 000
42 | // 12 623 040 000 000 000
43 | // ----
44 | // 129 067 776 000 000 000
45 |
46 | MOVQ $129067776000000000, DI
47 | SUBQ DI, AX
48 |
49 | MOVQ AX, ret+0(FP)
50 | RET
51 |
--------------------------------------------------------------------------------
/errors.go:
--------------------------------------------------------------------------------
1 | package sno
2 |
3 | import "fmt"
4 |
5 | const (
6 | errInvalidDataSizeMsg = "sno: unrecognized data size"
7 | errInvalidTypeFmt = "sno: unrecognized data type: %T"
8 | errInvalidSequenceBoundsFmt = "sno: %s; min: %d, sequence: %d, max: %d, pool: %d"
9 | errSequenceBoundsIdenticalMsg = "sno: sequence bounds are identical - need a sequence pool with a capacity of at least 4"
10 | errSequenceUnderflowsBound = "sno: current sequence underflows the given lower bound"
11 | errSequencePoolTooSmallMsg = "sno: generators require a sequence pool with a capacity of at least 4"
12 | errPartitionPoolExhaustedMsg = "sno: process exceeded maximum number of possible defaults-configured generators"
13 | )
14 |
15 | // InvalidDataSizeError gets returned when attempting to unmarshal or decode an ID from data that
16 | // is not nil and whose length is neither SizeBinary, SizeEncoded, nor 0.
17 | type InvalidDataSizeError struct {
18 | Size int
19 | }
20 |
21 | func (e *InvalidDataSizeError) Error() string { return errInvalidDataSizeMsg }
22 |
23 | // InvalidTypeError gets returned when attempting to scan a value that is neither...
24 | // - a string
25 | // - a byte slice
26 | // - nil
27 | // ... into an ID via ID.Scan().
28 | type InvalidTypeError struct {
29 | Value interface{}
30 | }
31 |
32 | func (e *InvalidTypeError) Error() string {
33 | return fmt.Sprintf(errInvalidTypeFmt, e.Value)
34 | }
35 |
36 | // InvalidSequenceBoundsError gets returned when a Generator gets seeded with sequence boundaries
37 | // which are invalid, e.g. the pool is too small or the current sequence overflows the bounds.
38 | type InvalidSequenceBoundsError struct {
39 | Cur uint32
40 | Min uint16
41 | Max uint16
42 | Msg string
43 | }
44 |
45 | func (e *InvalidSequenceBoundsError) Error() string {
46 | return fmt.Sprintf(errInvalidSequenceBoundsFmt, e.Msg, e.Min, e.Cur, e.Max, e.Max-e.Min+1)
47 | }
48 |
49 | // PartitionPoolExhaustedError gets returned when attempting to create more than MaxPartition (65535)
50 | // Generators using the default configuration (i.e. without snapshots).
51 | //
52 | // Should you ever run into this, please consult the docs on the genPartition() internal function.
53 | type PartitionPoolExhaustedError struct{}
54 |
55 | func (e *PartitionPoolExhaustedError) Error() string { return errPartitionPoolExhaustedMsg }
56 |
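57 | // A minimal sketch of handling these error types on the caller side (illustrative only;
58 | // it assumes the caller uses the standard library's errors.As pattern):
59 | //
60 | // id, err := sno.FromEncodedString(input)
61 | // if err != nil {
62 | //     var sizeErr *sno.InvalidDataSizeError
63 | //     if errors.As(err, &sizeErr) {
64 | //         log.Printf("expected %d characters, got %d", sno.SizeEncoded, sizeErr.Size)
65 | //     }
66 | //     return err
67 | // }
68 | // _ = id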
--------------------------------------------------------------------------------
/global.go:
--------------------------------------------------------------------------------
1 | package sno
2 |
3 | import (
4 | "sort"
5 | "time"
6 | "unsafe"
7 |
8 | "github.com/muyo/sno/internal"
9 | )
10 |
11 | var (
12 | generator *Generator
13 | zero ID
14 | )
15 |
16 | func init() {
17 | doInit()
18 | }
19 |
20 | func doInit() {
21 | g, err := NewGenerator(nil, nil)
22 | if err != nil {
23 | panic(err)
24 | }
25 |
26 | generator = g
27 | }
28 |
29 | // New uses the package-level generator to generate a new ID using the current system
30 | // time for its timestamp.
31 | func New(meta byte) ID {
32 | return generator.New(meta)
33 | }
34 |
35 | // NewWithTime uses the package-level generator to generate a new ID using the given time
36 | // for the timestamp.
37 | //
38 | // IDs generated using this method are subject to several caveats.
39 | // See generator.NewWithTime() for their documentation.
40 | func NewWithTime(meta byte, t time.Time) ID {
41 | return generator.NewWithTime(meta, t)
42 | }
43 |
44 | // FromBinaryBytes copies the contents of a byte slice into an ID and returns it.
45 | //
46 | // The slice must have a length of 10. Returns an InvalidDataSizeError if it does not.
47 | func FromBinaryBytes(src []byte) (id ID, err error) {
48 | return id, id.UnmarshalBinary(src)
49 | }
50 |
51 | // FromEncodedBytes decodes a canonically base32-encoded byte slice representation of an ID
52 | // into its binary representation and returns it.
53 | //
54 | // The slice must have a length of 16. Returns an InvalidDataSizeError if it does not.
55 | func FromEncodedBytes(src []byte) (id ID, err error) {
56 | return id, id.UnmarshalText(src)
57 | }
58 |
59 | // FromEncodedString decodes a canonically base32-encoded string representation of an ID
60 | // into its binary representation and returns it.
61 | //
62 | // The string must have a length of 16. Returns an InvalidDataSizeError if it does not.
63 | func FromEncodedString(src string) (id ID, err error) {
64 | if len(src) != SizeEncoded {
65 | return zero, &InvalidDataSizeError{Size: len(src)}
66 | }
67 |
68 | // We only read in the data pointer (and input is read-only), so this does the job.
69 | return internal.Decode(*(*[]byte)(unsafe.Pointer(&src))), nil
70 | }
71 |
72 | type collection []ID
73 |
74 | func (ids collection) Len() int { return len(ids) }
75 | func (ids collection) Less(i, j int) bool { return ids[i].Compare(ids[j]) < 0 }
76 | func (ids collection) Swap(i, j int) { ids[i], ids[j] = ids[j], ids[i] }
77 |
78 | // Sort performs an in-place lexicographic sort of a slice of sno IDs.
79 | func Sort(s []ID) {
80 | sort.Sort(collection(s))
81 | }
82 |
83 | // Zero returns the zero value of an ID, which is 10 zero bytes and equivalent to:
84 | //
85 | // id := sno.ID{}
86 | // ... e.g. ...
87 | // id := sno.ID{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
88 | func Zero() ID {
89 | return zero
90 | }
91 |
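92 | // A minimal usage sketch of the package-level API (illustrative only - the meta byte value
93 | // and the error handling here are assumptions of this example):
94 | //
95 | // id := sno.New(0)                        // generate via the package-level generator
96 | // enc := id.String()                      // canonical 16-character base32 form
97 | // dec, err := sno.FromEncodedString(enc)  // decode it back into an ID
98 | // if err != nil || dec != id {
99 | //     panic("round-trip failed")
100 | // }
101 | //
102 | // ids := []sno.ID{sno.New(0), sno.New(0), sno.New(0)}
103 | // sno.Sort(ids)                           // in-place lexicographic sort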
--------------------------------------------------------------------------------
/partition_test.go:
--------------------------------------------------------------------------------
1 | package sno
2 |
3 | import (
4 | "sync/atomic"
5 | "testing"
6 | )
7 |
8 | func TestPartition_Public_Conversions(t *testing.T) {
9 | t.Run("AsUint16", func(t *testing.T) {
10 | src := Partition{255, 255}
11 | expected := uint16(MaxPartition)
12 | actual := src.AsUint16()
13 |
14 | if actual != expected {
15 | t.Errorf("expected [%d], got [%d]", expected, actual)
16 | }
17 | })
18 |
19 | t.Run("PutUint16", func(t *testing.T) {
20 | expected := Partition{255, 255}
21 | actual := Partition{}
22 | actual.PutUint16(MaxPartition)
23 |
24 | if actual != expected {
25 | t.Errorf("expected [%s], got [%s]", expected, actual)
26 | }
27 | })
28 | }
29 |
30 | func TestPartition_Internal_Conversions(t *testing.T) {
31 | public := Partition{255, 255}
32 | internal := uint32(MaxPartition) << 16
33 |
34 | t.Run("to-internal", func(t *testing.T) {
35 | expected := internal
36 | actual := partitionToInternalRepr(public)
37 |
38 | if actual != expected {
39 | t.Errorf("expected [%d], got [%d]", expected, actual)
40 | }
41 | })
42 |
43 | t.Run("to-public", func(t *testing.T) {
44 | expected := public
45 | actual := partitionToPublicRepr(internal)
46 |
47 | if actual != expected {
48 | t.Errorf("expected [%d], got [%d]", expected, actual)
49 | }
50 | })
51 | }
52 |
53 | func TestPartition_Internal_Generation(t *testing.T) {
54 | t.Run("monotonic-increments", func(t *testing.T) {
55 | // Reset global count (leaving seed as is).
56 | atomic.StoreUint32(&partitions, 0)
57 |
58 | var prevPartition = uint32(seed) << 16
59 |
60 | for i := 0; i < 100; i++ {
61 | p, err := genPartition()
62 | if err != nil {
63 | t.Fatal(err)
64 | }
65 |
66 | // Note: genPartition() shifts to make space for the sequence,
67 | // so we can't simply check for an increment of 1 within the resulting
68 | // uint32. The check below is a tiny bit faster than converting back
69 | // to a uint16.
70 | if p-prevPartition != 1<<16 {
71 | t.Errorf("expected [%d], got [%d]", prevPartition+1<<16, p)
72 | break
73 | }
74 |
75 | prevPartition = p
76 | }
77 | })
78 |
79 | t.Run("pool-exhaustion", func(t *testing.T) {
80 | // Reset global count (leaving seed as is).
81 | atomic.StoreUint32(&partitions, 0)
82 |
83 | for i := 0; i < 2*MaxPartition; i++ {
84 | _, err := genPartition()
85 |
86 | if err != nil {
87 | verr, ok := err.(*PartitionPoolExhaustedError)
88 | if !ok {
89 | t.Fatalf("expected error type [%T], got [%T]", &PartitionPoolExhaustedError{}, err)
90 | return
91 | }
92 |
93 | if i < MaxPartition {
94 | t.Fatalf("expected errors no sooner than after [%d] iterations, got to [%d]", MaxPartition, i)
95 | return
96 | }
97 |
98 | errMsgActual := verr.Error()
99 | errMsgExpected := errPartitionPoolExhaustedMsg
100 |
101 | if errMsgActual != errMsgExpected {
102 | t.Fatalf("expected error msg [%s], got [%s]", errMsgExpected, errMsgActual)
103 | }
104 | }
105 |
106 | if i >= MaxPartition {
107 | if err == nil {
108 | t.Fatalf("expected constant errors after [%d] iterations, got no error at [%d]", MaxPartition, i)
109 | return
110 | }
111 | }
112 | }
113 | })
114 |
115 | // Clean up.
116 | atomic.StoreUint32(&partitions, 0)
117 | }
118 |
--------------------------------------------------------------------------------
/internal/encoding.go:
--------------------------------------------------------------------------------
1 | // +build !amd64
2 |
3 | package internal
4 |
5 | const (
6 | // The encoding is a custom base32 variant stemming from base32hex.
7 | // The alphabet is 2 contiguous ASCII ranges: `50..57` (digits) and `97..120` (lowercase letters).
8 | // A canonically encoded ID can be validated with a regexp of `[2-9a-x]{16}`.
9 | enc = "23456789abcdefghijklmnopqrstuvwx"
10 | )
11 |
12 | var (
13 | // Decoding LUT.
14 | dec = [256]byte{
15 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
16 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
17 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
18 | 0xFF, 0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
19 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
20 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
21 | 0xFF, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
22 | 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
23 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
24 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
25 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
26 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
27 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
28 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
29 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
30 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
31 | }
32 |
33 | // Dummy flag to be set by the respective build (used by tests).
34 | hasVectorSupport bool
35 | )
36 |
37 | // Encode returns the sno32-encoded representation of src as an array of 16 bytes.
38 | func Encode(src *[10]byte) (dst [16]byte) {
39 | dst[15] = enc[src[9]&0x1F]
40 | dst[14] = enc[(src[9]>>5|src[8]<<3)&0x1F]
41 | dst[13] = enc[src[8]>>2&0x1F]
42 | dst[12] = enc[(src[8]>>7|src[7]<<1)&0x1F]
43 | dst[11] = enc[(src[7]>>4|src[6]<<4)&0x1F]
44 | dst[10] = enc[src[6]>>1&0x1F]
45 | dst[9] = enc[(src[6]>>6|src[5]<<2)&0x1F]
46 | dst[8] = enc[src[5]>>3]
47 |
48 | dst[7] = enc[src[4]&0x1F]
49 | dst[6] = enc[(src[4]>>5|src[3]<<3)&0x1F]
50 | dst[5] = enc[src[3]>>2&0x1F]
51 | dst[4] = enc[(src[3]>>7|src[2]<<1)&0x1F]
52 | dst[3] = enc[(src[2]>>4|src[1]<<4)&0x1F]
53 | dst[2] = enc[src[1]>>1&0x1F]
54 | dst[1] = enc[(src[1]>>6|src[0]<<2)&0x1F]
55 | dst[0] = enc[src[0]>>3]
56 |
57 | return
58 | }
59 |
60 | // Decode returns the binary representation of a sno32-encoded src as an array of bytes.
61 | //
62 | // Src does not get validated and must have a length of 16 - otherwise Decode will panic.
63 | func Decode(src []byte) (dst [10]byte) {
64 | _ = src[15] // BCE hint.
65 |
66 | dst[9] = dec[src[14]]<<5 | dec[src[15]]
67 | dst[8] = dec[src[12]]<<7 | dec[src[13]]<<2 | dec[src[14]]>>3
68 | dst[7] = dec[src[11]]<<4 | dec[src[12]]>>1
69 | dst[6] = dec[src[9]]<<6 | dec[src[10]]<<1 | dec[src[11]]>>4
70 | dst[5] = dec[src[8]]<<3 | dec[src[9]]>>2
71 |
72 | dst[4] = dec[src[6]]<<5 | dec[src[7]]
73 | dst[3] = dec[src[4]]<<7 | dec[src[5]]<<2 | dec[src[6]]>>3
74 | dst[2] = dec[src[3]]<<4 | dec[src[4]]>>1
75 | dst[1] = dec[src[1]]<<6 | dec[src[2]]<<1 | dec[src[3]]>>4
76 | dst[0] = dec[src[0]]<<3 | dec[src[1]]>>2
77 |
78 | return
79 | }
80 |
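81 | // A worked example of the mapping above, using the {78, 111, ...} vector from the encoding
82 | // tests (a sketch for illustration only):
83 | //
84 | // dst[0] = enc[src[0]>>3]                   // 78>>3 = 9                    -> enc[9]  = 'b'
85 | // dst[1] = enc[(src[1]>>6|src[0]<<2)&0x1F]  // (111>>6 | 78<<2) & 0x1F = 25 -> enc[25] = 'r'
86 | //
87 | // matching the first two characters of its canonical encoding, "brpk4q72xwf2m63l".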
--------------------------------------------------------------------------------
/partition.go:
--------------------------------------------------------------------------------
1 | package sno
2 |
3 | import "sync/atomic"
4 |
5 | // Partition represents the fixed identifier of a Generator.
6 | //
7 | // If you'd rather define Partitions as integers instead of as byte arrays, then:
8 | // var p sno.Partition
9 | // p.PutUint16(65535)
10 | type Partition [2]byte
11 |
12 | // AsUint16 returns the Partition as a uint16.
13 | func (p Partition) AsUint16() uint16 {
14 | return uint16(p[0])<<8 | uint16(p[1])
15 | }
16 |
17 | // PutUint16 sets Partition to the given uint16 in big-endian order.
18 | func (p *Partition) PutUint16(u uint16) {
19 | p[0] = byte(u >> 8)
20 | p[1] = byte(u)
21 | }
22 |
23 | // genPartition generates a Partition in its internal representation from a time based seed.
24 | //
25 | // While this alone would be enough if we only used this once (for the global generator),
26 | // generators created with the default configuration also use generated partitions - a case
27 | // for which we want to avoid collisions, at the very least within our process.
28 | //
29 | // Considering we only have a tiny period of 2**16 available, and that predictability of
30 | // the partitions is a non-factor, using even a 16-bit Xorshift PRNG would be overkill.
31 | //
32 | // If we used a PRNG without adjustment, we'd have the following pitfalls:
33 | // - we'd need to maintain its state and synchronize access to it. As it can't run atomically,
34 | // this would require maintaining a global lock separately;
35 | // - our space is limited to barely 65535 partitions, making collisions quite likely
36 | // and we have no way of determining them without maintaining yet additional state,
37 | // at the very least as a bit set (potentially growing to 8192 bytes for the entire
38 | // space). It'd also need to be synchronized. With collisions becoming more and
39 | // more likely as we hand out partitions, we'd need a means of determining free
40 | // partitions in the set to be efficient.
41 | //
42 | // And others. At that point the complexity becomes unreasonable for what we're aiming
43 | // to do. So instead of all of that, we go back to the fact that predictability is a non-factor:
44 | // with our only goal being the prevention of collisions, we simply start off with
45 | // a time-based seed... which we then atomically increment.
46 | //
47 | // This way access is safely synchronized and we're guaranteed to get 65535 partitions
48 | // without collisions in-process with just a tiny bit of code in comparison.
49 | //
50 | // Should we ever exceed that number, we however panic. If your usage pattern is weird enough
51 | // to hit this edge case, please consider managing the partition space yourself and starting
52 | // the Generators using configuration snapshots, instead.
53 | //
54 | // Note: This being entirely predictable has the upside that the order of creation and the count
55 | // of in-process generators created without snapshots can be simply inferred by comparing their
56 | // partitions (including comparing to the global generator, which starts at 0 - i.e. at the seed).
57 | func genPartition() (uint32, error) {
58 | n := atomic.AddUint32(&partitions, 1)
59 |
60 | if n > MaxPartition {
61 | return 0, &PartitionPoolExhaustedError{}
62 | }
63 |
64 | // Convert to our internal representation leaving 2 bytes empty
65 | // for the sequence to simply get ORed at runtime.
66 | return uint32(seed+uint16(n)) << 16, nil
67 | }
68 |
69 | var (
70 | // Counter starts at -1 since genPartition() will increase it on each call, including
71 | // the first. This means the global generator gets an N of 0 and always has a Partition = seed.
72 | partitions = ^uint32(0)
73 | seed = func() uint16 {
74 | t := snotime()
75 |
76 | return uint16((t >> 32) ^ t)
77 | }()
78 | )
79 |
80 | func partitionToInternalRepr(p Partition) uint32 {
81 | return uint32(p[0])<<24 | uint32(p[1])<<16
82 | }
83 |
84 | func partitionToPublicRepr(p uint32) Partition {
85 | return Partition{byte(p >> 24), byte(p >> 16)}
86 | }
87 |
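88 | // A worked example of the two representations above (illustrative only): for Partition{0x12, 0x34},
89 | //
90 | // p.AsUint16()               == 0x1234
91 | // partitionToInternalRepr(p) == 0x12340000 // high 2 bytes hold the partition, low 2 bytes
92 | //                                          // stay free for the sequence to get ORed in.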
--------------------------------------------------------------------------------
/internal/cpu_amd64_test.go:
--------------------------------------------------------------------------------
1 | package internal
2 |
3 | import (
4 | "testing"
5 | )
6 |
7 | func testCPU(t *testing.T) {
8 | t.Run("real", testCPUReal)
9 | t.Run("mocked", testCPUMocked)
10 | }
11 |
12 | // First tests are run against the real hardware and actual cpuid instruction.
13 | // While we can't reliably assume the availability of the instruction sets,
14 | // at the very least we may catch anomalies when the highest function parameter
15 | // returned is not sane - or when SSE2 instructions are not available where we
16 | // assume they should be.
17 | func testCPUReal(t *testing.T) {
18 | t.Run("highest-function-parameter-valid", testCPURealMFIValid)
19 | t.Run("has-base-set", testCPURealHasBaseSet)
20 | t.Run("has-vector-support-attempt", testCPURealHasVectorSupportAttempt)
21 | }
22 |
23 | func testCPURealMFIValid(t *testing.T) {
24 | eax, _, _, _ := cpuid(0)
25 | if eax < 1 {
26 | t.Errorf("expected a non-zero highest function parameter, got [%d]", eax)
27 | }
28 | }
29 |
30 | func testCPURealHasBaseSet(t *testing.T) {
31 | _, _, _, edx := cpuid(1)
32 | if (edx & (1 << 26)) == 0 {
33 | t.Error("expected the SSE2 instruction set to be available, does not appear to be")
34 | }
35 | }
36 |
37 | func testCPURealHasVectorSupportAttempt(t *testing.T) {
38 | defer func() {
39 | catch(t, recover(), "")
40 | }()
41 |
42 | // Note: We don't care about the result as we can't assume to get a 'true'.
43 | // We only care for this to not panic.
44 | checkVectorSupport()
45 | }
46 |
47 | // Note: Those tests must not run in parallel with any tests that rely
48 | // on real hardware and the actual cpuid implementation (see enc/dec),
49 | // as the cpuid function gets swapped out for mocks.
50 | func testCPUMocked(t *testing.T) {
51 | cpuid = cpu.id
52 |
53 | t.Run("highest-function-parameter-invalid", testCPUHasVectorSupportMFIInvalid)
54 | t.Run("highest-function-parameter-too-low", testCPUHasVectorSupportMFILow)
55 | t.Run("lacks-base-set", testCPUHasVectorSupportLacksBaseSet)
56 | t.Run("lacks-extended-sets", testCPUHasVectorSupportLacksExtendedSets)
57 | t.Run("passes", testCPUHasVectorPasses)
58 |
59 | // Restore real implementation.
60 | cpuid = cpuidReal
61 | }
62 |
63 | func testCPUHasVectorSupportMFIInvalid(t *testing.T) {
64 | defer func() {
65 | catch(t, recover(), cpuLacksSSE2ErrMsg)
66 | }()
67 |
68 | cpu.reset()
69 | cpu.eax = 0
70 | expectVectorSupport(t, false)
71 | }
72 |
73 | func testCPUHasVectorSupportMFILow(t *testing.T) {
74 | defer func() {
75 | catch(t, recover(), "")
76 | }()
77 |
78 | cpu.reset()
79 | cpu.eax = 6
80 | expectVectorSupport(t, false)
81 | }
82 |
83 | func testCPUHasVectorSupportLacksBaseSet(t *testing.T) {
84 | defer func() {
85 | catch(t, recover(), cpuLacksSSE2ErrMsg)
86 | }()
87 |
88 | cpu.reset()
89 | cpu.edx ^= 1 << 26 // SSE2 is featured as 1 << 26, so we simply set everything *but*.
90 | expectVectorSupport(t, false)
91 | }
92 |
93 | func testCPUHasVectorSupportLacksExtendedSets(t *testing.T) {
94 | defer func() {
95 | catch(t, recover(), "")
96 | }()
97 |
98 | for _, c := range []struct {
99 | name string
100 | ebx uint32
101 | ecx uint32
102 | }{
103 | {"SSE3", 0, ^uint32(0x00000001)},
104 | {"SSSE3", 0, ^uint32(0x00000200)},
105 | {"SSE4", 0, ^uint32(0x00080000)},
106 | {"SSE4.2", 0, ^uint32(0x00100000)},
107 | {"BMI1", ^uint32(0x00000008), 0},
108 | {"BMI2", ^uint32(0x00000100), 0},
109 | } {
110 | t.Run(c.name, func(t *testing.T) {
111 | cpu.reset()
112 | if c.ebx != 0 {
113 | cpu.ebx = c.ebx
114 | }
115 |
116 | if c.ecx != 0 {
117 | cpu.ecx = c.ecx
118 | }
119 |
120 | expectVectorSupport(t, false)
121 | })
122 | }
123 | }
124 |
125 | func testCPUHasVectorPasses(t *testing.T) {
126 | defer func() {
127 | catch(t, recover(), "")
128 | }()
129 |
130 | cpu.reset()
131 | expectVectorSupport(t, true)
132 | }
133 |
134 | var cpu = func() *cpuMock {
135 | c := &cpuMock{}
136 | c.reset()
137 |
138 | return c
139 | }()
140 |
141 | type cpuMock struct {
142 | eax, ebx, ecx, edx uint32
143 | }
144 |
145 | func (c *cpuMock) reset() {
146 | c.eax = 7
147 | c.ebx = 0x00000108
148 | c.ecx = 0x00180201
149 | c.edx = 1 << 26
150 | }
151 |
152 | func (c *cpuMock) id(_ uint32) (eax, ebx, ecx, edx uint32) {
153 | return c.eax, c.ebx, c.ecx, c.edx
154 | }
155 |
156 | func catch(t *testing.T, err interface{}, expected string) {
157 | if expected != "" {
158 | if err == nil {
159 | t.Fatalf("expected a panic with message [%s]", expected)
160 | }
161 |
162 | if err != expected {
163 | t.Errorf("expected a panic with message [%s], got [%s]", expected, err)
164 | }
165 |
166 | return
167 | }
168 |
169 | if err != nil {
170 | t.Fatalf("expected to not panic, panicked with [%s]", err)
171 | }
172 | }
173 |
174 | func expectVectorSupport(t *testing.T, expected bool) {
175 | if actual := checkVectorSupport(); actual != expected {
176 | t.Errorf("expected [%t], got [%t]", expected, actual)
177 | }
178 | }
179 |
--------------------------------------------------------------------------------
/benchmark/encoding.go:
--------------------------------------------------------------------------------
1 | package benchmark
2 |
3 | import (
4 | "crypto/rand"
5 | "testing"
6 | "time"
7 |
8 | "github.com/bwmarrin/snowflake"
9 | "github.com/celrenheit/sandflake"
10 | "github.com/gofrs/uuid"
11 | "github.com/muyo/sno"
12 | "github.com/oklog/ulid"
13 | "github.com/rs/xid"
14 | "github.com/segmentio/ksuid"
15 | )
16 |
17 | func benchmarkEncoding(b *testing.B) {
18 | println("\n-- Encoding ----------------------------------------------------------------------------------\n")
19 | b.Run("enc", benchmarkEncode)
20 | println("\n-- Decoding ----------------------------------------------------------------------------------\n")
21 | b.Run("dec", benchmarkDecode)
22 | }
23 |
24 | func benchmarkEncode(b *testing.B) {
25 | b.Run("sno", benchmarkEncodeSno)
26 | b.Run("xid", benchmarkEncodeXid)
27 | b.Run("snowflake", benchmarkEncodeSnowflake)
28 | b.Run("sandflake", benchmarkEncodeSandflake)
29 | b.Run("uuid", benchmarkEncodeUUID)
30 | b.Run("ulid", benchmarkEncodeULID)
31 | b.Run("ksuid", benchmarkEncodeKSUID)
32 | }
33 |
34 | func benchmarkDecode(b *testing.B) {
35 | b.Run("sno", benchmarkDecodeSno)
36 | b.Run("xid", benchmarkDecodeXid)
37 | b.Run("snowflake", benchmarkDecodeSnowflake)
38 | b.Run("sandflake", benchmarkDecodeSandflake)
39 | b.Run("uuid", benchmarkDecodeUUID)
40 | b.Run("ulid", benchmarkDecodeULID)
41 | b.Run("ksuid", benchmarkDecodeKSUID)
42 | }
43 |
44 | func benchmarkEncodeSno(b *testing.B) {
45 | id := sno.New(255)
46 | b.ResetTimer()
47 |
48 | b.RunParallel(func(pb *testing.PB) {
49 | for pb.Next() {
50 | _ = id.String()
51 | }
52 | })
53 | }
54 |
55 | func benchmarkEncodeXid(b *testing.B) {
56 | id := xid.New()
57 | b.ResetTimer()
58 |
59 | b.RunParallel(func(pb *testing.PB) {
60 | for pb.Next() {
61 | _ = id.String()
62 | }
63 | })
64 | }
65 |
66 | func benchmarkEncodeSnowflake(b *testing.B) {
67 | n, _ := snowflake.NewNode(255)
68 | id := n.Generate()
69 | b.ResetTimer()
70 |
71 | b.RunParallel(func(pb *testing.PB) {
72 | for pb.Next() {
73 | _ = id.String()
74 | }
75 | })
76 | }
77 |
78 | func benchmarkEncodeSandflake(b *testing.B) {
79 | var g sandflake.Generator
80 | id := g.Next()
81 | b.ResetTimer()
82 |
83 | b.RunParallel(func(pb *testing.PB) {
84 | for pb.Next() {
85 | _ = id.String()
86 | }
87 | })
88 | }
89 |
90 | func benchmarkEncodeUUID(b *testing.B) {
91 | b.Run("v1", benchmarkEncodeUUIDv1)
92 | b.Run("v4", benchmarkEncodeUUIDv4)
93 | }
94 |
95 | func benchmarkEncodeUUIDv1(b *testing.B) {
96 | id, _ := uuid.NewV1()
97 | b.ResetTimer()
98 |
99 | b.RunParallel(func(pb *testing.PB) {
100 | for pb.Next() {
101 | _ = id.String()
102 | }
103 | })
104 | }
105 |
106 | func benchmarkEncodeUUIDv4(b *testing.B) {
107 | id, _ := uuid.NewV4()
108 | b.ResetTimer()
109 |
110 | b.RunParallel(func(pb *testing.PB) {
111 | for pb.Next() {
112 | _ = id.String()
113 | }
114 | })
115 | }
116 |
117 | func benchmarkEncodeULID(b *testing.B) {
118 | id, _ := ulid.New(ulid.Timestamp(time.Now()), rand.Reader)
119 | b.ResetTimer()
120 |
121 | b.RunParallel(func(pb *testing.PB) {
122 | for pb.Next() {
123 | _ = id.String()
124 | }
125 | })
126 | }
127 |
128 | func benchmarkEncodeKSUID(b *testing.B) {
129 | id, _ := ksuid.NewRandom()
130 | b.ResetTimer()
131 |
132 | b.RunParallel(func(pb *testing.PB) {
133 | for pb.Next() {
134 | _ = id.String()
135 | }
136 | })
137 | }
138 |
139 | func benchmarkDecodeSno(b *testing.B) {
140 | id := sno.New(255).String()
141 | b.ResetTimer()
142 |
143 | b.RunParallel(func(pb *testing.PB) {
144 | for pb.Next() {
145 | _, _ = sno.FromEncodedString(id)
146 | }
147 | })
148 | }
149 |
150 | func benchmarkDecodeXid(b *testing.B) {
151 | id := xid.New().String()
152 | b.ResetTimer()
153 |
154 | b.RunParallel(func(pb *testing.PB) {
155 | for pb.Next() {
156 | _, _ = xid.FromString(id)
157 | }
158 | })
159 | }
160 |
161 | func benchmarkDecodeSnowflake(b *testing.B) {
162 | n, _ := snowflake.NewNode(255)
163 | id := n.Generate().String()
164 | b.ResetTimer()
165 |
166 | b.RunParallel(func(pb *testing.PB) {
167 | for pb.Next() {
168 | _, _ = snowflake.ParseString(id)
169 | }
170 | })
171 | }
172 |
173 | func benchmarkDecodeSandflake(b *testing.B) {
174 | var g sandflake.Generator
175 | id := g.Next().String()
176 | b.ResetTimer()
177 |
178 | b.RunParallel(func(pb *testing.PB) {
179 | for pb.Next() {
180 | _, _ = sandflake.Parse(id)
181 | }
182 | })
183 | }
184 |
185 | func benchmarkDecodeUUID(b *testing.B) {
186 | b.Run("v1", benchmarkDecodeUUIDv1)
187 | b.Run("v4", benchmarkDecodeUUIDv4)
188 | }
189 |
190 | func benchmarkDecodeUUIDv1(b *testing.B) {
191 | id, _ := uuid.NewV1()
192 | s := id.String()
193 | b.ResetTimer()
194 |
195 | b.RunParallel(func(pb *testing.PB) {
196 | for pb.Next() {
197 | _, _ = uuid.FromString(s)
198 | }
199 | })
200 | }
201 |
202 | func benchmarkDecodeUUIDv4(b *testing.B) {
203 | id, _ := uuid.NewV4()
204 | s := id.String()
205 | b.ResetTimer()
206 |
207 | b.RunParallel(func(pb *testing.PB) {
208 | for pb.Next() {
209 | _, _ = uuid.FromString(s)
210 | }
211 | })
212 | }
213 |
214 | func benchmarkDecodeULID(b *testing.B) {
215 | id, _ := ulid.New(ulid.Timestamp(time.Now()), rand.Reader)
216 | s := id.String()
217 | b.ResetTimer()
218 |
219 | b.RunParallel(func(pb *testing.PB) {
220 | for pb.Next() {
221 | _, _ = ulid.Parse(s)
222 | }
223 | })
224 | }
225 |
226 | func benchmarkDecodeKSUID(b *testing.B) {
227 | id, _ := ksuid.NewRandom()
228 | s := id.String()
229 | b.ResetTimer()
230 |
231 | b.RunParallel(func(pb *testing.PB) {
232 | for pb.Next() {
233 | _, _ = ksuid.Parse(s)
234 | }
235 | })
236 | }
237 |
--------------------------------------------------------------------------------
/internal/encoding_amd64.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | #include "funcdata.h"
3 |
4 | DATA shuffleVec<>+0(SB)/8, $0x0001020304050607
5 | DATA shuffleVec<>+8(SB)/8, $0x08090A0B0C0D0E0F
6 | GLOBL shuffleVec<>(SB), (NOPTR+RODATA), $16
7 |
8 | DATA offsetCharset<>+0(SB)/8, $0x3232323232323232 // 50
9 | DATA offsetCharset<>+8(SB)/8, $0x3232323232323232
10 | GLOBL offsetCharset<>(SB), (NOPTR+RODATA), $16
11 |
12 | DATA selectLetters<>+0(SB)/8, $0x0707070707070707
13 | DATA selectLetters<>+8(SB)/8, $0x0707070707070707
14 | GLOBL selectLetters<>(SB), (NOPTR+RODATA), $16
15 |
16 | DATA subLetters<>+0(SB)/8, $0xD8D8D8D8D8D8D8D8 // 216
17 | DATA subLetters<>+8(SB)/8, $0xD8D8D8D8D8D8D8D8
18 | GLOBL subLetters<>(SB), (NOPTR+RODATA), $16
19 |
20 | DATA interleave<>+0(SB)/8, $0x1f1f1f1f1f1f1f1f
21 | DATA interleave<>+8(SB)/8, $0x1f1f1f1f1f1f1f1f
22 | GLOBL interleave<>(SB), (NOPTR+RODATA), $16
23 |
24 | // func Encode(src *[10]byte) (dst [16]byte)
25 | TEXT ·Encode(SB), NOSPLIT, $0-24
26 | MOVQ src+0(FP), BX
27 |
28 | MOVQ 0(BX), AX
29 | BSWAPQ AX
30 | SHRQ $24, AX
31 |
32 | MOVQ 5(BX), BX
33 | BSWAPQ BX
34 | SHRQ $24, BX
35 |
36 | CMPB ·hasVectorSupport(SB), $1
37 | JEQ encodeVec
38 |
39 | LEAQ dst+8(FP), DX
40 |
41 | MOVB AX, 7(DX)
42 | SHRQ $5, AX
43 | MOVB AX, 6(DX)
44 | SHRQ $5, AX
45 | MOVB AX, 5(DX)
46 | SHRQ $5, AX
47 | MOVB AX, 4(DX)
48 | SHRQ $5, AX
49 | MOVB AX, 3(DX)
50 | SHRQ $5, AX
51 | MOVB AX, 2(DX)
52 | SHRQ $5, AX
53 | MOVB AX, 1(DX)
54 | SHRQ $5, AX
55 | MOVB AX, 0(DX)
56 |
57 | MOVB BX, 15(DX)
58 | SHRQ $5, BX
59 | MOVB BX, 14(DX)
60 | SHRQ $5, BX
61 | MOVB BX, 13(DX)
62 | SHRQ $5, BX
63 | MOVB BX, 12(DX)
64 | SHRQ $5, BX
65 | MOVB BX, 11(DX)
66 | SHRQ $5, BX
67 | MOVB BX, 10(DX)
68 | SHRQ $5, BX
69 | MOVB BX, 9(DX)
70 | SHRQ $5, BX
71 | MOVB BX, 8(DX)
72 |
73 | MOVOU (DX), X0
74 | PAND interleave<>+0(SB), X0
75 |
76 | JMP encodeFinish
77 |
78 | encodeVec:
79 | PDEPQ interleave<>+0(SB), AX, AX
80 | PDEPQ interleave<>+0(SB), BX, BX
81 |
82 | MOVQ AX, X0
83 | PINSRQ $1, BX, X0
84 | PSHUFB shuffleVec<>+0(SB), X0
85 |
86 | encodeFinish:
87 | MOVOA X0, X1
88 | PADDB offsetCharset<>+0(SB), X0 // Add 50, where 50 is the beginning of our alphabet (ASCII '2')
89 | // That takes care of all digits. We need to offset letters, though,
90 | // as they start at char('a'), which is 97 in dec.
91 | PCMPGTB selectLetters<>+0(SB), X1 // PCMPGTB will set all bytes with letters to 255.
92 | PSUBUSB subLetters<>+0(SB), X1 // We need to add 39 to each letter in X0 to move them into the right range.
93 | // Note: Not 47 (50 + 47 = 97), as our letters are in the [8..31] range.
94 | // And so we simply do an (unsigned) subtraction of 216 and as a result
95 | // get a mask of 39 (the offset) in dec where all the letters are.
96 | PADDB X1, X0 // Add them together and done.
97 |
98 | MOVOU X0, dst+8(FP)
99 |
100 | RET
101 |
102 |
103 | //func Decode(src []byte) (dst [10]byte)
104 | TEXT ·Decode(SB), NOSPLIT, $0-34
105 | // The entirety of this function is simply the inverse of encode.
106 | MOVQ src+0(FP), BX
107 | LEAQ dst+24(FP), DX
108 | MOVOU (BX), X0
109 |
110 | PSUBB offsetCharset<>+0(SB), X0
111 | MOVOA X0, X1
112 |
113 | PCMPGTB selectLetters<>+0(SB), X1
114 | PSUBUSB subLetters<>+0(SB), X1
115 | PSUBB X1, X0
116 |
117 | CMPB ·hasVectorSupport(SB), $0
118 | JEQ decodeFallback
119 |
120 | PSHUFB shuffleVec<>+0(SB), X0
121 |
122 | MOVQ X0, R8
123 | PEXTRQ $1, X0, R9
124 |
125 | PEXTQ interleave<>+0(SB), R8, R8
126 | BSWAPQ R8
127 | SHRQ $24, R8
128 |
129 | PEXTQ interleave<>+0(SB), R9, R9
130 | BSWAPQ R9
131 | SHRQ $24, R9
132 |
133 | MOVQ R8, 0(DX)
134 | MOVQ R9, 5(DX)
135 |
136 | RET
137 |
138 | decodeFallback:
139 | // TODO(alcore) Subject to an optimization pass.
140 | MOVQ X0, R8
141 | PSRLO $8, X0
142 | MOVQ X0, R9
143 |
144 | // Timestamp block - 0
145 | MOVB R8, BX
146 | SHLB $3, BX
147 |
148 | SHRQ $8, R8 // 1
149 | MOVB R8, AX
150 | SHRB $2, AX
151 | ORB AX, BX
152 |
153 | MOVB BX, 0(DX)
154 |
155 | MOVB R8, BX
156 | SHLB $6, BX
157 |
158 | SHRQ $8, R8 // 2
159 | MOVB R8, AX
160 | SHLB $1, AX
161 | ORB AX, BX
162 |
163 | SHRQ $8, R8 // 3
164 | MOVB R8, CX
165 | SHRB $4, CX
166 | ORB CX, BX
167 |
168 | MOVB BX, 1(DX)
169 |
170 | MOVB R8, BX
171 | SHLB $4, BX
172 |
173 | SHRQ $8, R8 // 4
174 | MOVB R8, AX
175 | SHRB $1, AX
176 | ORB AX, BX
177 |
178 | MOVB BX, 2(DX)
179 |
180 | MOVB R8, BX
181 | SHLB $7, BX
182 |
183 | SHRQ $8, R8 // 5
184 | MOVB R8, CX
185 | SHLB $2, CX
186 | ORB CX, BX
187 |
188 | SHRQ $8, R8 // 6
189 | MOVB R8, AX
190 | SHRB $3, AX
191 | ORB AX, BX
192 |
193 | MOVB BX, 3(DX)
194 |
195 | MOVB R8, BX
196 | SHLB $5, BX
197 |
198 | SHRQ $8, R8 // 7
199 | ORB R8, BX
200 |
201 | MOVB BX, 4(DX)
202 |
203 | // Payload block - 8
204 | MOVB R9, BX
205 | SHLB $3, BX
206 |
207 | SHRQ $8, R9 // 9
208 | MOVB R9, AX
209 | SHRB $2, AX
210 | ORB AX, BX
211 |
212 | MOVB BX, 5(DX)
213 |
214 | MOVB R9, BX
215 | SHLB $6, BX
216 |
217 | SHRQ $8, R9 // 10
218 | MOVB R9, AX
219 | SHLB $1, AX
220 | ORB AX, BX
221 |
222 | SHRQ $8, R9 // 11
223 | MOVB R9, CX
224 | SHRB $4, CX
225 | ORB CX, BX
226 |
227 | MOVB BX, 6(DX)
228 |
229 | MOVB R9, BX
230 | SHLB $4, BX
231 |
232 | SHRQ $8, R9 // 12
233 | MOVB R9, AX
234 | SHRB $1, AX
235 | ORB AX, BX
236 |
237 | MOVB BX, 7(DX)
238 |
239 | MOVB R9, BX
240 | SHLB $7, BX
241 |
242 | SHRQ $8, R9 // 13
243 | MOVB R9, CX
244 | SHLB $2, CX
245 | ORB CX, BX
246 |
247 | SHRQ $8, R9 // 14
248 | MOVB R9, AX
249 | SHRB $3, AX
250 | ORB AX, BX
251 |
252 | MOVB BX, 8(DX)
253 |
254 | MOVB R9, BX
255 | SHLB $5, BX
256 |
257 | SHRQ $8, R9 // 15
258 | ORB R9, BX
259 |
260 | MOVB BX, 9(DX)
261 |
262 | RET
263 |
--------------------------------------------------------------------------------
/global_test.go:
--------------------------------------------------------------------------------
1 | package sno
2 |
3 | import (
4 | "reflect"
5 | "testing"
6 | )
7 |
8 | func TestGlobal_Init(t *testing.T) {
9 | t.Run("sane", func(t *testing.T) {
10 | defer func() {
11 | if err := recover(); err != nil {
12 | t.Fatal("expected init to not panic")
13 | }
14 | }()
15 |
16 | // Must never panic.
17 | doInit()
18 | })
19 |
20 | t.Run("panics", func(t *testing.T) {
21 | defer func() {
22 | err := recover()
23 | if err == nil {
24 | t.Fatal("expected init to panic")
25 | }
26 |
27 | if _, ok := err.(*PartitionPoolExhaustedError); !ok {
28 | t.Errorf("expected panic with type [%T], got [%T]", &PartitionPoolExhaustedError{}, err)
29 | return
30 | }
31 | }()
32 |
33 | // Theoretically impossible to happen, but this ensures we cover all "potential" cases
34 | // where the global generator could fail to get constructed and we need to panic.
35 | //
36 | // At present only one branch even has an error return, so we simulate that... impossibility
37 | // by trying to create more Generators without snapshots than we have a Partition pool for.
38 | // Note that we are invoking doInit() instead of NewGenerator() directly.
39 | for i := 0; i < 2*MaxPartition; i++ {
40 | doInit()
41 | }
42 | })
43 | }
44 |
45 | func TestGlobal_FromEncodedString_Valid(t *testing.T) {
46 | src := "brpk4q72xwf2m63l"
47 | expected := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51}
48 |
49 | actual, err := FromEncodedString(src)
50 | if err != nil {
51 | t.Fatal(err)
52 | }
53 |
54 | if actual != expected {
55 | t.Errorf("expected [%v], got [%v]", expected, actual)
56 | }
57 | }
58 |
59 | func TestGlobal_FromEncodedString_Invalid(t *testing.T) {
60 | _, err := FromEncodedString("012brpk4q72xwf2m63l1245453gfdgxz")
61 |
62 | if _, ok := err.(*InvalidDataSizeError); !ok {
63 | t.Errorf("expected error with type [%T], got [%T]", &InvalidDataSizeError{}, err)
64 | }
65 |
66 | if err != nil && err.Error() != errInvalidDataSizeMsg {
67 | t.Errorf("expected error [%s], got [%s]", errInvalidDataSizeMsg, err.Error())
68 | }
69 | }
70 |
71 | func TestGlobal_FromEncodedBytes_Valid(t *testing.T) {
72 | src := []byte("brpk4q72xwf2m63l")
73 | expected := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51}
74 |
75 | actual, err := FromEncodedBytes(src)
76 | if err != nil {
77 | t.Fatal(err)
78 | }
79 |
80 | if actual != expected {
81 | t.Errorf("expected [%v], got [%v]", expected, actual)
82 | }
83 | }
84 |
85 | func TestGlobal_FromEncodedBytes_Invalid(t *testing.T) {
86 | _, err := FromEncodedBytes([]byte("012brpk4q72xwf2m63l1245453gfdgxz"))
87 |
88 | if _, ok := err.(*InvalidDataSizeError); !ok {
89 | t.Errorf("expected error with type [%T], got [%T]", &InvalidDataSizeError{}, err)
90 | }
91 |
92 | if err != nil && err.Error() != errInvalidDataSizeMsg {
93 | t.Errorf("expected error [%s], got [%s]", errInvalidDataSizeMsg, err.Error())
94 | }
95 | }
96 |
97 | func TestGlobal_FromBinaryBytes_Valid(t *testing.T) {
98 | src := []byte{78, 111, 33, 96, 160, 255, 154, 10, 16, 51}
99 | expected := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51}
100 |
101 | actual, err := FromBinaryBytes(src)
102 | if err != nil {
103 | t.Fatal(err)
104 | }
105 |
106 | if actual != expected {
107 | t.Errorf("expected [%v], got [%v]", expected, actual)
108 | }
109 | }
110 |
111 | func TestGlobal_FromBinaryBytes_Invariant(t *testing.T) {
112 | expected := New(255)
113 | actual, err := FromBinaryBytes(expected[:])
114 | if err != nil {
115 | t.Fatal(err)
116 | }
117 |
118 | if actual != expected {
119 | t.Errorf("expected [%v], got [%v]", expected, actual)
120 | }
121 | }
122 |
123 | func TestGlobal_FromBinaryBytes_Invalid(t *testing.T) {
124 | for _, c := range []struct {
125 | n int
126 | invalid bool
127 | }{
128 | {4, true},
129 | {8, true},
130 | {10, false},
131 | {12, true},
132 | {16, true},
133 | } {
134 | b := make([]byte, c.n)
135 | _, err := FromBinaryBytes(b)
136 |
137 | if actual, expected := err != nil, c.invalid; actual != expected {
138 | t.Errorf("expected error [%v], got [%v]", expected, actual)
139 | }
140 | }
141 | }
142 |
143 | func TestGlobal_Collection(t *testing.T) {
144 | var ids = []ID{{1}, {2}, {3}, {4}, {5}, {6}}
145 |
146 | t.Run("len", makeCollectionLenTest(ids))
147 | t.Run("less", makeCollectionLessTest(ids))
148 | t.Run("swap", makeCollectionSwapTest(ids))
149 | t.Run("sort", makeCollectionSortTest(ids))
150 | }
151 |
152 | func makeCollectionLenTest(ids []ID) func(t *testing.T) {
153 | n := len(ids)
154 | return func(t *testing.T) {
155 | if actual, expected := collection([]ID{}).Len(), 0; actual != expected {
156 | t.Errorf("Len() %v, want %v", expected, actual)
157 | }
158 |
159 | if actual, expected := collection(ids).Len(), n; actual != expected {
160 | t.Errorf("expected [%v], got [%v]", expected, actual)
161 | }
162 | }
163 | }
164 |
165 | func makeCollectionLessTest(ids []ID) func(t *testing.T) {
166 | return func(t *testing.T) {
167 | c := collection(ids)
168 | if c.Less(0, 0) {
169 | t.Errorf("expected [false], got [true]")
170 | }
171 |
172 | if !c.Less(0, 1) {
173 | t.Errorf("expected [true], got [false]")
174 | }
175 |
176 | if !c.Less(1, 2) {
177 | t.Errorf("expected [true], got [false]")
178 | }
179 | }
180 | }
181 |
182 | func makeCollectionSwapTest(ids []ID) func(t *testing.T) {
183 | return func(t *testing.T) {
184 | b := make([]ID, len(ids))
185 | copy(b, ids)
186 |
187 | c := collection(b)
188 | c.Swap(1, 2)
189 | if actual, expected := c[1], ids[2]; actual != expected {
190 | t.Errorf("expected [%v], got [%v]", expected, actual)
191 | }
192 | if actual, expected := c[2], ids[1]; actual != expected {
193 | t.Errorf("expected [%v], got [%v]", expected, actual)
194 | }
195 | c.Swap(3, 3)
196 | if actual, expected := c[3], ids[3]; actual != expected {
197 | t.Errorf("expected [%v], got [%v]", expected, actual)
198 | }
199 | }
200 | }
201 |
202 | func makeCollectionSortTest(ids []ID) func(t *testing.T) {
203 | return func(t *testing.T) {
204 | src := make([]ID, len(ids))
205 | copy(src, ids)
206 |
207 | // Input IDs are already sorted, so comparing the result against them will do the trick.
208 | src[2], src[1] = src[1], src[2]
209 | src[4], src[3] = src[3], src[4]
210 |
211 | Sort(src)
212 |
213 | if actual, expected := src, ids; !reflect.DeepEqual(actual, expected) {
214 | t.Errorf("expected [%v], got [%v]", expected, actual)
215 | }
216 | }
217 | }
218 |
219 | func TestGlobal_Zero(t *testing.T) {
220 | if actual := Zero(); actual != (ID{}) {
221 | t.Error("Zero() not equal to ID{}")
222 | }
223 | }
224 |
225 | func TestGlobal_Zero_IsZero(t *testing.T) {
226 | if !Zero().IsZero() {
227 | t.Error("Zero().IsZero() is not true")
228 | }
229 | }
230 |
--------------------------------------------------------------------------------
/benchmark/generation.go:
--------------------------------------------------------------------------------
1 | package benchmark
2 |
3 | import (
4 | crand "crypto/rand"
5 | mrand "math/rand"
6 | "sync"
7 | "testing"
8 | "time"
9 |
10 | "github.com/bwmarrin/snowflake"
11 | "github.com/celrenheit/sandflake"
12 | "github.com/gofrs/uuid"
13 | "github.com/lucsky/cuid"
14 | "github.com/muyo/sno"
15 | "github.com/oklog/ulid"
16 | "github.com/rs/xid"
17 | "github.com/segmentio/ksuid"
18 | "github.com/sony/sonyflake"
19 | )
20 |
21 | func benchmarkGeneration(b *testing.B) {
22 | println("\n-- Generation (sequential) -------------------------------------------------------------------\n")
23 | b.Run("s", benchmarkGenerateSequential)
24 | println("\n-- Generation (parallel) ---------------------------------------------------------------------\n")
25 | b.Run("p", benchmarkGenerateParallel)
26 | }
27 |
28 | func benchmarkGenerateSequential(b *testing.B) {
29 | b.Run("sno", benchmarkGenerateSequentialSno) // Bounded
30 | b.Run("xid", benchmarkGenerateSequentialXid) // Unbounded
31 | b.Run("snowflake", benchmarkGenerateSequentialSnowflake) // Bounded
32 | b.Run("sonyflake", benchmarkGenerateSequentialSonyflake) // Bounded
33 | b.Run("sandflake", benchmarkGenerateSequentialSandflake) // Unbounded
34 | b.Run("cuid", benchmarkGenerateSequentialCuid) // Unbounded
35 | b.Run("uuid", benchmarkGenerateSequentialUUID) // Unbounded
36 | b.Run("ulid", benchmarkGenerateSequentialULID) // Unbounded
37 | b.Run("ksuid", benchmarkGenerateSequentialKSUID) // Unbounded
38 | }
39 |
40 | func benchmarkGenerateParallel(b *testing.B) {
41 | b.Run("sno", benchmarkGenerateParallelSno) // Bounded
42 | b.Run("xid", benchmarkGenerateParallelXid) // Unbounded
43 | b.Run("snowflake", benchmarkGenerateParallelSnowflake) // Bounded
44 | b.Run("sonyflake", benchmarkGenerateParallelSonyflake) // Bounded
45 | b.Run("sandflake", benchmarkGenerateParallelSandflake) // Unbounded
46 | b.Run("cuid", benchmarkGenerateParallelCuid) // Unbounded
47 | b.Run("uuid", benchmarkGenerateParallelUUID) // Unbounded
48 | b.Run("ulid", benchmarkGenerateParallelULID) // Unbounded
49 | b.Run("ksuid", benchmarkGenerateParallelKSUID) // Unbounded
50 | }
51 |
52 | func benchmarkGenerateSequentialSno(b *testing.B) {
53 | for i := 0; i < b.N; i++ {
54 | _ = sno.New(255)
55 | }
56 | }
57 |
58 | func benchmarkGenerateSequentialXid(b *testing.B) {
59 | for i := 0; i < b.N; i++ {
60 | _ = xid.New()
61 | }
62 | }
63 |
64 | func benchmarkGenerateSequentialSnowflake(b *testing.B) {
65 | n, _ := snowflake.NewNode(255)
66 | b.ResetTimer()
67 |
68 | for i := 0; i < b.N; i++ {
69 | _ = n.Generate()
70 | }
71 | }
72 |
73 | func benchmarkGenerateSequentialSonyflake(b *testing.B) {
74 | g := sonyflake.NewSonyflake(sonyflake.Settings{})
75 | b.ResetTimer()
76 |
77 | for i := 0; i < b.N; i++ {
78 | _, _ = g.NextID()
79 | }
80 | }
81 |
82 | func benchmarkGenerateSequentialSandflake(b *testing.B) {
83 | var g sandflake.Generator
84 | b.ResetTimer()
85 |
86 | for i := 0; i < b.N; i++ {
87 | _ = g.Next()
88 | }
89 | }
90 |
91 | func benchmarkGenerateSequentialCuid(b *testing.B) {
92 | for i := 0; i < b.N; i++ {
93 | _ = cuid.New()
94 | }
95 | }
96 |
97 | func benchmarkGenerateSequentialUUID(b *testing.B) {
98 | b.Run("v1", benchmarkGenerateSequentialUUIDv1)
99 | b.Run("v4", benchmarkGenerateSequentialUUIDv4)
100 | }
101 |
102 | func benchmarkGenerateSequentialUUIDv1(b *testing.B) {
103 | for i := 0; i < b.N; i++ {
104 | _, _ = uuid.NewV1()
105 | }
106 | }
107 |
108 | func benchmarkGenerateSequentialUUIDv4(b *testing.B) {
109 | for i := 0; i < b.N; i++ {
110 | _, _ = uuid.NewV4()
111 | }
112 | }
113 |
114 | // A note about the included ULID runs.
115 | //
116 | // ULID generators expect time to be passed in as a timestamp with msec precision. All of the other
117 | // libraries being tested handle time sourcing themselves, which is reflected in their results.
118 | // Therefore the time fetching (via time.Now()), including the unit conversion (via ulid.Timestamp()),
119 | // is included in each iteration. If the time had been fetched outside the benchmark loop, the results
120 | // would be roughly 7nsec/op lower (@go 1.14.1, Windows 10, i7 4770k 4.4GHz).
121 | //
122 | // The ULID package also benchmarks generation without an entropy source, which in one run came out
123 | // at 29.8ns/op (relative to unbounded Sno at 8.8ns/op, for reference). However, that case is
124 | // excluded from this benchmark. While it may measure ULID's raw overhead, it does not measure
125 | // an end-user usable case, since ULIDs without entropy are essentially a 48bit timestamp and...
126 | // 10 zero bytes, which defeats the purpose of the spec.
127 | func benchmarkGenerateSequentialULID(b *testing.B) {
128 | b.Run("crypto", benchmarkGenerateSequentialULIDCrypto)
129 | b.Run("math", benchmarkSequentialNewULIDMath)
130 | }
131 |
132 | func benchmarkGenerateSequentialULIDCrypto(b *testing.B) {
133 | rng := crand.Reader
134 | b.ResetTimer()
135 |
136 | for i := 0; i < b.N; i++ {
137 | _, _ = ulid.New(ulid.Timestamp(time.Now()), rng)
138 | }
139 | }
140 |
141 | func benchmarkGenerateSequentialULIDMath(b *testing.B) {
142 | rng := mrand.New(mrand.NewSource(time.Now().UnixNano()))
143 | b.ResetTimer()
144 |
145 | for i := 0; i < b.N; i++ {
146 | _, _ = ulid.New(ulid.Timestamp(time.Now()), rng)
147 | }
148 | }
149 |
150 | func benchmarkGenerateSequentialKSUID(b *testing.B) {
151 | for i := 0; i < b.N; i++ {
152 | _, _ = ksuid.NewRandom()
153 | }
154 | }
155 |
156 | func benchmarkGenerateParallelSno(b *testing.B) {
157 | b.RunParallel(func(pb *testing.PB) {
158 | for pb.Next() {
159 | _ = sno.New(255)
160 | }
161 | })
162 | }
163 |
164 | func benchmarkGenerateParallelXid(b *testing.B) {
165 | b.RunParallel(func(pb *testing.PB) {
166 | for pb.Next() {
167 | _ = xid.New()
168 | }
169 | })
170 | }
171 |
172 | func benchmarkGenerateParallelSnowflake(b *testing.B) {
173 | n, _ := snowflake.NewNode(255)
174 | b.ResetTimer()
175 |
176 | b.RunParallel(func(pb *testing.PB) {
177 | for pb.Next() {
178 | _ = n.Generate()
179 | }
180 | })
181 | }
182 |
183 | func benchmarkGenerateParallelSonyflake(b *testing.B) {
184 | g := sonyflake.NewSonyflake(sonyflake.Settings{})
185 | b.ResetTimer()
186 |
187 | b.RunParallel(func(pb *testing.PB) {
188 | for pb.Next() {
189 | _, _ = g.NextID()
190 | }
191 | })
192 | }
193 |
194 | func benchmarkGenerateParallelSandflake(b *testing.B) {
195 | var g sandflake.Generator
196 | b.ResetTimer()
197 |
198 | b.RunParallel(func(pb *testing.PB) {
199 | for pb.Next() {
200 | _ = g.Next()
201 | }
202 | })
203 | }
204 |
205 | func benchmarkGenerateParallelCuid(b *testing.B) {
206 | b.RunParallel(func(pb *testing.PB) {
207 | for pb.Next() {
208 | _ = cuid.New()
209 | }
210 | })
211 | }
212 |
213 | func benchmarkGenerateParallelUUID(b *testing.B) {
214 | b.Run("v1", benchmarkGenerateParallelUUIDv1)
215 | b.Run("v4", benchmarkGenerateParallelUUIDv4)
216 | }
217 |
218 | func benchmarkGenerateParallelUUIDv1(b *testing.B) {
219 | b.RunParallel(func(pb *testing.PB) {
220 | for pb.Next() {
221 | _, _ = uuid.NewV1()
222 | }
223 | })
224 | }
225 |
226 | func benchmarkGenerateParallelUUIDv4(b *testing.B) {
227 | b.RunParallel(func(pb *testing.PB) {
228 | for pb.Next() {
229 | _, _ = uuid.NewV4()
230 | }
231 | })
232 | }
233 |
234 | func benchmarkGenerateParallelULID(b *testing.B) {
235 | b.Run("crypto", benchmarkGenerateParallelULIDCrypto)
236 | b.Run("math", benchmarkGenerateParallelULIDMath)
237 | }
238 |
239 | func benchmarkGenerateParallelULIDCrypto(b *testing.B) {
240 | rng := crand.Reader
241 | b.ResetTimer()
242 |
243 | b.RunParallel(func(pb *testing.PB) {
244 | for pb.Next() {
245 | _, _ = ulid.New(ulid.Timestamp(time.Now()), rng)
246 | }
247 | })
248 | }
249 |
250 | func benchmarkGenerateParallelULIDMath(b *testing.B) {
251 | // Note: Requires manual locking for this run to complete.
252 | rng := mrand.New(mrand.NewSource(time.Now().UnixNano()))
253 | mu := sync.Mutex{}
254 | b.ResetTimer()
255 |
256 | b.RunParallel(func(pb *testing.PB) {
257 | for pb.Next() {
258 | mu.Lock()
259 | _, _ = ulid.New(ulid.Timestamp(time.Now()), rng)
260 | mu.Unlock()
261 | }
262 | })
263 | }
264 |
265 | func benchmarkGenerateParallelKSUID(b *testing.B) {
266 | b.RunParallel(func(pb *testing.PB) {
267 | for pb.Next() {
268 | _, _ = ksuid.NewRandom()
269 | }
270 | })
271 | }
272 |
--------------------------------------------------------------------------------
/id.go:
--------------------------------------------------------------------------------
1 | package sno
2 |
3 | import (
4 | "bytes"
5 | "database/sql/driver"
6 | "encoding/binary"
7 | "time"
8 | "unsafe"
9 |
10 | "github.com/muyo/sno/internal"
11 | )
12 |
13 | const (
14 | // SizeBinary is the length of an ID in its binary array representation.
15 | SizeBinary = 10
16 |
17 | // SizeEncoded is the length of an ID in its canonical base-32 encoded representation.
18 | SizeEncoded = 16
19 |
20 | // Epoch is the offset to the Unix epoch, in seconds, that ID timestamps are embedded with.
21 | // Corresponds to 2010-01-01 00:00:00 UTC.
22 | Epoch = 1262304000
23 | epochNsec = Epoch * 1e9
24 |
25 | // TimeUnit is the time unit timestamps are embedded with - 4msec, as expressed in nanoseconds.
26 | TimeUnit = 4e6
27 |
28 | // MaxTimestamp is the max number of time units that can be embedded in an ID's timestamp.
29 | // Corresponds to 2079-09-07 15:47:35.548 UTC in our custom epoch.
30 | MaxTimestamp = 1<<39 - 1
31 |
32 | // MaxPartition is the max Partition number when represented as a uint16.
33 | // It equals max uint16 (65535) and is the equivalent of Partition{255, 255}.
34 | MaxPartition = 1<<16 - 1
35 |
36 | // MaxSequence is the max sequence number supported by generators. As bounds can be set
37 | // individually - this is the upper cap and equals max uint16 (65535).
38 | MaxSequence = 1<<16 - 1
39 | )
40 |
41 | // ID is the binary representation of a sno ID.
42 | //
43 | // It is comprised of 10 bytes in 2 blocks of 40 bits, with its components stored in big-endian order.
44 | //
45 | // The timestamp:
46 | //  39 bits - unsigned time units (4msec each) elapsed since the custom epoch
47 | // 1 bit - the tick-tock toggle
48 | //
49 | // The payload:
50 | // 8 bits - metabyte
51 | // 16 bits - partition
52 | // 16 bits - sequence
53 | //
54 | type ID [SizeBinary]byte
55 |
56 | // Time returns the timestamp of the ID as a time.Time struct.
57 | func (id ID) Time() time.Time {
58 | var (
59 | units = int64(binary.BigEndian.Uint64(id[:]) >> 25)
60 | s = units/250 + Epoch
61 | ns = (units % 250) * TimeUnit
62 | )
63 |
64 | return time.Unix(s, ns)
65 | }
66 |
67 | // Timestamp returns the timestamp of the ID as nanoseconds relative to the Unix epoch.
68 | func (id ID) Timestamp() int64 {
69 | return int64(binary.BigEndian.Uint64(id[:])>>25)*TimeUnit + epochNsec
70 | }
71 |
72 | // Meta returns the metabyte of the ID.
73 | func (id ID) Meta() byte {
74 | return id[5]
75 | }
76 |
77 | // Partition returns the partition of the ID.
78 | func (id ID) Partition() Partition {
79 | return Partition{id[6], id[7]}
80 | }
81 |
82 | // Sequence returns the sequence of the ID.
83 | func (id ID) Sequence() uint16 {
84 | return uint16(id[8])<<8 | uint16(id[9])
85 | }
86 |
87 | // IsZero checks whether the ID is a zero value.
88 | func (id ID) IsZero() bool {
89 | return id == zero
90 | }
91 |
92 | // String implements fmt.Stringer by returning the base32-encoded representation of the ID
93 | // as a string.
94 | func (id ID) String() string {
95 | enc := internal.Encode((*[10]byte)(&id))
96 | dst := enc[:]
97 |
98 | return *(*string)(unsafe.Pointer(&dst))
99 | }
100 |
101 | // Bytes returns the ID as a byte slice.
102 | func (id ID) Bytes() []byte {
103 | return id[:]
104 | }
105 |
106 | // MarshalBinary implements encoding.BinaryMarshaler by returning the ID as a byte slice.
107 | func (id ID) MarshalBinary() ([]byte, error) {
108 | return id[:], nil
109 | }
110 |
111 | // UnmarshalBinary implements encoding.BinaryUnmarshaler by copying src into the receiver.
112 | func (id *ID) UnmarshalBinary(src []byte) error {
113 | if len(src) != SizeBinary {
114 | return &InvalidDataSizeError{Size: len(src)}
115 | }
116 |
117 | copy(id[:], src)
118 |
119 | return nil
120 | }
121 |
122 | // MarshalText implements encoding.TextMarshaler by returning the base32-encoded representation
123 | // of the ID as a byte slice.
124 | func (id ID) MarshalText() ([]byte, error) {
125 | b := internal.Encode((*[10]byte)(&id))
126 |
127 | return b[:], nil
128 | }
129 |
130 | // UnmarshalText implements encoding.TextUnmarshaler by decoding a base32-encoded representation
131 | // of the ID from src into the receiver.
132 | func (id *ID) UnmarshalText(src []byte) error {
133 | if len(src) != SizeEncoded {
134 | return &InvalidDataSizeError{Size: len(src)}
135 | }
136 |
137 | *id = internal.Decode(src)
138 |
139 | return nil
140 | }
141 |
142 | // MarshalJSON implements encoding.json.Marshaler by returning the base32-encoded and quoted
143 | // representation of the ID as a byte slice.
144 | //
145 | // If the ID is a zero value, MarshalJSON will return a byte slice containing 'null' (unquoted) instead.
146 | //
147 | // Note that IDs are byte arrays and Go's std (un)marshaler is unable to recognize
148 | // the zero values of custom types as "empty", so the 'omitempty' tag has the same caveats
149 | // as, for example, time.Time.
150 | //
151 | // See https://github.com/golang/go/issues/11939 for tracking purposes as changes are being
152 | // discussed.
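//
// For illustration, one way to actually omit zero IDs from JSON output is to use
// a pointer field instead (the wrapping struct below is hypothetical):
//
//	type resource struct {
//		ID *sno.ID `json:"id,omitempty"`
//	}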
153 | func (id ID) MarshalJSON() ([]byte, error) {
154 | if id == zero {
155 | return []byte("null"), nil
156 | }
157 |
158 | dst := []byte("\" \"")
159 | enc := internal.Encode((*[10]byte)(&id))
160 | copy(dst[1:], enc[:])
161 |
162 | return dst, nil
163 | }
164 |
165 | // UnmarshalJSON implements encoding.json.Unmarshaler by decoding a base32-encoded and quoted
166 | // representation of an ID from src into the receiver.
167 | //
168 | // If the byte slice is an unquoted 'null', the receiving ID will instead be set
169 | // to a zero ID.
170 | func (id *ID) UnmarshalJSON(src []byte) error {
171 | n := len(src)
172 | if n != SizeEncoded+2 {
173 | if n == 4 && src[0] == 'n' && src[1] == 'u' && src[2] == 'l' && src[3] == 'l' {
174 | *id = zero
175 | return nil
176 | }
177 |
178 | return &InvalidDataSizeError{Size: n}
179 | }
180 |
181 | *id = internal.Decode(src[1 : n-1])
182 |
183 | return nil
184 | }
185 |
186 | // Compare returns an integer comparing this and that ID lexicographically.
187 | //
188 | // Returns:
189 | // 0 - if this and that are equal,
190 | // -1 - if this is smaller than that,
191 | // 1 - if this is greater than that.
192 | //
193 | // Note that IDs are byte arrays - if all you need is to check for equality, a simple...
194 | // if thisID == thatID {...}
195 | // ... will do the trick.
196 | func (id ID) Compare(that ID) int {
197 | return bytes.Compare(id[:], that[:])
198 | }
199 |
200 | // Value implements the sql.driver.Valuer interface by returning the ID as a byte slice.
201 | // If you'd rather receive a string, wrapping an ID is a possible solution...
202 | //
203 | // // stringedID wraps a sno ID to provide a driver.Valuer implementation which
204 | // // returns strings.
205 | // type stringedID sno.ID
206 | //
207 | // func (id stringedID) Value() (driver.Value, error) {
208 | // return sno.ID(id).String(), nil
209 | // }
210 | //
211 | // // ... and use it via:
212 | // db.Exec(..., stringedID(id))
213 | func (id ID) Value() (driver.Value, error) {
214 | return id.MarshalBinary()
215 | }
216 |
217 | // Scan implements the sql.Scanner interface by attempting to convert the given value
218 | // into an ID.
219 | //
220 | // When given a byte slice:
221 | // - with a length of SizeBinary (10), its contents will be copied into ID.
222 | // - with a length of 0, ID will be set to a zero ID.
223 | // - with any other length, sets ID to a zero ID and returns InvalidDataSizeError.
224 | //
225 | // When given a string:
226 | // - with a length of SizeEncoded (16), its contents will be decoded into ID.
227 | // - with a length of 0, ID will be set to a zero ID.
228 | // - with any other length, sets ID to a zero ID and returns InvalidDataSizeError.
229 | //
230 | // When given nil, ID will be set to a zero ID.
231 | //
232 | // When given any other type, returns an InvalidTypeError.
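//
// A sketch of typical usage with database/sql (the table and query are hypothetical):
//
//	var id sno.ID
//	err := db.QueryRow("SELECT id FROM things LIMIT 1").Scan(&id)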
233 | func (id *ID) Scan(value interface{}) error {
234 | switch v := value.(type) {
235 | case []byte:
236 | switch len(v) {
237 | case SizeBinary:
238 | copy(id[:], v)
239 | case 0:
240 | *id = zero
241 | default:
242 | *id = zero
243 | return &InvalidDataSizeError{Size: len(v)}
244 | }
245 |
246 | case string:
247 | switch len(v) {
248 | case SizeEncoded:
249 | *id = internal.Decode(*(*[]byte)(unsafe.Pointer(&v)))
250 | case 0:
251 | *id = zero
252 | default:
253 | *id = zero
254 | return &InvalidDataSizeError{Size: len(v)}
255 | }
256 |
257 | case nil:
258 | *id = zero
259 |
260 | default:
261 | return &InvalidTypeError{Value: value}
262 | }
263 |
264 | return nil
265 | }
266 |
--------------------------------------------------------------------------------
/benchmark/README.md:
--------------------------------------------------------------------------------
1 | # Benchmark
2 |
3 | Running the benchmark yourself:
4 |
5 | ```
6 | go test -run=^$ -bench=. -benchmem
7 | ```
8 |
9 | ## Results
10 |
11 | Platform: `Go 1.14.1 | i7 4770K (Haswell; 4 physical, 8 logical cores) @ 4.4GHz | Win 10`, ran on `2020/04/06`.
12 |
13 | All libraries being compared are listed as ➜ [Alternatives](../README.md#alternatives) in the root package.
14 |
15 |
16 |
17 | ### Generation
18 |
19 | These results must **not** be taken for their raw numbers. See the explanation
20 | (primarily about the `unbounded` suffix) afterwards.
21 |
22 | **Sequential**
23 | ```
24 | sno/unbounded 136208883 8.80 ns/op 0 B/op 0 allocs/op
25 | xid 59964620 19.4 ns/op 0 B/op 0 allocs/op
26 | uuid/v1 33327685 36.3 ns/op 0 B/op 0 allocs/op
27 | ulid/math 23083492 50.3 ns/op 16 B/op 1 allocs/op
28 | sno/bounded 21022425 61.0 ns/op 0 B/op 0 allocs/op
29 | ulid/crypto 5797293 204 ns/op 16 B/op 1 allocs/op
30 | uuid/v4 5660026 205 ns/op 16 B/op 1 allocs/op
31 | ksuid 5430244 206 ns/op 0 B/op 0 allocs/op
32 | sandflake 5427452 224 ns/op 3 B/op 1 allocs/op
33 | snowflake 4917784 244 ns/op 0 B/op 0 allocs/op
34 | cuid 3507404 342 ns/op 55 B/op 4 allocs/op
35 | sonyflake 31000 38938 ns/op 0 B/op 0 allocs/op
36 | ```
37 |
38 | **Parallel** (8 threads)
39 |
40 | ```
41 | sno/unbounded 65161461 17.8 ns/op 0 B/op 0 allocs/op
42 | xid 63163545 18.1 ns/op 0 B/op 0 allocs/op
43 | sno/bounded 21022425 61.0 ns/op 0 B/op 0 allocs/op
44 | uuid/v1 8695777 137 ns/op 0 B/op 0 allocs/op
45 | uuid/v4 7947076 151 ns/op 16 B/op 1 allocs/op
46 | ulid/crypto 7947030 151 ns/op 16 B/op 1 allocs/op
47 | sandflake 6521745 184 ns/op 3 B/op 1 allocs/op
48 | ulid/math 5825053 206 ns/op 16 B/op 1 allocs/op
49 | snowflake 4917774 244 ns/op 0 B/op 0 allocs/op
50 | ksuid 3692324 316 ns/op 0 B/op 0 allocs/op
51 | cuid 3200022 371 ns/op 55 B/op 4 allocs/op
52 | sonyflake 30896 38740 ns/op 0 B/op 0 allocs/op
53 | ```
54 |
55 | **Snowflakes**
56 |
57 | What does `unbounded` mean? [xid], for example, is unbounded, i.e. it does not prevent you from generating more IDs
58 | than it has a pool for (nor does it account for time). In other words - at high enough throughput you simply and
59 | silently start colliding with already generated IDs. *Realistically* you are not going to fill its pool of
60 | 16,777,216 IDs per second, but it does show up in synthetic benchmarks. [Sandflake] neither bounds its sequence nor
61 | handles clock drifts. In both cases the results are `WYSIWYG`.
62 |
63 | The implementations that do bound their sequences approach this issue (and clock drifts) differently. [Sonyflake] goes
64 | to sleep, [Snowflake] spins aggressively re-reading the OS time. **sno**, when about to overflow, starts a single timer
65 | and locks all overflowing requests on a condition, waking them up when the sequence resets, i.e. when the time changes.
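
As a rough sketch of what that looks like from the caller's side (the bounds, partition and counts below
are arbitrary), a bounded generator can be constructed with a notification channel that receives messages
while it is overflowing:

```
package main

import (
	"log"

	"github.com/muyo/sno"
)

func main() {
	// Buffered, as the generator's notification writes are non-blocking and may otherwise get dropped.
	c := make(chan *sno.SequenceOverflowNotification, 4)

	g, err := sno.NewGenerator(&sno.GeneratorSnapshot{
		Partition:   sno.Partition{255, 255}, // Arbitrary.
		SequenceMin: 1024,
		SequenceMax: 2047, // Pool of 1024 IDs per 4msec timeframe.
	}, c)
	if err != nil {
		log.Fatal(err)
	}

	go func() {
		for note := range c {
			log.Printf("overflowing: %d generation calls waiting (tick %d)", note.Count, note.Ticks)
		}
	}()

	// New() blocks only when the pool for the current timeframe is exhausted.
	for i := 0; i < 100000; i++ {
		_ = g.New(0)
	}
}
```

The channel and its `SequenceOverflowNotification` payload are defined in `generator.go`; passing `nil`
instead simply disables the notifications.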
66 |
67 | Both of the above are edge cases - *realistically* you rarely hit them; Go's benchmarks simply happen to saturate
68 | the capacities and hit those branches. **Most of the time what you get is the unbounded overhead**. Expect the actual
69 | overhead of those implementations to be considerably lower, but still higher than [xid] and **sno** due to their locking nature.
70 | Similarly, expect some of the generation calls to **sno** to be considerably *slower* when they drop into an
71 | edge case branch, but still very much in the same order of magnitude.
72 |
73 | Note: the `61.0ns/op` is our **throughput upper bound** - `1s / 61.0ns` yields `16,393,442`. It's an imprecise
74 | measure, but it closely reflects `16,384,000` - our pool per second. If you shrink that capacity using custom
75 | sequence bounds, that number - `61.0ns/op` - will start growing sharply, but only if/as you burst through
76 | the available capacity. The arithmetic is spelled out below.
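
To spell the arithmetic out (default sequence bounds assumed):

```
1s / 4msec timeframe          = 250 timeframes per second
250 * 65,536 (default pool)   = 16,384,000 IDs per second
1s / 61.0ns per op            ≈ 16,393,442 ops per second
```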
77 |
78 | [Sonyflake], for example, is limited to 256 IDs per 10msec (25,600 per second), which is why its per-op times *appear*
79 | so high - and why the comparison has a disclaimer.
80 |
81 | **`sno/unbounded`**
82 |
83 | In order to get the `unbounded` results in **sno**'s case, `Generator.New()` must be modified locally
84 | and the...
85 | ```
86 | if g.seqMax >= seq {...}
87 | ```
88 | ...condition removed.
89 |
90 | **Entropy**
91 |
92 | All entropy-based implementations lock - and will naturally be slower as they need to read from an entropy source and
93 | have more bits to fiddle with. The [ULID] run backed by math/rand required manual locking of its entropy source for the parallel test.
94 |
95 |
96 |
97 |
98 | ### Encoding/decoding
99 |
100 | The comparisons below are preceded by some baseline measures for sno relative to std's base32 package
101 | as a reference.
102 |
103 | - `sno/vector` - amd64 SIMD code,
104 | - `sno/scalar` - assembly based fallback on amd64 without SIMD,
105 | - `sno/pure-go` - non-assembly, pure Go implementation used by sno on non-amd64 platforms.
106 |
107 | The comparison results below were measured using `sno/vector` in our case, but `sno/pure-go` - albeit slower -
108 | would place just as high.
109 |
110 | **Excluded**
111 | - [Sonyflake] has no canonical encoding;
112 | - [cuid] is base36 only (no binary representation);
113 |
114 | **Notes**
115 | - Expect JSON (un)marshal performance to be nearly identical in most if not all cases;
116 |
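For context, the sno side of those comparisons boils down to calls like the following (a sketch - assuming
`FromEncodedString` returns an `(ID, error)` pair):

```
id := sno.New(0)
enc := id.String()                      // 16-char, base32-encoded representation
dec, err := sno.FromEncodedString(enc)  // back to the 10-byte binary ID
```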
117 |
118 | #### Encoding
119 |
120 | **Baseline**
121 | ```
122 | sno/vector 2000000000 0.85 ns/op 0 B/op 0 allocs/op
123 | sno/scalar 1000000000 2.21 ns/op 0 B/op 0 allocs/op
124 | sno/pure-go 1000000000 2.70 ns/op 0 B/op 0 allocs/op
125 | std 30000000 12.5 ns/op 0 B/op 0 allocs/op
126 | ```
127 |
128 | **Comparison**
129 |
130 | ```
131 | sno 963900753 1.18 ns/op 0 B/op 0 allocs/op
132 | xid 240481202 4.94 ns/op 0 B/op 0 allocs/op
133 | ulid 211640920 5.67 ns/op 0 B/op 0 allocs/op
134 | snowflake 71941237 16.5 ns/op 32 B/op 1 allocs/op
135 | sandflake 58868926 21.5 ns/op 32 B/op 1 allocs/op
136 | uuid/v4 55494362 22.1 ns/op 48 B/op 1 allocs/op
137 | uuid/v1 51785808 22.2 ns/op 48 B/op 1 allocs/op
138 | ksuid 19672356 54.7 ns/op 0 B/op 0 allocs/op
139 | ```
140 |
141 | Using: `String()`, provided by all packages.
142 |
143 |
144 | #### Decoding
145 |
146 | **Baseline**
147 | ```
148 | sno/vector 2000000000 1.02 ns/op 0 B/op 0 allocs/op
149 | sno/scalar 500000000 2.41 ns/op 0 B/op 0 allocs/op
150 | sno/pure-go 500000000 2.79 ns/op 0 B/op 0 allocs/op
151 | std 50000000 31.8 ns/op 0 B/op 0 allocs/op
152 | ```
153 |
154 | **Comparison**
155 |
156 | ```
157 | sno 863313699 1.30 ns/op 0 B/op 0 allocs/op
158 | ulid 239884370 4.98 ns/op 0 B/op 0 allocs/op
159 | xid 156291760 7.62 ns/op 0 B/op 0 allocs/op
160 | snowflake 127603538 9.32 ns/op 0 B/op 0 allocs/op
161 | uuid/v1 30000150 35.7 ns/op 48 B/op 1 allocs/op
162 | uuid/v4 30000300 35.7 ns/op 48 B/op 1 allocs/op
163 | ksuid 27908728 37.5 ns/op 0 B/op 0 allocs/op
164 | sandflake 25533001 40.6 ns/op 32 B/op 1 allocs/op
165 | ```
166 |
167 | Using: `sno.FromEncodedString`, `ulid.Parse`, `xid.FromString`, `snowflake.ParseString`, `sandflake.Parse`, `ksuid.Parse`,
168 | `uuid.FromString`
169 |
170 |
171 | [UUID]: https://github.com/gofrs/uuid
172 | [KSUID]: https://github.com/segmentio/ksuid
173 | [cuid]: https://github.com/lucsky/cuid
174 | [Snowflake]: https://github.com/bwmarrin/snowflake
175 | [Sonyflake]: https://github.com/sony/sonyflake
176 | [Sandflake]: https://github.com/celrenheit/sandflake
177 | [ULID]: https://github.com/oklog/ulid
178 | [xid]: https://github.com/rs/xid
--------------------------------------------------------------------------------
/id_test.go:
--------------------------------------------------------------------------------
1 | package sno
2 |
3 | import (
4 | "bytes"
5 | "fmt"
6 | "reflect"
7 | "sync/atomic"
8 | "testing"
9 | "time"
10 | )
11 |
12 | func TestID_Time(t *testing.T) {
13 | tn := time.Now()
14 | id := New(255)
15 |
16 | // As we prune the fraction, actual cmp needs to be adjusted. This *may* also fail
17 | // in the rare condition that a new timeframe started between time.Now() and New()
18 | // since we're not using a deterministic time source currently.
19 | expected := tn.UnixNano() / TimeUnit
20 | actual := id.Time().UnixNano() / TimeUnit
21 |
22 | if actual != expected {
23 | t.Errorf("expected [%v], got [%v]", expected, actual)
24 | }
25 |
26 | id = NewWithTime(255, tn)
27 | actual = id.Time().UnixNano() / TimeUnit
28 |
29 | if actual != expected {
30 | t.Errorf("expected [%v], got [%v]", expected, actual)
31 | }
32 | }
33 |
34 | func TestID_Timestamp(t *testing.T) {
35 | tn := time.Now()
36 | id := New(255)
37 |
38 | expected := tn.UnixNano() / TimeUnit * TimeUnit // Drop precision for the comparison.
39 | actual := id.Timestamp()
40 |
41 | if actual != expected {
42 | t.Errorf("expected [%v], got [%v]", expected, actual)
43 | }
44 |
45 | id = NewWithTime(255, tn)
46 | actual = id.Timestamp()
47 |
48 | if actual != expected {
49 | t.Errorf("expected [%v], got [%v]", expected, actual)
50 | }
51 | }
52 |
53 | func TestID_Meta(t *testing.T) {
54 | var expected byte = 255
55 | id := New(expected)
56 | actual := id.Meta()
57 |
58 | if actual != expected {
59 | t.Errorf("expected [%v], got [%v]", expected, actual)
60 | }
61 | }
62 |
63 | func TestID_Partition(t *testing.T) {
64 | expected := generator.Partition()
65 | actual := generator.New(255).Partition()
66 |
67 | if actual != expected {
68 | t.Errorf("expected [%v], got [%v]", expected, actual)
69 | }
70 | }
71 |
72 | func TestID_Sequence(t *testing.T) {
73 | expected := atomic.LoadUint32(&generator.seq) + 1
74 | actual := generator.New(255).Sequence()
75 |
76 | if actual != uint16(expected) {
77 | t.Errorf("expected [%v], got [%v]", expected, actual)
78 | }
79 | }
80 |
81 | func TestID_String(t *testing.T) {
82 | src := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51}
83 | expected := "brpk4q72xwf2m63l"
84 | actual := src.String()
85 |
86 | if actual != expected {
87 | t.Errorf("expected [%s], got [%s]", expected, actual)
88 | }
89 | }
90 |
91 | func TestID_Bytes(t *testing.T) {
92 | src := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51}
93 | expected := make([]byte, SizeBinary)
94 | copy(expected, src[:])
95 |
96 | actual := src.Bytes()
97 | if !bytes.Equal(actual, expected) {
98 | t.Errorf("expected [%s], got [%s]", expected, actual)
99 | }
100 |
101 | actual[SizeBinary-1]++
102 | if bytes.Equal(expected, actual) {
103 | t.Error("returned a reference to underlying array")
104 | }
105 | }
106 |
107 | func TestID_MarshalText(t *testing.T) {
108 | src := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51}
109 | expected := []byte("brpk4q72xwf2m63l")
110 |
111 | actual, err := src.MarshalText()
112 | if err != nil {
113 | t.Fatal(err)
114 | }
115 |
116 | if !bytes.Equal(actual, expected) {
117 | t.Errorf("expected [%s], got [%s]", expected, actual)
118 | }
119 | }
120 |
121 | func TestID_UnmarshalText_Valid(t *testing.T) {
122 | actual := ID{}
123 | expected := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51}
124 |
125 | if err := actual.UnmarshalText([]byte("brpk4q72xwf2m63l")); err != nil {
126 | t.Fatal(err)
127 | }
128 |
129 | if actual != expected {
130 | t.Errorf("expected [%s], got [%s]", expected, actual)
131 | }
132 | }
133 |
134 | func TestID_UnmarshalText_Invalid(t *testing.T) {
135 | id := ID{}
136 | err := id.UnmarshalText([]byte("012brpk4q72xwf2m63l1245453gfdgxz"))
137 |
138 | if _, ok := err.(*InvalidDataSizeError); !ok {
139 | t.Errorf("expected error with type [%T], got [%T]", &InvalidDataSizeError{}, err)
140 | }
141 | }
142 |
143 | func TestID_MarshalJSON_Valid(t *testing.T) {
144 | src := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51}
145 | expected := []byte("\"brpk4q72xwf2m63l\"")
146 |
147 | actual, err := src.MarshalJSON()
148 | if err != nil {
149 | t.Fatal(err)
150 | }
151 |
152 | if !bytes.Equal(actual, expected) {
153 | t.Errorf("expected [%s], got [%s]", expected, actual)
154 | }
155 | }
156 |
157 | func TestID_MarshalJSON_Null(t *testing.T) {
158 | src := ID{}
159 | expected := []byte("null")
160 | actual, err := src.MarshalJSON()
161 | if err != nil {
162 | t.Fatal(err)
163 | }
164 |
165 | if !bytes.Equal(actual, expected) {
166 | t.Errorf("expected [%s], got [%s]", expected, actual)
167 | }
168 | }
169 |
170 | func TestID_UnmarshalJSON_Valid(t *testing.T) {
171 | actual := ID{}
172 | expected := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51}
173 |
174 | if err := actual.UnmarshalJSON([]byte("\"brpk4q72xwf2m63l\"")); err != nil {
175 | t.Fatal(err)
176 | }
177 |
178 | if actual != expected {
179 | t.Errorf("expected [%s], got [%s]", expected, actual)
180 | }
181 | }
182 |
183 | func TestID_UnmarshalJSON_Invalid(t *testing.T) {
184 | id := ID{}
185 | err := id.UnmarshalJSON([]byte("\"012brpk4q72xwf2m63l1245453gfdgxz\""))
186 |
187 | if _, ok := err.(*InvalidDataSizeError); !ok {
188 | t.Errorf("expected error with type [%T], got [%T]", &InvalidDataSizeError{}, err)
189 | }
190 |
191 | if err != nil && err.Error() != errInvalidDataSizeMsg {
192 | t.Errorf("expected error [%s], got [%s]", errInvalidDataSizeMsg, err.Error())
193 | }
194 | }
195 |
196 | func TestID_UnmarshalJSON_Null(t *testing.T) {
197 | actual := ID{}
198 | expected := ID{}
199 |
200 | if err := actual.UnmarshalJSON([]byte("null")); err != nil {
201 | t.Fatal(err)
202 | }
203 |
204 | if actual != expected {
205 | t.Errorf("expected [%s], got [%s]", expected, actual)
206 | }
207 | }
208 |
209 | func TestID_IsZero(t *testing.T) {
210 | for _, c := range []struct {
211 | id ID
212 | want bool
213 | }{
214 | {
215 | id: New(255),
216 | want: false,
217 | },
218 | {
219 | id: ID{},
220 | want: true,
221 | },
222 | } {
223 | if actual, expected := c.id.IsZero(), c.want; actual != expected {
224 | t.Errorf("expected [%v], got [%v]", expected, actual)
225 | }
226 | }
227 | }
228 |
229 | func TestID_Compare(t *testing.T) {
230 | a := New(100)
231 | l := a
232 | l[5]++
233 | e := a
234 | b := a
235 | b[5]--
236 |
237 | if actual := a.Compare(l); actual != -1 {
238 | t.Errorf("expected [-1], got [%d]", actual)
239 | }
240 |
241 | if actual := a.Compare(e); actual != 0 {
242 | t.Errorf("expected [0], got [%d]", actual)
243 | }
244 |
245 | if actual := a.Compare(b); actual != 1 {
246 | t.Errorf("expected [1], got [%d]", actual)
247 | }
248 | }
249 |
250 | func TestID_Value(t *testing.T) {
251 | src := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51}
252 | expected := make([]byte, SizeBinary)
253 | copy(expected, src[:])
254 |
255 | v, err := src.Value()
256 | if err != nil {
257 | t.Errorf("got unexpected error: %s", err)
258 | }
259 |
260 | actual, ok := v.([]byte)
261 | if !ok {
262 | t.Errorf("expected type [%T], got [%T]", expected, actual)
263 | }
264 |
265 | if !bytes.Equal(actual, expected) {
266 | t.Errorf("expected [%s], got [%s]", expected, actual)
267 | }
268 |
269 | actual[SizeBinary-1]++
270 | if bytes.Equal(expected, actual) {
271 | t.Error("returned a reference to underlying array")
272 | }
273 | }
274 |
275 | func TestID_Scan(t *testing.T) {
276 | id := New(255)
277 |
278 | for _, c := range []struct {
279 | name string
280 | in interface{}
281 | out ID
282 | err error
283 | errMsg string
284 | }{
285 | {"nil", nil, ID{}, nil, ""},
286 | {"bytes-valid", id[:], id, nil, ""},
287 | {"bytes-invalid", make([]byte, 3), zero, &InvalidDataSizeError{Size: 3}, errInvalidDataSizeMsg},
288 | {"bytes-zero", []byte{}, zero, nil, ""},
289 | {"string-valid", id.String(), id, nil, ""},
290 | {"string-invalid", "123", zero, &InvalidDataSizeError{Size: 3}, errInvalidDataSizeMsg},
291 | {"string-zero", "", zero, nil, ""},
292 | {"invalid", 69, ID{}, &InvalidTypeError{Value: 69}, fmt.Sprintf(errInvalidTypeFmt, 69)},
293 | } {
294 | c := c
295 | t.Run(c.name, func(t *testing.T) {
296 | t.Parallel()
297 |
298 | var out ID
299 | err := out.Scan(c.in)
300 |
301 | if actual, expected := out, c.out; actual != expected {
302 | t.Errorf("expected [%s], got [%s]", expected, actual)
303 | }
304 |
305 | if err != nil && c.err == nil {
306 | t.Errorf("got unexpected error: %s", err)
307 | } else if actual, expected := reflect.TypeOf(err), reflect.TypeOf(c.err); actual != expected {
308 | t.Errorf("expected error type [%s], got [%s]", expected, actual)
309 | } else if err != nil && c.errMsg != "" && err.Error() != c.errMsg {
310 | t.Errorf("expected error message [%s], got [%s]", c.errMsg, err.Error())
311 | }
312 | })
313 | }
314 | }
315 |
--------------------------------------------------------------------------------
/generator.go:
--------------------------------------------------------------------------------
1 | // Package sno provides fast generators of compact, sortable, unique IDs with embedded metadata.
2 | package sno
3 |
4 | import (
5 | "encoding/binary"
6 | "sync"
7 | "sync/atomic"
8 | "time"
9 | )
10 |
11 | // GeneratorSnapshot represents the bookkeeping data of a Generator at some point in time.
12 | //
13 | // Snapshots serve both as configuration and a means of restoring generators across restarts,
14 | // to ensure newly generated IDs don't collide with IDs generated before going offline.
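//
// A rough sketch of the intended round-trip (persistence of the snapshot is left out):
//
//	snapshot := generator.Snapshot()
//	// ... store the snapshot, restart the process, load it back ...
//	generator, err := sno.NewGenerator(&snapshot, nil)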
15 | type GeneratorSnapshot struct {
16 | // The Partition the generator is scoped to. A zero value ({0, 0}) is valid and will be used.
17 | Partition Partition `json:"partition"`
18 |
19 | // Sequence pool bounds (inclusive). Can be given in either order - lower value will become lower bound.
20 | // When SequenceMax is 0 and SequenceMin != 65535, SequenceMax will be set to 65535.
21 | SequenceMin uint16 `json:"sequenceMin"`
22 | SequenceMax uint16 `json:"sequenceMax"`
23 |
24 | // Current sequence number. When 0, it will be set to SequenceMin. May overflow SequenceMax,
25 | // but not underflow SequenceMin.
26 | Sequence uint32 `json:"sequence"`
27 |
28 | Now int64 `json:"now"` // Wall time the snapshot was taken at in sno time units and in our epoch.
29 | 	WallHi   int64  `json:"wallHi"`   // Wall time (in sno time units) IDs were most recently generated at.
30 | 	WallSafe int64  `json:"wallSafe"` // Highest wall time recorded before a drift - regressions below it wait until it passes again.
31 | Drifts uint32 `json:"drifts"` // Count of wall clock regressions the generator tick-tocked at.
32 | }
33 |
34 | // SequenceOverflowNotification contains information pertaining to the current state of a Generator
35 | // while it is overflowing.
36 | type SequenceOverflowNotification struct {
37 | Now time.Time // Time of tick.
38 | Count uint32 // Number of currently overflowing generation calls.
39 | Ticks uint32 // Total count of ticks while dealing with the *current* overflow.
40 | }
41 |
42 | // Generator is responsible for generating new IDs scoped to a given fixed Partition and
43 | // managing their sequence.
44 | //
45 | // A Generator must be constructed using NewGenerator - the zero value of a Generator is
46 | // an unusable state.
47 | //
48 | // A Generator must not be copied after first use.
49 | type Generator struct {
50 | partition uint32 // Immutable.
51 |
52 | drifts uint32 // Uses the LSB for the tick-tock and serves as a counter.
53 | wallHi uint64 // Atomic.
54 | wallSafe uint64 // Atomic.
55 | regression sync.Mutex // Regression branch lock.
56 |
57 | seq uint32 // Atomic.
58 | seqMin uint32 // Immutable.
59 | seqMax uint32 // Immutable.
60 | seqStatic uint32 // Atomic. See NewWithTime. Not included in snapshots (does not get restored).
61 |
62 | seqOverflowCond *sync.Cond
63 | seqOverflowTicker *time.Ticker
64 | seqOverflowCount uint32 // Behind seqOverflowCond lock.
65 | seqOverflowChan chan<- *SequenceOverflowNotification
66 | }
67 |
68 | // NewGenerator returns a new generator based on the optional Snapshot.
69 | func NewGenerator(snapshot *GeneratorSnapshot, c chan<- *SequenceOverflowNotification) (*Generator, error) {
70 | if snapshot != nil {
71 | return newGeneratorFromSnapshot(*snapshot, c)
72 | }
73 |
74 | return newGeneratorFromDefaults(c)
75 | }
76 |
77 | func newGeneratorFromSnapshot(snapshot GeneratorSnapshot, c chan<- *SequenceOverflowNotification) (*Generator, error) {
78 | if err := sanitizeSnapshotBounds(&snapshot); err != nil {
79 | return nil, err
80 | }
81 |
82 | return &Generator{
83 | partition: partitionToInternalRepr(snapshot.Partition),
84 | seq: snapshot.Sequence,
85 | seqMin: uint32(snapshot.SequenceMin),
86 | seqMax: uint32(snapshot.SequenceMax),
87 | seqStatic: uint32(snapshot.SequenceMin - 1), // Offset by -1 since NewWithTime starts this with an incr.
88 | seqOverflowCond: sync.NewCond(&sync.Mutex{}),
89 | seqOverflowChan: c,
90 | drifts: snapshot.Drifts,
91 | wallHi: uint64(snapshot.WallHi),
92 | wallSafe: uint64(snapshot.WallSafe),
93 | }, nil
94 | }
95 |
96 | func newGeneratorFromDefaults(c chan<- *SequenceOverflowNotification) (*Generator, error) {
97 | // Realistically safe, but has an edge case resulting in PartitionPoolExhaustedError.
98 | partition, err := genPartition()
99 | if err != nil {
100 | return nil, err
101 | }
102 |
103 | return &Generator{
104 | partition: partition,
105 | seqMax: MaxSequence,
106 | seqStatic: ^uint32(0), // Offset by -1 since NewWithTime starts this with an incr.
107 | seqOverflowCond: sync.NewCond(&sync.Mutex{}),
108 | seqOverflowChan: c,
109 | }, nil
110 | }
111 |
112 | // New generates a new ID using the current system time for its timestamp.
113 | func (g *Generator) New(meta byte) (id ID) {
114 | retry:
115 | var (
116 | // Note: Single load of wallHi for the evaluations is correct (as we only grab wallNow
117 | // once as well).
118 | wallHi = atomic.LoadUint64(&g.wallHi)
119 | wallNow = snotime()
120 | )
121 |
122 | // Fastest branch if we're still within the most recent time unit.
123 | if wallNow == wallHi {
124 | seq := atomic.AddUint32(&g.seq, 1)
125 |
126 | if g.seqMax >= seq {
127 | g.applyTimestamp(&id, wallNow, atomic.LoadUint32(&g.drifts)&1)
128 | g.applyPayload(&id, meta, seq)
129 |
130 | return
131 | }
132 |
133 | // This is to be considered an edge case if seqMax actually gets exceeded, but since bounds
134 | // can be set arbitrarily, in a small pool (or in stress tests) this can happen.
135 | // We don't *really* handle this gracefully - we currently clog up and wait until the sequence
136 | // gets reset by a time change *hoping* we'll finally get our turn. If requests to generate
137 | // don't decrease enough, eventually this will starve out resources.
138 | //
139 | // The reason we don't simply plug the broadcast into the time progression branch is precisely
140 | // because that one is going to be the most common branch for many uses realistically (1 or 0 ID per 4msec)
141 | // while this one is for scales on another level. At the same time if we *ever* hit this case, we need
142 | // a periodic flush anyways, because even a single threaded process can easily exhaust the max default
143 | // sequence pool, let alone a smaller one, meaning it could potentially deadlock if all routines get
144 | // locked in on a sequence overflow and no new routine comes to their rescue at a higher time to reset
145 | // the sequence and notify them.
146 | g.seqOverflowCond.L.Lock()
147 | g.seqOverflowCount++
148 |
149 | if g.seqOverflowTicker == nil {
150 | // Tick *roughly* each 1ms during overflows.
151 | g.seqOverflowTicker = time.NewTicker(TimeUnit / 4)
152 | go g.seqOverflowLoop()
153 | }
154 |
155 | for atomic.LoadUint32(&g.seq) > g.seqMax {
156 | // We spin pessimistically here instead of a straight lock -> wait -> unlock because that'd
157 | // put us back on the New(). At extreme contention we could end up back here anyways.
158 | g.seqOverflowCond.Wait()
159 | }
160 |
161 | g.seqOverflowCount--
162 | g.seqOverflowCond.L.Unlock()
163 |
164 | goto retry
165 | }
166 |
167 | // Time progression branch.
168 | if wallNow > wallHi && atomic.CompareAndSwapUint64(&g.wallHi, wallHi, wallNow) {
169 | atomic.StoreUint32(&g.seq, g.seqMin)
170 |
171 | g.applyTimestamp(&id, wallNow, atomic.LoadUint32(&g.drifts)&1)
172 | g.applyPayload(&id, meta, g.seqMin)
173 |
174 | return
175 | }
176 |
177 | // Time regression branch.
178 | g.regression.Lock()
179 |
180 | // Check-again. It's possible that another thread applied the drift while we were spinning (if we were).
181 | if wallHi = atomic.LoadUint64(&g.wallHi); wallNow >= wallHi {
182 | g.regression.Unlock()
183 |
184 | goto retry
185 | }
186 |
187 | if wallNow > g.wallSafe {
188 | // Branch for the one routine that gets to apply the drift.
189 | // wallHi is bidirectional (gets updated whenever the wall clock time progresses - or when a drift
190 | // gets applied, which is when it regresses). In contrast, wallSafe only ever gets updated when
191 | // a drift gets applied and always gets set to the highest time recorded, meaning it
192 | // increases monotonically.
193 | atomic.StoreUint64(&g.wallSafe, wallHi)
194 | atomic.StoreUint64(&g.wallHi, wallNow)
195 | atomic.StoreUint32(&g.seq, g.seqMin)
196 |
197 | g.applyTimestamp(&id, wallNow, atomic.AddUint32(&g.drifts, 1)&1)
198 | g.applyPayload(&id, meta, g.seqMin)
199 |
200 | g.regression.Unlock()
201 |
202 | return
203 | }
204 |
205 | // Branch for all routines that are in an "unsafe" past (e.g. multiple time regressions happened
206 | // before we reached wallSafe again).
207 | g.regression.Unlock()
208 |
209 | time.Sleep(time.Duration(g.wallSafe - wallNow))
210 |
211 | goto retry
212 | }
213 |
214 | // NewWithTime generates a new ID using the given time for the timestamp.
215 | //
216 | // IDs generated with user-specified timestamps are exempt from the tick-tock mechanism and
217 | // use a sequence separate from New() - one that is independent from time, as time provided to
218 | // this method can be arbitrary. The sequence increases strictly monotonically up to hitting
219 | // the generator's SequenceMax, after which it rolls over silently back to SequenceMin.
220 | //
221 | // That means bounds are respected, but unlike New(), NewWithTime() will not block the caller
222 | // when the (separate) sequence rolls over as the Generator would be unable to determine when
223 | // to resume processing within the constraints of this method.
224 | //
225 | // Managing potential collisions due to the arbitrary time is left to the user.
226 | //
227 | // This utility is primarily meant to enable porting of old IDs to sno and is assumed to be run
228 | // before an ID scheme goes online.
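//
// A rough sketch of such a porting pass (the legacy record type and helpers are hypothetical):
//
//	for _, old := range legacyRecords {
//		id := generator.NewWithTime(metaFor(old), old.CreatedAt)
//		// ... persist the mapping from old.ID to id ...
//	}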
229 | func (g *Generator) NewWithTime(meta byte, t time.Time) (id ID) {
230 | retry:
231 | var seq = atomic.AddUint32(&g.seqStatic, 1)
232 |
233 | if seq > g.seqMax {
234 | if !atomic.CompareAndSwapUint32(&g.seqStatic, seq, g.seqMin) {
235 | goto retry
236 | }
237 |
238 | seq = g.seqMin
239 | }
240 |
241 | g.applyTimestamp(&id, uint64(t.UnixNano()-epochNsec)/TimeUnit, 0)
242 | g.applyPayload(&id, meta, seq)
243 |
244 | return
245 | }
246 |
247 | // Partition returns the fixed identifier of the Generator.
248 | func (g *Generator) Partition() Partition {
249 | return partitionToPublicRepr(g.partition)
250 | }
251 |
252 | // Sequence returns the current sequence the Generator is at.
253 | //
254 | // This does *not* mean that if one were to call New() right now, the generated ID
255 | // will necessarily get this sequence, as other things may happen before.
256 | //
257 | // If the next call to New() would result in a reset of the sequence, SequenceMin
258 | // is returned instead of the current internal sequence.
259 | //
260 | // If the generator is currently overflowing, the sequence returned will be higher than
261 | // the generator's SequenceMax (thus a uint32 return type), meaning it can be used to
262 | // determine the current overflow via:
263 | // overflow := int(uint32(generator.SequenceMax()) - generator.Sequence())
264 | func (g *Generator) Sequence() uint32 {
265 | if wallNow := snotime(); wallNow == atomic.LoadUint64(&g.wallHi) {
266 | return atomic.LoadUint32(&g.seq)
267 | }
268 |
269 | return g.seqMin
270 | }
271 |
272 | // SequenceMin returns the lower bound of the sequence pool of this generator.
273 | func (g *Generator) SequenceMin() uint16 {
274 | return uint16(g.seqMin)
275 | }
276 |
277 | // SequenceMax returns the upper bound of the sequence pool of this generator.
278 | func (g *Generator) SequenceMax() uint16 {
279 | return uint16(g.seqMax)
280 | }
281 |
282 | // Len returns the number of IDs generated in the current timeframe.
283 | func (g *Generator) Len() int {
284 | if wallNow := snotime(); wallNow == atomic.LoadUint64(&g.wallHi) {
285 | if seq := atomic.LoadUint32(&g.seq); g.seqMax > seq {
286 | return int(seq-g.seqMin) + 1
287 | }
288 |
289 | return g.Cap()
290 | }
291 |
292 | return 0
293 | }
294 |
295 | // Cap returns the total capacity of the Generator.
296 | //
297 | // To get its current capacity (e.g. number of possible additional IDs in the current
298 | // timeframe), simply:
299 | // spare := generator.Cap() - generator.Len()
300 | // The result will always be non-negative.
301 | func (g *Generator) Cap() int {
302 | return int(g.seqMax-g.seqMin) + 1
303 | }
304 |
305 | // Snapshot returns a copy of the Generator's current bookkeeping data.
306 | func (g *Generator) Snapshot() GeneratorSnapshot {
307 | var (
308 | wallNow = snotime()
309 | wallHi = atomic.LoadUint64(&g.wallHi)
310 | seq uint32
311 | )
312 |
313 | // Be consistent with g.Sequence() and return seqMin if the next call to New()
314 | // would reset the sequence.
315 | if wallNow == wallHi {
316 | seq = atomic.LoadUint32(&g.seq)
317 | } else {
318 | seq = g.seqMin
319 | }
320 |
321 | return GeneratorSnapshot{
322 | Partition: partitionToPublicRepr(g.partition),
323 | SequenceMin: uint16(g.seqMin),
324 | SequenceMax: uint16(g.seqMax),
325 | Sequence: seq,
326 | Now: int64(wallNow),
327 | WallHi: int64(wallHi),
328 | WallSafe: int64(atomic.LoadUint64(&g.wallSafe)),
329 | Drifts: atomic.LoadUint32(&g.drifts),
330 | }
331 | }
332 |
333 | func (g *Generator) applyTimestamp(id *ID, units uint64, tick uint32) {
334 | // Equivalent to...
335 | //
336 | // id[0] = byte(units >> 31)
337 | // id[1] = byte(units >> 23)
338 | // id[2] = byte(units >> 15)
339 | // id[3] = byte(units >> 7)
340 | // id[4] = byte(units << 1) | byte(tick)
341 | //
342 | // ... and slightly wasteful as we're storing 3 bytes that will get overwritten
343 | // via applyPartition but unlike the code above, the calls to binary.BigEndian.PutUintXX()
344 | // are compiler assisted and boil down to essentially a load + shift + bswap (+ a nop due
345 | // to midstack inlining), which we prefer over the roughly 16 instructions otherwise.
346 | // If applyTimestamp() was implemented straight in assembly, we'd not get it inline.
347 | binary.BigEndian.PutUint64(id[:], units<<25|uint64(tick)<<24)
348 | }
349 |
350 | func (g *Generator) applyPayload(id *ID, meta byte, seq uint32) {
351 | id[5] = meta
352 | binary.BigEndian.PutUint32(id[6:], g.partition|seq)
353 | }
354 |
355 | func (g *Generator) seqOverflowLoop() {
356 | var (
357 | retryNotify bool
358 | ticks uint32
359 | )
360 |
361 | for t := range g.seqOverflowTicker.C {
362 | g.seqOverflowCond.L.Lock()
363 |
364 | if g.seqOverflowChan != nil {
365 | // We only ever count ticks when we've got a notification channel up.
366 | // Even if we're at a count of 0 but on our first tick, it means the generator declogged already,
367 | // but we still notify that it happened.
368 | ticks++
369 | if retryNotify || g.seqOverflowCount == 0 || ticks%4 == 1 {
370 | select {
371 | case g.seqOverflowChan <- &SequenceOverflowNotification{
372 | Now: t,
373 | Ticks: ticks,
374 | Count: g.seqOverflowCount,
375 | }:
376 | retryNotify = false
377 |
378 | default:
379 | // Simply drop the message for now but try again the next tick already
380 | // instead of waiting for the full interval.
381 | retryNotify = true
382 | }
383 | }
384 | }
385 |
386 | if g.seqOverflowCount == 0 {
387 | g.seqOverflowTicker.Stop()
388 | g.seqOverflowTicker = nil
389 | g.seqOverflowCond.L.Unlock()
390 |
391 | return
392 | }
393 |
394 | // At this point we can unlock already because we don't touch any shared data anymore.
395 | // The broadcasts further don't require us to hold the lock.
396 | g.seqOverflowCond.L.Unlock()
397 |
398 | // Under normal behaviour high load would trigger an overflow and load would remain roughly
399 | // steady, so a seq reset will simply get triggered by a time change happening in New().
400 | // The actual callers are in a pessimistic loop and will check the condition themselves again.
401 | if g.seqMax >= atomic.LoadUint32(&g.seq) {
402 | g.seqOverflowCond.Broadcast()
403 |
404 | continue
405 | }
406 |
407 | // Handles an edge case where we've got calls locked on an overflow and suddenly no more
408 | // calls to New() come in, meaning there's no one to actually reset the sequence.
409 | var (
410 | wallNow = uint64(t.UnixNano()-epochNsec) / TimeUnit
411 | wallHi = atomic.LoadUint64(&g.wallHi)
412 | )
413 |
414 | if wallNow > wallHi {
415 | atomic.StoreUint32(&g.seq, g.seqMin)
416 | g.seqOverflowCond.Broadcast()
417 |
418 | continue // Left for readability of flow.
419 | }
420 | }
421 | }
422 |
423 | // Arbitrary min pool size of 4 per time unit (that is 1000 per sec).
424 | // Separated out as a constant as this value is being tested against.
425 | const minSequencePoolSize = 4
426 |
427 | func sanitizeSnapshotBounds(s *GeneratorSnapshot) error {
428 | // Zero value of SequenceMax will pass as the default max if and only if SequenceMin is not already
429 | // default max (as the range can be defined in either order).
430 | if s.SequenceMax == 0 && s.SequenceMin != MaxSequence {
431 | s.SequenceMax = MaxSequence
432 | }
433 |
434 | if s.SequenceMin == s.SequenceMax {
435 | return invalidSequenceBounds(s, errSequenceBoundsIdenticalMsg)
436 | }
437 |
438 | // Allow bounds to be given in any order.
439 | if s.SequenceMax < s.SequenceMin {
440 | s.SequenceMin, s.SequenceMax = s.SequenceMax, s.SequenceMin
441 | }
442 |
443 | if s.SequenceMax-s.SequenceMin-1 < minSequencePoolSize {
444 | return invalidSequenceBounds(s, errSequencePoolTooSmallMsg)
445 | }
446 |
447 | // Allow zero value to pass as a default of the lower bound.
448 | if s.Sequence == 0 {
449 | s.Sequence = uint32(s.SequenceMin)
450 | }
451 |
452 | if s.Sequence < uint32(s.SequenceMin) {
453 | return invalidSequenceBounds(s, errSequenceUnderflowsBound)
454 | }
455 |
456 | return nil
457 | }
458 |
459 | func invalidSequenceBounds(s *GeneratorSnapshot, msg string) *InvalidSequenceBoundsError {
460 | return &InvalidSequenceBoundsError{
461 | Cur: s.Sequence,
462 | Min: s.SequenceMin,
463 | Max: s.SequenceMax,
464 | Msg: msg,
465 | }
466 | }
467 |
--------------------------------------------------------------------------------
/generator_test.go:
--------------------------------------------------------------------------------
1 | // +build test
2 |
3 | package sno
4 |
5 | import (
6 | "fmt"
7 | "sync"
8 | "sync/atomic"
9 | "testing"
10 | "time"
11 | _ "unsafe"
12 |
13 | "github.com/muyo/sno/internal"
14 | )
15 |
16 | // snotime is the actual time source used by Generators during tests.
17 | //
18 | // We split on build tags ("test") to swap out the snotime() implementations provided by platform specific
19 | // code so that tests can use mocked time sources without in any way impacting a Generator's runtime performance
20 | // in production builds.
21 | //
22 | // Note: Attempting to run the test suite without the "test" build tag will fail, resulting in several
23 | // compilation errors.
24 | var snotime = internal.Snotime
25 |
26 | // monotime provides real monotonic clock readings to several tests.
27 | //go:linkname monotime runtime.nanotime
28 | func monotime() int64
29 |
30 | // staticTime provides tests with a fake time source which returns a fixed time on each call.
31 | // The time returned can be changed by directly (atomically) mutating the underlying variable.
32 | func staticTime() uint64 {
33 | return atomic.LoadUint64(staticWallNow)
34 | }
35 |
36 | // staticIncTime provides tests with a fake time source which returns a time based on a fixed time
37 | // monotonically increasing by 1 TimeUnit on each call.
38 | func staticIncTime() uint64 {
39 | wall := atomic.LoadUint64(staticWallNow) + atomic.LoadUint64(staticInc)*TimeUnit
40 |
41 | atomic.AddUint64(staticInc, 1)
42 |
43 | return wall
44 | }
45 |
46 | var (
47 | staticInc = new(uint64)
48 | staticWallNow = func() *uint64 {
49 | wall := snotime()
50 | return &wall
51 | }()
52 | )
53 |
54 | func TestGenerator_NewNoOverflow(t *testing.T) {
55 | var (
56 | part = Partition{255, 255}
57 | seqPool = uint16(MaxSequence / 2)
58 | seqMin = seqPool
59 | seqMax = 2*seqPool - 1
60 |
61 | 		// Scaled to not exceed bounds - otherwise we run into the seqOverflow race, and order (which we
62 | 		// test for in here) becomes non-deterministic.
63 | sampleSize = int(seqPool)
64 | g, err = NewGenerator(&GeneratorSnapshot{
65 | Partition: part,
66 | SequenceMin: seqMin,
67 | SequenceMax: seqMax,
68 | }, nil)
69 | )
70 |
71 | if err != nil {
72 | t.Fatal(err)
73 | }
74 |
75 | ids := make([]ID, sampleSize)
76 | for i := 0; i < sampleSize; i++ {
77 | ids[i] = g.New(byte(i))
78 | }
79 |
80 | for i := 1; i < sampleSize; i++ {
81 | curID, prevID := ids[i], ids[i-1]
82 |
83 | seq := ids[i].Sequence()
84 | if seq > seqMax {
85 | 			t.Errorf("%d: sequence overflowing max boundary; max [%d], got [%d]", i, seqMax, seq)
86 | }
87 |
88 | if seq < seqMin {
89 | t.Errorf("%d: sequence underflowing min boundary; min [%d], got [%d]", i, seqMin, seq)
90 | }
91 |
92 | // We're expecting the time to increment and never more than by one time unit, since
93 | // we generated them in sequence.
94 | timeDiff := curID.Timestamp() - prevID.Timestamp()
95 |
96 | // Check if drift got applied in this edge case.
97 | if timeDiff < 0 && curID[4]&1 == 0 {
98 | t.Error("timestamp of next ID lower than previous and no tick-tock applied")
99 | }
100 |
101 | if timeDiff > TimeUnit {
102 | t.Error("timestamp diff between IDs is higher than by one time unit")
103 | }
104 |
105 | if prevID.Partition() != part {
106 | t.Errorf("%d: partition differs from generator's partition; expected [%d], got [%d]", i, part, prevID.Partition())
107 | }
108 | }
109 | }
110 |
111 | func TestGenerator_NewOverflows(t *testing.T) {
112 | var (
113 | part = Partition{255, 255}
114 | seqPool = 512
115 | seqOverflows = 16
116 | seqMin = uint16(seqPool)
117 | seqMax = uint16(2*seqPool - 1)
118 | sampleSize = seqPool * seqOverflows
119 |
120 | c = make(chan *SequenceOverflowNotification)
121 | cc = make(chan struct{})
122 | notesHi = new(int64)
123 |
124 | g, err = NewGenerator(&GeneratorSnapshot{
125 | Partition: part,
126 | SequenceMin: seqMin,
127 | SequenceMax: seqMax,
128 | }, c)
129 | )
130 |
131 | if err != nil {
132 | t.Fatal(err)
133 | }
134 |
135 | go func() {
136 | for {
137 | select {
138 | case note := <-c:
139 | if note.Count > 0 {
140 | atomic.AddInt64(notesHi, 1)
141 | }
142 | case <-cc:
143 | return
144 | }
145 | }
146 | }()
147 |
148 | ids := make([]ID, sampleSize)
149 | for i := 0; i < sampleSize; i++ {
150 | ids[i] = g.New(byte(i))
151 | }
152 |
153 | close(cc)
154 |
155 | // TODO(alcore) The non-blocking writes are far from reliable. The notifications need a rework with
156 | // deep profiling.
157 | if atomic.LoadInt64(notesHi) < int64(seqOverflows)/4 {
158 | t.Errorf("expected at least [%d] overflow notification, got [%d]", seqOverflows/4, atomic.LoadInt64(notesHi))
159 | }
160 |
161 | timeDist := make(map[int64]int)
162 |
163 | for i := 0; i < sampleSize; i++ {
164 | id := ids[i]
165 | timeDist[id.Timestamp()]++
166 |
167 | seq := id.Sequence()
168 | if seq > seqMax {
169 | 			t.Errorf("%d: sequence overflowing max boundary; max [%d], got [%d]", i, seqMax, seq)
170 | }
171 |
172 | if seq < seqMin {
173 | t.Errorf("%d: sequence underflowing min boundary; min [%d], got [%d]", i, seqMin, seq)
174 | }
175 |
176 | if id.Partition() != part {
177 | t.Errorf("%d: partition differs from generator's partition; expected [%d], got [%d]", i, part, id.Partition())
178 | }
179 | }
180 |
181 | for tf, c := range timeDist {
182 | if c > seqPool {
183 | t.Errorf("count of IDs in the given timeframe exceeds pool; timestamp [%d], pool [%d], count [%d]", tf, seqPool, c)
184 | }
185 | }
186 | }
187 |
188 | func TestGenerator_NewTickTocks(t *testing.T) {
189 | g, ids := testGeneratorNewTickTocksSetup(t)
190 | t.Run("Tick", testGeneratorNewTickTocksTick(g, ids))
191 | t.Run("SafetySlumber", testGeneratorNewTickTocksSafetySlumber(g, ids))
192 | t.Run("Tock", testGeneratorNewTickTocksTock(g, ids))
193 | t.Run("Race", testGeneratorNewTickTocksRace(g, ids))
194 | }
195 |
196 | func testGeneratorNewTickTocksSetup(t *testing.T) (*Generator, []ID) {
197 | var (
198 | seqPool = 8096
199 | g, err = NewGenerator(&GeneratorSnapshot{
200 | Partition: Partition{255, 255},
201 | SequenceMin: uint16(seqPool),
202 | SequenceMax: uint16(2*seqPool - 1),
203 | }, nil)
204 | )
205 | if err != nil {
206 | t.Fatal(err)
207 | }
208 |
209 | return g, make([]ID, g.Cap())
210 | }
211 |
212 | func testGeneratorNewTickTocksTick(g *Generator, ids []ID) func(*testing.T) {
213 | return func(t *testing.T) {
214 | // First batch follows normal time progression.
215 | for i := 0; i < 512; i++ {
216 | ids[i] = g.New(255)
217 | }
218 |
219 | wall := snotime()
220 | atomic.StoreUint64(staticWallNow, wall-TimeUnit)
221 |
222 | // Swap out the time source. Next batch is supposed to set a drift, have their tick-tock bit
223 | // set to 1, and wallSafe on the generator must be set accordingly.
224 | snotime = staticTime
225 |
226 | if atomic.LoadUint32(&g.drifts) != 0 {
227 | t.Errorf("expected [0] drifts recorded, got [%d]", atomic.LoadUint32(&g.drifts))
228 | }
229 |
230 | if atomic.LoadUint64(&g.wallSafe) != 0 {
231 | t.Errorf("expected wallSafe to be [0], is [%d]", atomic.LoadUint64(&g.wallSafe))
232 | }
233 |
234 | for j := 512; j < 1024; j++ {
235 | ids[j] = g.New(255)
236 | }
237 |
238 | if atomic.LoadUint32(&g.drifts) != 1 {
239 | t.Errorf("expected [1] drift recorded, got [%d]", atomic.LoadUint32(&g.drifts))
240 | }
241 |
242 | 		if atomic.LoadUint64(&g.wallSafe) == atomic.LoadUint64(staticWallNow) {
243 | 			t.Errorf("expected wallSafe [%d] to differ from the regressed wall time [%d]", atomic.LoadUint64(&g.wallSafe), atomic.LoadUint64(staticWallNow))
244 | 		}
245 |
246 | for i := 0; i < 512; i++ {
247 | if ids[i][4]&1 != 0 {
248 | t.Errorf("%d: expected tick-tock bit to not be set, was set", i)
249 | }
250 | }
251 |
252 | for j := 512; j < 1024; j++ {
253 | if ids[j][4]&1 != 1 {
254 | t.Errorf("%d: expected tick-tock bit to be set, was not", j)
255 | }
256 | }
257 |
258 | snotime = internal.Snotime
259 | }
260 | }
261 |
262 | func testGeneratorNewTickTocksSafetySlumber(g *Generator, ids []ID) func(*testing.T) {
263 | return func(t *testing.T) {
264 | // Multi-regression, checking on a single goroutine.
265 | atomic.AddUint64(staticWallNow, ^uint64(TimeUnit-1))
266 |
267 | 		// Use a clock where the first call will return the static clock time
268 | 		// but subsequent calls will return higher times. Since we haven't adjusted the mono clock
269 | 		// at all so far, it's currently 1 TimeUnit (first drift) behind wallSafe, which got set
270 | 		// during the initial drift. This is the duration the next generation call(s) are supposed
271 | 		// to sleep for, as we are simulating a multi-regression (into an unsafe past where we can't
272 | 		// tick-tock again until reaching wallSafe).
273 | snotime = staticIncTime
274 |
275 | mono1 := monotime()
276 | id := g.New(255)
277 | if id[4]&1 != 1 {
278 | t.Errorf("expected tick-tock bit to be set, was not")
279 | }
280 | mono2 := monotime()
281 |
282 | monoDiff := mono2 - mono1
283 |
284 | // We had 2 regressions by 1 TimeUnit each, so sleep duration should've been roughly
285 | // the same since time was static (got incremented only after the sleep).
286 | if monoDiff < 2*TimeUnit {
287 | t.Errorf("expected to sleep for at least [%f]ns, took [%d] instead", 2*TimeUnit, monoDiff)
288 | } else if monoDiff > 5*TimeUnit {
289 | t.Errorf("expected to sleep for no more than [%f]ns, took [%d] instead", 5*TimeUnit, monoDiff)
290 | }
291 |
292 | if atomic.LoadUint32(&g.drifts) != 1 {
293 | t.Errorf("expected [1] drift recorded, got [%d]", atomic.LoadUint32(&g.drifts))
294 | }
295 |
296 | snotime = internal.Snotime
297 | }
298 | }
299 |
300 | func testGeneratorNewTickTocksTock(g *Generator, ids []ID) func(*testing.T) {
301 | return func(t *testing.T) {
302 | // At this point we are going to simulate another drift, somewhere in the 'far' future,
303 | // with parallel load.
304 | snotime = staticTime
305 | atomic.AddUint64(staticWallNow, 100*TimeUnit)
306 |
307 | g.New(255) // Updates wallHi
308 |
309 | // Regress again. Not adjusting mono clock - calls below are supposed to simply drift - drift
310 | // count is supposed to end at 2 (since we're still using the same generator) and tick-tock
311 | // bit is supposed to be unset.
312 | atomic.AddUint64(staticWallNow, ^uint64(2*TimeUnit-1))
313 |
314 | var (
315 | batchCount = 4
316 | batchSize = g.Cap() / batchCount
317 | wg sync.WaitGroup
318 | )
319 |
320 | wg.Add(batchCount)
321 |
322 | for i := 0; i < batchCount; i++ {
323 | go func(mul int) {
324 | for i := mul * batchSize; i < mul*batchSize+batchSize; i++ {
325 | ids[i] = g.New(255)
326 | }
327 | wg.Done()
328 | }(i)
329 | }
330 |
331 | wg.Wait()
332 |
333 | if atomic.LoadUint32(&g.drifts) != 2 {
334 | t.Errorf("expected [2] drifts recorded, got [%d]", atomic.LoadUint32(&g.drifts))
335 | }
336 |
337 | for i := 0; i < g.Cap(); i++ {
338 | if ids[i][4]&1 != 0 {
339 | t.Errorf("%d: expected tick-tock bit to not be set, was set", i)
340 | }
341 | }
342 |
343 | snotime = internal.Snotime
344 | }
345 | }
346 |
347 | func testGeneratorNewTickTocksRace(g *Generator, ids []ID) func(*testing.T) {
348 | return func(*testing.T) {
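// Pure race/stress check: 1000 goroutines start in lockstep right after a
// simulated regression and each generate a couple of IDs. There are no
// assertions here - correctness is covered by the subtests above, this only
// exercises the race detector.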
349 | snotime = staticTime
350 |
351 | atomic.AddUint64(staticWallNow, 100*TimeUnit)
352 | g.New(255)
353 | atomic.AddUint64(staticWallNow, ^uint64(TimeUnit-1))
354 |
355 | var (
356 | wgOuter sync.WaitGroup
357 | wgInner sync.WaitGroup
358 | )
359 | wgOuter.Add(1000)
360 |
361 | wgInner.Add(1000)
362 | for i := 0; i < 1000; i++ {
363 | go func() {
364 | wgInner.Done()
365 | wgInner.Wait()
366 | for i := 0; i < 2; i++ {
367 | _ = g.New(byte(i))
368 | }
369 | wgOuter.Done()
370 | }()
371 | }
372 | wgOuter.Wait()
373 |
374 | snotime = internal.Snotime
375 | }
376 | }
377 |
378 | func TestGenerator_NewGeneratorRestoreRegressions(t *testing.T) {
379 | // In the first case we simply check that the times get applied at all. We get rid of the time
380 | // added while simulating the last drift.
381 | g, err := NewGenerator(nil, nil)
382 | if err != nil {
383 | t.Fatal(err)
384 | }
385 |
386 | // Reset the static clock.
387 | wall := snotime()
388 | snotime = staticTime
389 | atomic.StoreUint64(staticWallNow, wall)
390 |
391 | // Simulate a regression.
392 | g.New(255)
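// Regress the wall clock by one TimeUnit (adding the two's complement of
// TimeUnit to the unsigned counter is equivalent to subtracting it).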
393 | atomic.AddUint64(staticWallNow, ^uint64(TimeUnit-1))
394 | g.New(255)
395 |
396 | snapshot := g.Snapshot()
397 |
398 | g, err = NewGenerator(&snapshot, nil)
399 | if err != nil {
400 | t.Fatal(err)
401 | }
402 |
403 | if uint64(snapshot.WallSafe) != atomic.LoadUint64(&g.wallSafe) {
404 | t.Errorf("expected [%d], got [%d]", snapshot.WallSafe, atomic.LoadUint64(&g.wallSafe))
405 | }
406 |
407 | if uint64(snapshot.WallHi) != atomic.LoadUint64(&g.wallHi) {
408 | t.Errorf("expected [%d], got [%d]", snapshot.WallHi, atomic.LoadUint64(&g.wallHi))
409 | }
410 |
411 | // Second test, with a snapshot taken "in the future" (relative to current wall clock time).
412 | wall = internal.Snotime()
413 | atomic.StoreUint64(staticWallNow, wall+100*TimeUnit)
414 |
415 | // Simulate another regression. It takes place in the future - we then take a snapshot
416 | // and create a generator from that snapshot. The new generator will use snotime (the current time)
417 | // as its comparison point and is supposed to handle it as being in the past relative to the snapshot.
418 | g.New(255)
419 | atomic.AddUint64(staticWallNow, ^uint64(TimeUnit-1))
420 | g.New(255)
421 |
422 | snotime = internal.Snotime
423 |
424 | snapshot = g.Snapshot()
425 |
426 | g, err = NewGenerator(&snapshot, nil)
427 | if err != nil {
428 | t.Fatal(err)
429 | }
430 |
431 | if uint64(snapshot.WallSafe) != atomic.LoadUint64(&g.wallSafe) {
432 | t.Errorf("expected [%d], got [%d]", snapshot.WallSafe, atomic.LoadUint64(&g.wallSafe))
433 | }
434 |
435 | if wall > atomic.LoadUint64(&g.wallHi) {
436 | t.Errorf("expected smaller than [%d], got [%d]", wall, atomic.LoadUint64(&g.wallHi))
437 | }
438 | }
439 |
440 | func TestGenerator_NewWithTimeOverflows(t *testing.T) {
441 | var (
442 | part = Partition{255, 255}
443 | seqPool = 12
444 | seqOverflows = 4
445 | seqMin = uint16(seqPool)
446 | seqMax = uint16(2*seqPool - 1)
447 | sampleSize = seqPool * seqOverflows
448 |
449 | g, err = NewGenerator(&GeneratorSnapshot{
450 | Partition: part,
451 | SequenceMin: seqMin,
452 | SequenceMax: seqMax,
453 | }, nil)
454 | )
455 |
456 | if err != nil {
457 | t.Fatal(err)
458 | }
459 |
460 | tn := time.Now()
461 | pool := g.Cap()
462 |
463 | ids := make([]ID, sampleSize)
464 | for i := 0; i < sampleSize; i++ {
465 | ids[i] = g.NewWithTime(byte(i), tn)
466 | }
467 |
468 | timeDist := make(map[int64]int)
469 |
470 | for i, s := 0, 0; i < sampleSize; i, s = i+1, s+1 {
471 | id := ids[i]
472 | timeDist[id.Timestamp()]++
473 |
474 | seq := id.Sequence()
475 | if seq > seqMax {
476 | t.Errorf("%d: sequence overflowing max boundary; max [%d], got [%d]", i, seqMin, seq)
477 | }
478 |
479 | if seq < seqMin {
480 | t.Errorf("%d: sequence underflowing min boundary; min [%d], got [%d]", i, seqMin, seq)
481 | }
482 |
483 | // When we overflow with NewWithTime, the static sequence is supposed to roll over silently.
484 | if s == pool {
485 | s = 0
486 | } else if i > 0 && seq-ids[i-1].Sequence() != 1 {
487 | t.Errorf("%d: expected sequence to increment by 1, got [%d]", i, seq-ids[i-1].Sequence())
488 | }
489 |
490 | expectedSeq := uint16(s) + seqMin
491 | if seq != expectedSeq {
492 | t.Errorf("%d: expected sequence [%d], got [%d]", i, expectedSeq, seq)
493 | }
494 |
495 | if id.Partition() != part {
496 | t.Errorf("%d: partition differs from generator's partition; expected [%d], got [%d]", i, part, id.Partition())
497 | }
498 | }
499 |
500 | if len(timeDist) > 1 {
501 | t.Error("IDs generated with the same time ended up with different timestamps")
502 | }
503 |
504 | // Race test.
505 | var wg sync.WaitGroup
506 | wg.Add(1000)
507 | for i := 0; i < 1000; i++ {
508 | go func() {
509 | for i := 0; i < sampleSize; i++ {
510 | _ = g.NewWithTime(byte(i), tn)
511 | }
512 | wg.Done()
513 | }()
514 | }
515 | wg.Wait()
516 | }
517 |
518 | func TestGenerator_Uniqueness(t *testing.T) {
519 | var (
520 | collisions int
521 | setSize = 4 * MaxSequence
522 | )
523 |
524 | ids := make(map[ID]struct{}, setSize)
525 |
526 | for i := 0; i < setSize; i++ {
527 | id := generator.New(255)
528 | if _, found := ids[id]; found {
529 | collisions++
530 | } else {
531 | ids[id] = struct{}{}
532 | }
533 | }
534 |
535 | if collisions > 0 {
536 | t.Errorf("generated %d colliding IDs in a set of %d", collisions, setSize)
537 | }
538 | }
539 |
540 | func TestGenerator_Partition(t *testing.T) {
541 | expected := Partition{'A', 255}
542 | g, err := NewGenerator(&GeneratorSnapshot{
543 | Partition: expected,
544 | }, nil)
545 | if err != nil {
546 | t.Fatal(err)
547 | }
548 |
549 | actual := g.Partition()
550 | if actual != expected {
551 | t.Errorf("expected [%s], got [%s]", expected, actual)
552 | }
553 | }
554 |
555 | func TestGenerator_SequenceBounds(t *testing.T) {
556 | min := uint16(1024)
557 | max := uint16(2047)
558 | g, err := NewGenerator(&GeneratorSnapshot{
559 | SequenceMin: min,
560 | SequenceMax: max,
561 | }, nil)
562 | if err != nil {
563 | t.Fatal(err)
564 | }
565 |
566 | if actual, expected := g.SequenceMin(), min; actual != expected {
567 | t.Errorf("expected [%d], got [%d]", expected, actual)
568 | }
569 |
570 | if actual, expected := g.SequenceMax(), max; actual != expected {
571 | t.Errorf("expected [%d], got [%d]", expected, actual)
572 | }
573 |
574 | if actual, expected := g.Cap(), int(max-min)+1; actual != expected {
575 | t.Errorf("expected [%d], got [%d]", expected, actual)
576 | }
577 |
578 | if actual, expected := g.Len(), 0; actual != expected {
579 | t.Errorf("expected [%d], got [%d]", expected, actual)
580 | }
581 |
582 | for i := 0; i < 5; i++ {
583 | g.New(255)
584 | }
585 |
586 | if actual, expected := g.Len(), 5; actual != expected {
587 | t.Errorf("expected [%d], got [%d]", expected, actual)
588 | }
589 |
590 | g, err = NewGenerator(&GeneratorSnapshot{
591 | SequenceMin: 8,
592 | SequenceMax: 16,
593 | }, nil)
594 | if err != nil {
595 | t.Fatal(err)
596 | }
597 |
598 | // Simulate an overflow. All IDs over Cap() must be generated in a subsequent timeframe
599 | // meaning Len will reflect the count in the last frame.
600 | // TODO(alcore) This *can* occasionally fail as we are not using a deterministic time source,
601 | // meaning the first batch can get split up if time changes during the test and then end up
602 | // spilling into the Len() we test for.
603 | for i := 0; i < g.Cap()+7; i++ {
604 | g.New(255)
605 | }
606 |
607 | if actual, expected := g.Len(), 7; actual != expected {
608 | t.Errorf("expected [%d], got [%d]", expected, actual)
609 | }
610 |
611 | g, err = NewGenerator(&GeneratorSnapshot{
612 | SequenceMin: 8,
613 | SequenceMax: 16,
614 | }, nil)
615 | if err != nil {
616 | t.Fatal(err)
617 | }
618 |
619 | for i := 0; i < g.Cap(); i++ {
620 | g.New(255)
621 | }
622 |
623 | if actual, expected := g.Len(), g.Cap(); actual != expected {
624 | t.Errorf("expected [%d], got [%d]", expected, actual)
625 | }
626 | }
627 |
628 | func TestGenerator_Sequence_Single(t *testing.T) {
629 | g, err := NewGenerator(nil, nil)
630 | if err != nil {
631 | t.Fatal(err)
632 | }
633 |
634 | expected0 := uint32(0)
635 | expected1 := expected0
636 | expected2 := expected1 + 1
637 | actual0 := g.Sequence()
638 | _ = g.New(255)
639 | actual1 := g.Sequence()
640 | _ = g.New(255)
641 | actual2 := g.Sequence()
642 |
643 | if actual0 != expected0 {
644 | t.Errorf("expected [%d], got [%d]", expected0, actual0)
645 | }
646 | if actual1 != expected1 {
647 | t.Errorf("expected [%d], got [%d]", expected1, actual1)
648 | }
649 | if actual2 != expected2 {
650 | t.Errorf("expected [%d], got [%d]", expected2, actual2)
651 | }
652 | }
653 |
654 | func TestGenerator_Sequence_Batch(t *testing.T) {
655 | g, err := NewGenerator(nil, nil)
656 | if err != nil {
657 | t.Fatal(err)
658 | }
659 |
660 | expected := uint32(9)
661 | for i := 0; i <= int(expected); i++ {
662 | _ = g.New(255)
663 | }
664 |
665 | actual := g.Sequence()
666 | if actual != expected {
667 | t.Errorf("expected [%d], got [%d]", expected, actual)
668 | }
669 | }
670 |
671 | func TestGenerator_FromSnapshot_Sequence(t *testing.T) {
672 | seq := uint32(1024)
673 | g, err := NewGenerator(&GeneratorSnapshot{
674 | SequenceMin: uint16(seq),
675 | Sequence: seq,
676 | }, nil)
677 | if err != nil {
678 | t.Fatal(err)
679 | }
680 |
681 | expected1 := seq
682 | expected2 := seq + 1
683 | _ = g.New(255)
684 | actual1 := g.Sequence()
685 | _ = g.New(255)
686 | actual2 := g.Sequence()
687 |
688 | if actual1 != expected1 {
689 | t.Errorf("expected [%d], got [%d]", expected1, actual1)
690 | }
691 | if actual2 != expected2 {
692 | t.Errorf("expected [%d], got [%d]", expected2, actual2)
693 | }
694 | }
695 |
696 | func TestGenerator_FromSnapshot_Pool_Defaults(t *testing.T) {
697 | t.Parallel()
698 |
699 | g, err := NewGenerator(&GeneratorSnapshot{
700 | SequenceMin: 0,
701 | SequenceMax: 0,
702 | }, nil)
703 | if err != nil {
704 | t.Fatal(err)
705 | }
706 |
707 | if g.SequenceMin() != 0 {
708 | t.Errorf("expected [%d], got [%d]", 0, g.SequenceMin())
709 | }
710 |
711 | if g.SequenceMax() != MaxSequence {
712 | t.Errorf("expected [%d], got [%d]", MaxSequence, g.SequenceMax())
713 | }
714 |
715 | // Max as default when min is given.
716 | g, err = NewGenerator(&GeneratorSnapshot{
717 | SequenceMin: 2048,
718 | }, nil)
719 | if err != nil {
720 | t.Fatal(err)
721 | }
722 |
723 | if g.SequenceMin() != 2048 {
724 | t.Errorf("expected [%d], got [%d]", 2048, g.SequenceMin())
725 | }
726 |
727 | if g.SequenceMax() != MaxSequence {
728 | t.Errorf("expected [%d], got [%d]", MaxSequence, g.SequenceMax())
729 | }
730 | }
731 |
732 | func TestGenerator_FromSnapshot_Pool_BoundsOrder(t *testing.T) {
733 | t.Parallel()
734 |
735 | g, err := NewGenerator(&GeneratorSnapshot{
736 | SequenceMin: 2048,
737 | SequenceMax: 1024,
738 | }, nil)
739 | if err != nil {
740 | t.Fatal(err)
741 | }
742 |
743 | if g.SequenceMin() != 1024 {
744 | t.Errorf("expected [%d], got [%d]", 1024, g.SequenceMin())
745 | }
746 |
747 | if g.SequenceMax() != 2048 {
748 | t.Errorf("expected [%d], got [%d]", 2048, g.SequenceMax())
749 | }
750 | }
751 |
752 | func TestGenerator_FromSnapshot_Pool_None(t *testing.T) {
753 | t.Parallel()
754 |
755 | bound := uint16(2048)
756 | _, err := NewGenerator(&GeneratorSnapshot{
757 | SequenceMin: bound,
758 | SequenceMax: bound,
759 | }, nil)
760 | if err == nil {
761 | t.Errorf("expected error, got none")
762 | return
763 | }
764 |
765 | verr, ok := err.(*InvalidSequenceBoundsError)
766 | if !ok {
767 | t.Errorf("expected error type [%T], got [%T]", &InvalidSequenceBoundsError{}, err)
768 | return
769 | }
770 |
771 | if verr.Msg != errSequenceBoundsIdenticalMsg {
772 | t.Errorf("expected error msg [%s], got [%s]", errSequenceBoundsIdenticalMsg, verr.Msg)
773 | }
774 |
775 | if verr.Min != bound {
776 | t.Errorf("expected [%d], got [%d]", bound, verr.Min)
777 | }
778 |
779 | if verr.Max != bound {
780 | t.Errorf("expected [%d], got [%d]", bound, verr.Max)
781 | }
782 |
783 | expectedMsg := fmt.Sprintf(errInvalidSequenceBoundsFmt, errSequenceBoundsIdenticalMsg, bound, 0, bound, 1)
784 | if verr.Error() != expectedMsg {
785 | t.Errorf("expected error msg [%s], got [%s]", expectedMsg, verr.Error())
786 | }
787 | }
788 |
789 | func TestGenerator_FromSnapshot_Pool_Size(t *testing.T) {
790 | t.Parallel()
791 |
792 | seqMin := uint16(0)
793 | seqMax := seqMin + minSequencePoolSize - 1
794 | _, err := NewGenerator(&GeneratorSnapshot{
795 | SequenceMin: seqMin,
796 | SequenceMax: seqMax,
797 | }, nil)
798 | if err == nil {
799 | t.Errorf("expected error, got none")
800 | return
801 | }
802 |
803 | verr, ok := err.(*InvalidSequenceBoundsError)
804 | if !ok {
805 | t.Errorf("expected error type [%T], got [%T]", &InvalidSequenceBoundsError{}, err)
806 | return
807 | }
808 |
809 | if verr.Msg != errSequencePoolTooSmallMsg {
810 | t.Errorf("expected error msg [%s], got [%s]", errSequencePoolTooSmallMsg, verr.Msg)
811 | }
812 |
813 | if verr.Min != seqMin {
814 | t.Errorf("expected [%d], got [%d]", seqMin, verr.Min)
815 | }
816 |
817 | if verr.Max != seqMax {
818 | t.Errorf("expected [%d], got [%d]", seqMax, verr.Max)
819 | }
820 |
821 | expectedMsg := fmt.Sprintf(errInvalidSequenceBoundsFmt, errSequencePoolTooSmallMsg, seqMin, 0, seqMax, seqMax-seqMin+1)
822 | if verr.Error() != expectedMsg {
823 | t.Errorf("expected error msg [%s], got [%s]", expectedMsg, verr.Error())
824 | }
825 | }
826 |
827 | func TestGenerator_FromSnapshot_Underflow(t *testing.T) {
828 | t.Parallel()
829 |
830 | seqMin := uint16(2048)
831 | seq := uint32(seqMin - 1)
832 | _, err := NewGenerator(&GeneratorSnapshot{
833 | SequenceMin: seqMin,
834 | Sequence: seq,
835 | }, nil)
836 | if err == nil {
837 | t.Errorf("expected error, got none")
838 | return
839 | }
840 |
841 | verr, ok := err.(*InvalidSequenceBoundsError)
842 | if !ok {
843 | t.Errorf("expected error type [%T], got [%T]", &InvalidSequenceBoundsError{}, err)
844 | return
845 | }
846 |
847 | if verr.Msg != errSequenceUnderflowsBound {
848 | t.Errorf("expected error msg [%s], got [%s]", errSequenceUnderflowsBound, verr.Msg)
849 | }
850 |
851 | if verr.Min != seqMin {
852 | t.Errorf("expected [%d], got [%d]", seqMin, verr.Min)
853 | }
854 |
855 | if verr.Cur != seq {
856 | t.Errorf("expected [%d], got [%d]", seq, verr.Cur)
857 | }
858 |
859 | expectedMsg := fmt.Sprintf(errInvalidSequenceBoundsFmt, errSequenceUnderflowsBound, seqMin, seq, MaxSequence, MaxSequence-seqMin+1)
860 | if verr.Error() != expectedMsg {
861 | t.Errorf("expected error msg [%s], got [%s]", expectedMsg, verr.Error())
862 | }
863 | }
864 |
865 | func TestGenerator_Snapshot(t *testing.T) {
866 | var (
867 | part = Partition{128, 255}
868 | seqMin = uint16(1024)
869 | seqMax = uint16(2047)
870 | seq = uint32(1024)
871 | )
872 |
873 | snap := &GeneratorSnapshot{
874 | Partition: part,
875 | SequenceMin: seqMin,
876 | SequenceMax: seqMax,
877 | Sequence: seq,
878 | }
879 |
880 | g, err := NewGenerator(snap, nil)
881 | if err != nil {
882 | t.Fatal(err)
883 | }
884 |
885 | actual := g.Snapshot()
886 | if actual.Sequence != seq {
887 | t.Errorf("expected [%d], got [%d]", seq, actual.Sequence)
888 | }
889 |
890 | atomic.AddUint32(&g.drifts, 1)
891 | wallNow := snotime()
892 | g.New(255) // First call will catch a zero wallHi and reset the sequence, while we want to measure an increment.
893 | g.New(255)
894 | actual = g.Snapshot()
895 |
896 | if uint64(actual.Now) != wallNow {
897 | t.Errorf("expected [%d], got [%d]", wallNow, actual.Now)
898 | }
899 |
900 | if uint64(actual.WallHi) != wallNow {
901 | t.Errorf("expected [%d], got [%d]", wallNow, actual.WallHi)
902 | }
903 |
904 | if actual.Drifts != 1 {
905 | t.Errorf("expected [%d], got [%d]", 1, actual.Drifts)
906 | }
907 |
908 | if actual.Sequence != seq+1 {
909 | t.Errorf("expected [%d], got [%d]", seq+1, actual.Sequence)
910 | }
911 |
912 | if actual.Partition != part {
913 | t.Errorf("expected [%s], got [%s]", part, actual.Partition)
914 | }
915 |
916 | if actual.SequenceMin != seqMin {
917 | t.Errorf("expected [%d], got [%d]", seqMin, actual.SequenceMin)
918 | }
919 |
920 | if actual.SequenceMax != seqMax {
921 | t.Errorf("expected [%d], got [%d]", seqMax, actual.SequenceMax)
922 | }
923 | }
924 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | A spec for **unique IDs in distributed systems** based on the Snowflake design, i.e. a coordination-based ID variant.
4 | It aims to be friendly to both machines and humans, compact, *versatile* and fast.
5 |
6 | This repository contains a **Go** package for generating such IDs.
7 |
8 | [](https://pkg.go.dev/github.com/muyo/sno?tab=doc)
9 | [](https://github.com/muyo/sno/releases)
10 | [](https://travis-ci.com/muyo/sno)
11 | [](https://codecov.io/gh/muyo/sno)
12 | [](https://goreportcard.com/report/github.com/muyo/sno)
13 | [](https://raw.githubusercontent.com/muyo/sno/master/LICENSE)
14 | ```bash
15 | go get -u github.com/muyo/sno
16 | ```
17 |
18 | ### Features
19 |
20 | - **Compact** - **10 bytes** in its binary representation, canonically [encoded](#encoding) as **16 characters**.
21 |
URL-safe and non-ambiguous encoding which also happens to be at the binary length of UUIDs -
22 | **sno**s can be stored as UUIDs in your database of choice.
23 | - **K-sortable** in either representation.
24 | - **[Embedded timestamp](#time-and-sequence)** with a **4msec resolution**, bounded within the years **2010 - 2079**.
25 |
Handles clock drifts gracefully, without waiting.
26 | - **[Embedded byte](#metabyte)** for arbitrary data.
27 | - **[Simple data layout](#layout)** - straightforward to inspect or encode/decode.
28 | - **[Optional and flexible](#usage)** configuration and coordination.
29 | - **[Fast](./benchmark#results)**, wait-free, safe for concurrent use.
30 |
Clocks in at about 500 LoC, has no external dependencies and minimal dependencies on std.
31 | - A pool of **≥ 16,384,000** IDs per second.
32 |
65,536 guaranteed unique IDs per 4msec per partition (65,536 combinations) per metabyte
33 | (256 combinations) per tick-tock (1 bit adjustment for clock drifts).
34 | **549,755,813,888,000** is the global pool **per second** when all components are taken into account.
35 |
36 | ### Non-features / cons
37 |
38 | - True randomness. **sno**s embed a counter and have **no entropy**. They are not suitable in a context where
39 | unpredictability of IDs is a must. They still, however, meet the common requirement of keeping internal counts
40 | (e.g. total number of entities) unguessable and appear obfuscated;
41 | - Time precision. While *good enough* for many use cases, not quite there for others. The ➜ [Metabyte](#metabyte)
42 | can be used to get around this limitation, however.
43 | - It's 10 bytes, not 8. This is suboptimal as far as memory alignment is concerned (platform dependent).
44 |
45 |
46 |
47 |
48 | ## Usage (➜ [API](https://pkg.go.dev/github.com/muyo/sno?tab=doc))
49 |
50 | **sno** comes with a package-level generator on top of letting you configure your own generators.
51 |
52 | Generating a new ID using the defaults takes no more than importing the package and:
53 |
54 | ```go
55 | id := sno.New(0)
56 | ```
57 |
58 | Where `0` is the ➜ [Metabyte](#metabyte).
59 |
60 | The global generator is immutable and private. It's therefore also not possible to restore it using a Snapshot.
61 | Its Partition is based on time and changes across restarts.
62 |
63 | ### Partitions (➜ [doc](https://pkg.go.dev/github.com/muyo/sno?tab=doc#Partition))
64 |
65 | As soon as you run more than 1 generator, you **should** start coordinating the creation of Generators to
66 | actually *guarantee* a collision-free ride. This applies to all specs of the Snowflake variant.
67 |
68 | Partitions are one of several friends you have to get you those guarantees. A Partition is 2 bytes.
69 | What they mean and how you define them is up to you.
70 |
71 | ```go
72 | generator, err := sno.NewGenerator(&sno.GeneratorSnapshot{
73 | Partition: sno.Partition{'A', 10},
74 | }, nil)
75 | ```
76 |
77 | Multiple generators can share a partition by dividing the sequence pool between
78 | them (➜ [Sequence sharding](#sequence-sharding)).
79 |
80 | ### Snapshots (➜ [doc](https://pkg.go.dev/github.com/muyo/sno?tab=doc#GeneratorSnapshot))
81 |
82 | Snapshots happen to serve both as configuration and a means of saving and restoring generator data. They are
83 | optional - simply pass `nil` to `NewGenerator()`, to get a Generator with sane defaults and a unique (in-process)
84 | Partition.
85 |
86 | Snapshots can be taken at runtime:
87 |
88 | ```go
89 | s := generator.Snapshot()
90 | ```
91 |
92 | This exposes most of a Generator's internal bookkeeping data. In an ideal world where programmers are not lazy
93 | until their system runs into an edge case - you'd persist that snapshot across restarts and restore generators
94 | instead of just creating them from scratch each time. This will keep you safe both if a large clock drift happens
95 | during the restart -- or before, and you just happen to come back online again "in the past", relative to IDs that
96 | had already been generated.
97 |
98 | A snapshot is a sample in time - it will very quickly get stale. Only take snapshots meant for restoring them
99 | later when generators are already offline - or for metrics purposes when online.
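
A minimal sketch of such persistence, assuming a plain file and JSON fit your setup (the snapshot is made up
of exported, marshalable fields) - the helpers and the file path are illustrative, not part of **sno**:

```go
// Uses encoding/json and os. saveSnapshot and restoreGenerator are hypothetical helpers.
func saveSnapshot(path string, g *sno.Generator) error {
	s := g.Snapshot()
	buf, err := json.Marshal(&s)
	if err != nil {
		return err
	}
	return os.WriteFile(path, buf, 0o600)
}

func restoreGenerator(path string) (*sno.Generator, error) {
	buf, err := os.ReadFile(path)
	if err != nil {
		// No snapshot persisted yet - start fresh with sane defaults.
		return sno.NewGenerator(nil, nil)
	}

	var s sno.GeneratorSnapshot
	if err := json.Unmarshal(buf, &s); err != nil {
		return nil, err
	}

	return sno.NewGenerator(&s, nil)
}
```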
100 |
101 |
102 |
103 |
104 | ## Layout
105 |
106 | A **sno** is simply 80-bits comprised of two 40-bit blocks: the **timestamp** and the **payload**. The bytes are
107 | stored in **big-endian** order in all representations to retain their sortable property.
108 | 
109 | Both blocks can be inspected and mutated independently in either representation. Bits of the components in the binary
110 | representation don't spill over into other bytes which means no additional bit twiddling voodoo is necessary* to extract
111 | them.
112 |
113 | \*The tick-tock bit in the timestamp is the only exception (➜ [Time and sequence](#time-and-sequence)).
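
For illustration, the byte offsets this layout implies (a sketch based on the description above; the library's
own accessors, e.g. `Partition()` and `Sequence()`, are the proper way to read components):

```go
id := sno.New(0)

timestampBlock := id[0:5]                    // 39-bit timestamp + the tick-tock bit (lowest bit of id[4])
meta := id[5]                                // metabyte
partition := sno.Partition{id[6], id[7]}     // fixed partition
sequence := uint16(id[8])<<8 | uint16(id[9]) // sequence, big-endian
```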
114 |
115 |
116 |
117 | ## Time and sequence
118 |
119 | ### Time
120 |
121 | **sno**s embed a timestamp comprised of 39 bits with the epoch **milliseconds at a 4msec resolution** (floored,
122 | unsigned) and one bit, the LSB of the entire block - for the tick-tock toggle.
123 |
124 | ### Epoch
125 |
126 | The **epoch is custom** and **constant**. It is bounded within `2010-01-01 00:00:00 UTC` and
127 | `2079-09-07 15:47:35.548 UTC`. The lower bound is `1262304000` seconds relative to Unix.
128 |
129 | If you *really* have to break out of the epoch - or want to store higher precision - the metabyte is your friend.
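
As a back-of-the-envelope sketch of those bounds (not the library's internal clock path), the timestamp is
simply the number of 4msec units since the lower bound, floored, fitting in 39 bits:

```go
// epochMsec is the lower bound in Unix milliseconds (1262304000 seconds).
const epochMsec = 1262304000 * 1000

func toTimestampUnits(t time.Time) uint64 {
	return uint64(t.UnixMilli()-epochMsec) / 4 // 4 msec units, floored
}

// (1<<39 - 1) units of 4 msec is roughly 69.7 years past 2010, hence the 2079 upper bound.
```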
130 |
131 | ### Precision
132 |
133 | Higher precision *is not necessarily* a good thing. Think in dataset and sorting terms, or in sampling rates. You
134 | want to grab all requests with an error code of `403` in a given second, where the code may be encoded in the metabyte.
135 | At a resolution of 1 second, you binary search for just one index and then proceed straight up linearly.
136 | That's simple enough.
137 |
138 | At a resolution of 1msec however, you now need to find the corresponding 1000 potential starting offsets because
139 | your `403` requests are interleaved with the `200` requests (potentially). At 4msec, this is 250 steps.
140 |
141 | Everything has tradeoffs. This was a compromise between precision, size, simple data layout -- and factors like the one above.
142 |
143 | ### Sequence
144 |
145 | **sno**s embed a sequence (2 bytes) that is **relative to time**. It does not overflow and resets on each new time
146 | unit (4msec). A higher sequence within a given timeframe **does not necessarily indicate order of creation**.
147 | It is not advertised as monotonic because its monotonicity is dependent on usage. A single generator writing
148 | to a single partition, *ceteris paribus*, *will* result in monotonic increments and *will* represent order of creation.
149 |
150 | With multiple writers in the same partition, increment order is *undefined*. If the generator moves back in time,
151 | the order will still be monotonic, but the IDs will sort either 2msec before or after IDs previously written at that
152 | time (see tick-tock).
153 |
154 | #### Sequence sharding
155 |
156 | The sequence pool has a range of `[0..65535]` (inclusive). **sno** supports partition sharing out of the box
157 | by further sharding the sequence - that is multiple writers (generators) in the same partition.
158 |
159 | This is done by dividing the pool between all writers, via user-specified bounds.
160 |
161 | A generator will reset to its lower bound on each new time unit - and will never overflow its upper bound.
162 | Collisions are therefore guaranteed to be impossible unless the bounds are misconfigured and overlap with those of another
163 | *currently online* generator.
164 |
165 |
166 |
167 | Star Trek: Voyager mode, How to shard sequences
168 |
169 |
170 | This can be useful when multiple containers on one physical machine are to write as a cluster to a partition
171 | defined by the machine's ID (or simpler - multiple processes on one host). Or if multiple remote
172 | services across the globe were to do that.
173 |
174 | ```go
175 | var PeoplePartition = sno.Partition{'P', 0}
176 |
177 | // In process/container/remote host #1
178 | generator1, err := sno.NewGenerator(&sno.GeneratorSnapshot{
179 | Partition: PeoplePartition,
180 | SequenceMin: 0,
181 | SequenceMax: 32767, // 32768 - 1
182 | }, nil)
183 |
184 | // In process/container/remote host #2
185 | generator2, err := sno.NewGenerator(&sno.GeneratorSnapshot{
186 | Partition: PeoplePartition,
187 | SequenceMin: 32768,
188 | SequenceMax: 65535, // 65536 - 1
189 | }, nil)
190 | ```
191 |
192 | You will notice that we have simply divided our total pool of 65,536 into 2 even and **non-overlapping**
193 | sectors. In the first snapshot `SequenceMin` could be omitted - and `SequenceMax` in the second, as those are the
194 | defaults used when they are not defined. You will get an error when trying to set limits above the capacity of
195 | generators, but since the library is oblivious to your setup - it cannot warn you about overlaps and cannot
196 | resize on its own either.
197 |
198 | The pools can be defined arbitrarily - as long as you make sure they don't overlap across *currently online*
199 | generators.
200 |
201 | It is safe for a range previously used by another generator to be assigned to a different generator under the
202 | following conditions:
203 | - it happens in a different timeframe *in the future*, i.e. no sooner than after 4msec have passed (no orchestrator
204 | is fast enough to get a new container online to replace a dead one for this to be a worry);
205 | - if you can guarantee the new Generator won't regress into a time the previous Generator was running in.
206 |
207 | If you create the new Generator using a Snapshot of the former as it went offline, you do not need to worry about those
208 | conditions and can resume writing to the same range immediately - the obvious tradeoff being the need to coordinate
209 | the exchange of Snapshots.
210 |
211 | If your clusters are always fixed size - reserving ranges is straightforward. With dynamic sizes, a potential simple
212 | scheme is to reserve the lower byte of the partition for scaling. Divide your sequence pool by, say, 8, keep
213 | assigning higher ranges until you hit your divider. When you do, increment partition by 1, start assigning
214 | ranges from scratch. This gives you 2048 identifiable origins while using just one byte of the partition.
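
A hypothetical helper for that scheme (the names and the divider of 8 are illustrative, not part of **sno**):

```go
// shardFor maps the n-th writer (0-based) onto a low partition byte and a
// non-overlapping sequence range, using 8 ranges of 8192 sequences each.
func shardFor(n int) (partitionLow byte, seqMin, seqMax uint16) {
	const ranges = 8
	const size = 65536 / ranges

	partitionLow = byte(n / ranges)
	slot := n % ranges
	seqMin = uint16(slot * size)
	seqMax = uint16(slot*size + size - 1)

	return partitionLow, seqMin, seqMax
}
```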
215 |
216 | That said, the partition pool available is large enough that the likelihood you'll ever *need*
217 | this is slim to none. Suffice to know you *can* if you want to.
218 |
219 | Besides guaranteeing a collision-free ride, this approach can also be used to attach more semantic meaning to
220 | partitions themselves, as they are placed higher in the sort order.
221 | In other words - with it, the origin of an ID can be determined by inspecting the sequence
222 | alone, which frees up the partition for another meaning.
223 |
224 | How about...
225 |
226 | ```go
227 | var requestIDGenerator, _ = sno.NewGenerator(&sno.GeneratorSnapshot{
228 | SequenceMax: 32767,
229 | }, nil)
230 |
231 | type Service byte
232 | type Call byte
233 |
234 | const (
235 | UsersSvc Service = 1
236 | UserList Call = 1
237 | UserCreate Call = 2
238 | UserDelete Call = 3
239 | )
240 |
241 | func genRequestID(svc Service, methodID Call) sno.ID {
242 | id := requestIDGenerator.New(byte(svc))
243 | // Overwrites the upper byte of the fixed partition.
244 | // In our case - we didn't define it but gave a non-nil snapshot, so it is {0, 0}.
245 | id[6] = byte(methodID)
246 |
247 | return id
248 | }
249 | ```
250 |
251 |
252 |
253 |
254 | #### Sequence overflow
255 |
256 | Remember that limiting the sequence pool also limits max throughput of each generator. For an explanation on what
257 | happens when you're running at or over capacity, see the details below or take a look at ➜ [Benchmarks](#benchmarks)
258 | which explains the numbers involved.
259 |
260 |
261 | Star Trek: Voyager mode, Behaviour on sequence overflow
262 |
263 |
264 | The sequence never overflows and the generator is designed with a single-return `New()` method that does not return
265 | errors nor invalid IDs. *Realistically* the default generator will never overflow simply because you won't saturate
266 | the capacity.
267 |
268 | But since you can set bounds yourself, the capacity could shrink to `4` per 4msec (smallest allowed).
269 | Now that's more likely. So when you start overflowing, the generator will *stall* and *pray* for a
270 | reduction in throughput sometime in the near future.
271 |
272 | From **sno**'s perspective, requesting more IDs than it can safely give you **immediately** is not an error - but
273 | it *may* require correcting on *your end*. And you should know about that. Therefore, if
274 | you want to know when it happens - simply give **sno** a channel along with its configuration snapshot.
275 |
276 | When a thread requests an ID and gets stalled, **once** per time unit, you will get a `SequenceOverflowNotification`
277 | on that channel.
278 |
279 | ```go
280 | type SequenceOverflowNotification struct {
281 | Now time.Time // Time of tick.
282 | Count uint32 // Number of currently overflowing generation calls.
283 | Ticks uint32 // For how many ticks in total we've already been dealing with the *current* overflow.
284 | }
285 | ```
286 | Keep track of the counter. If it keeps increasing, you're no longer bursting - you're simply over capacity
287 | and *eventually* need to slow down or you'll *eventually* starve your system. The `Ticks` count lets you estimate
288 | how long the generator has already been overflowing without keeping track of time yourself. A tick is *roughly* 1ms.
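
A rough sketch of wiring that up (the channel is passed as the second argument to `NewGenerator`; a pointer
element type is assumed here - check the package docs for the exact signature):

```go
// Uses the log package. Buffer size and bounds are arbitrary.
overflows := make(chan *sno.SequenceOverflowNotification, 4)

generator, err := sno.NewGenerator(&sno.GeneratorSnapshot{
	SequenceMax: 1023, // Deliberately tiny capacity to make overflows likely.
}, overflows)
if err != nil {
	panic(err)
}

go func() {
	for n := range overflows {
		// A steadily growing Count means sustained over-capacity, not a burst.
		log.Printf("sno: %d generation calls stalled for %d ticks", n.Count, n.Ticks)
	}
}()

_ = generator.New(0) // Generate as usual; the monitor above only observes.
```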
289 |
290 | The order of generation when stalling occurs is `undefined`. It is not a FIFO queue, it's a race. Previously stalled
291 | goroutines get woken up alongside inflight goroutines which have not yet been stalled, where the order of the former is
292 | handled by the runtime. A livelock is therefore possible if demand doesn't decrease. This behaviour *may* change and
293 | inflight goroutines *may* get thrown onto the stalling wait list if one is up and running, but this requires careful
294 | inspection. And since this is considered an unrealistic scenario which can be avoided with simple configuration,
295 | it's not a priority.
296 |
297 |
298 |
299 |
300 | #### Clock drift and the tick-tock toggle
301 |
302 | Just like all other specs that rely on clock times to resolve ambiguity, **sno**s are prone to clock drifts. But
303 | unlike all those other specs, **sno** adjusts itself to the new time - instead of waiting (blocking), it tick-tocks.
304 |
305 | **The tl;dr** applying to any system, really: ensure your deployments use properly synchronized system clocks
306 | (via NTP) to mitigate the *size* of drifts. Ideally, use an NTP server pool that applies
307 | a gradual [smear for leap seconds](https://developers.google.com/time/smear). Despite the original Snowflake spec
308 | suggesting otherwise, using NTP in slew mode (to avoid regressions entirely)
309 | [is not always a good idea](https://www.redhat.com/en/blog/avoiding-clock-drift-vms).
310 |
311 | Also remember that containers tend to get *paused*, meaning their clocks are paused with them.
312 |
313 | As far as **sno**, collisions and performance are concerned, in typical scenarios you can enjoy a wait-free ride
314 | without requiring slew mode nor having to worry about even large drifts.
315 |
316 |
317 | Star Trek: Voyager mode, How tick-tocking works
318 |
319 |
320 | **sno** attempts to eliminate the issue *entirely* - both despite and because of its small pool of bits to work with.
321 |
322 | The approach it takes is simple - each generator keeps track of the highest wall clock time it got from the OS\*,
323 | each time it generates a new timestamp. If we get a time that is lower than the one we recorded, i.e. the clock
324 | drifted backwards and we'd risk generating colliding IDs, we toggle a bit - stored from here on out in
325 | each **sno** generated *until the next regression*. Rinse, repeat - tick, tock.
326 |
327 | (\*IDs created with a user-defined time are exempt from this mechanism as their time is arbitrary. The means
328 | to *bring your own time* are provided to make porting old IDs simpler and is assumed to be done before an ID
329 | scheme goes online)
330 |
331 | In practice this means that we switch back and forth between two alternating timelines. Remember how the pool
332 | we've got is 16,384,000 IDs per second? When we tick or tock, we simply jump between two pools with the same
333 | capacity.
334 |
335 | Why not simply use that bit to store a higher resolution time fraction? True, we'd get twice the pool which
336 | seemingly boils down to the same - except it doesn't. That is due to how the sequence increases. Even if you
337 | had a throughput of merely 1 ID per hour, while the chance would be astronomically low - if the clock drifted
338 | back that whole hour, you *could* get a collision. The higher your throughput, the bigger the probability.
339 | IDs of the Snowflake variant, **sno** being one of them, are about **guarantees - not probabilities**.
340 | So this is a **sno-go**.
341 |
342 | (I will show myself out...)
343 |
344 | The simplistic approach of tick-tocking *entirely eliminates* that collision chance - but with a rigorous assumption:
345 | regressions happen at most once into a specific period, i.e. from the highest recorded time into the past
346 | and never back into that particular timeframe (let alone even further into the past).
347 |
348 | This *generally* is exactly the case but oddities as far as time synchronization, bad clocks and NTP client
349 | behaviour goes *do* happen. And in distributed systems, every edge case that can happen - *will* happen. What do?
350 |
351 | ##### How others do it
352 |
353 | - [Sonyflake] goes to sleep until back at the wall clock time it was already at
354 | previously. All goroutines attempting to generate are blocked.
355 | - [snowflake] hammers the OS with syscalls to get the current time until back
356 | at the time it was already at previously. All goroutines attempting to generate are blocked.
357 | - [xid] goes ¯\\_(ツ)_/¯ and does not tackle drifts at all.
358 | - Entropy-based specs (like UUID or KSUID) don't really need to care as they are generally not prone, even to
359 | extreme drifts - you run with a risk all the time.
360 |
361 | The approach one library took was to keep generating, but timestamp all IDs with the highest time recorded instead.
362 | This worked, because it had a large entropy pool to work with, for one (so a potential large spike in IDs generated
363 | in the same timeframe wasn't much of a consideration). **sno** has none. But more importantly - it disagrees on the
364 | reasoning about time and clocks. If we moved backwards, it means that an *adjustment* happened and we are *now*
365 | closer to the *correct* time from the perspective of a wider system.
366 |
367 | **sno** therefore keeps generating without waiting, using the time as reported by the system - in the "past" so to
368 | speak, but with the tick-tock bit toggled.
369 |
370 | *If* another regression happens, into that timeframe or even further back, *only then* do we tell all contenders
371 | to wait. We get a wait-free fast path *most of the time* - and safety if things go southways.
372 |
373 | ##### Tick-tocking obviously affects the sort order as it changes the timestamp
374 |
375 | Even though the toggle is *not* part of the milliseconds, you can think of it as if it were. Toggling is then like
376 | moving two milliseconds back and forth, but since our milliseconds are floored to increments of 4msec, we never
377 | hit the range of a previous timeframe. Alternating timelines are as such sorted *as if* they were 2msec apart from
378 | each other, but as far as the actual stored time is considered - they are timestamped at exactly the same millisecond.
379 |
380 | They won't sort in an interleaved fashion, but will be *right next* to the other timeline. Technically they *were*
381 | created at a different time, so being able to make that distinction is considered a plus by the author.
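
For the curious - the toggle lands in the lowest bit of the fifth timestamp byte, so which of the two timelines
an ID belongs to can be peeked at directly (a sketch relying on the layout, not a dedicated API):

```go
tocked := id[4]&1 == 1 // true for IDs generated in the alternate ("tock") timeline
```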
382 |
383 |
384 |
385 |
386 |
387 | ## Metabyte
388 |
389 | The **metabyte** is unique to **sno** across the specs the author researched, but the concept of embedding metadata
390 | in IDs is an ancient one. It's effectively just a *byte-of-whatever-you-want-it-to-be* - but perhaps
391 | *8-bits-of-whatever-you-want-them-to-be* does a better job of explaining its versatility.
392 |
393 | ### `0` is a valid metabyte
394 |
395 | **sno** is agnostic as to what that byte represents and it is **optional**. None of the properties of **sno**s
396 | get violated if you simply pass a `0`.
397 |
398 | However, if you can't find a use for it, then you may be better served using a different ID spec/library
399 | altogether (➜ [Alternatives](#alternatives)). You'd be wasting a byte that could give you benefits elsewhere.
400 |
401 | ### Why?
402 |
403 | Many databases, especially embedded ones, are extremely efficient when all you need is the keys - not all
404 | the data all those keys represent. None of the Snowflake-like specs would provide a means to do that without
405 | excessive overrides (or too small a pool to work with), essentially a different format altogether, and so - **sno**.
406 |
407 |
408 |
409 | And simple constants tend to do the trick.
410 |
411 |
412 |
413 | Untyped integers can pass as `uint8` (i.e. `byte`) in Go, so the following would work and keep things tidy:
414 |
415 | ```go
416 | const (
417 | PersonType = iota
418 | OtherType
419 | )
420 |
421 | type Person struct {
422 | ID sno.ID
423 | Name string
424 | }
425 |
426 | person := Person{
427 | ID: sno.New(PersonType),
428 | Name: "A Special Snöflinga",
429 | }
430 | ```
431 |
432 |
433 |
434 |
435 |
436 | *Information that describes something* has the nice property of also helping to *identify* something across a sea
437 | of possibilities. It's a natural fit.
438 |
439 | Do everyone a favor, though, and **don't embed confidential information**. It will stop being confidential and
440 | become public knowledge the moment you do that. Let's stick to *nice* property, avoiding `PEBKAC`.
441 |
442 | ### Sort order and placement
443 |
444 | The metabyte follows the timestamp. This clusters IDs by the timestamp and then by the metabyte (for example -
445 | the type of the entity), *before* the fixed partition.
446 |
447 | If you were to use machine-ID based partitions across a cluster generating, say, `Person` entities, where `Person`
448 | corresponds to a metabyte of `1` - this has the neat property of grouping all `People` generated across the entirety
449 | of your system in the given timeframe in a sortable manner. In database terms, you *could* think of the metabyte as
450 | identifying a table that is sharded across many partitions - or as part of a compound key. But that's just one of
451 | many ways it can be utilized.
452 |
453 | Placement at the beginning of the second block allows the metabyte to potentially both extend the timestamp
454 | block or provide additional semantics to the payload block. Even if you always leave it empty, neither sort
455 | order nor sort/insert performance will be hampered.
456 |
457 | ### But it's just a single byte!
458 |
459 | A single byte is plenty.
460 |
461 |
462 | Here's a few ideas for things you did not know you wanted, yet.
463 |
464 |
465 | - IDs for requests in a HTTP context: 1 byte is enough to contain one of all possible standard HTTP status codes.
466 | *Et voila*, you now got all requests that resulted in an error nicely sorted and clustered.
467 |
Limit yourself to the non-exotic status codes and you can store the HTTP verb along with the status code.
468 | In that single byte. Suddenly even the partition (if it's tied to a machine/cluster) gains relevant semantics,
469 | as you've gained a timeseries of requests that started fail-cascading in the cluster. Constrain yourself even
470 | further to just one bit for `OK` or `ERROR` and you made room to also store information about the operation that
471 | was requested (think resource endpoint).
472 |
473 | - How about storing a (immutable) bitmask along with the ID? Save some 7 bytes of bools by doing so and have the
474 | flags readily available during an efficient sequential key traversal using your storage engine of choice.
475 |
476 | - Want to version-control a `Message`? Limit yourself to at most 256 versions and it becomes trivial. Take the ID
477 | of the last version created, increment its metabyte - and that's it (see the sketch after this list). What you now have is effectively a simplistic
478 | versioning schema, where the IDs of all possible versions can be inferred without lookups, joins, indices and whatnot.
479 | And many databases will just store them *close* to each other. Locality is a thing.
480 |
How? The only part that changed was the metabyte. All other components remained the same, but we ended up with
481 | a new ID pointing to the most recent version. Admittedly the timestamp lost its default semantics of
482 | *moment of creation* and instead is *moment of creation of first version*, but you'd store a `revisedAt` timestamp
483 | anyways, wouldn't you?
And if you *really* wanted to support more versions - the IDs have certain properties
484 | that can be (ab)used for this. Increment this, decrement that...
485 |
486 | - Sometimes a single byte is all the data that you actually need to store, along with the time
487 | *when something happened*. Batch processing succeeded? `sno.New(0)`, done. Failed? `sno.New(1)`, done. You now
488 | have a uniquely identifiable event, know *when* and *where* it happened, what the outcome was - and you still
489 | had 7 spare bits (for higher precision time, maybe?)
490 |
491 | - Polymorphism has already been covered. Consider not just data storage, but also things like (un)marshaling
492 | polymorphic types efficiently. Take a JSON of `{id: "aaaaaaaa55aaaaaa", foo: "bar", baz: "bar"}`.
493 | The 8-th and 9-th (0-indexed) characters of the ID contain the encoded bits of the metabyte. Decode that
494 | (use one of the utilities provided by the library) and you now know what internal type the data should unmarshal
495 | to without first unmarshaling into an intermediary structure (nor rolling out a custom decoder for this type).
496 | There are many approaches to tackle this - an ID just happens to lend itself naturally to solve it and is easily
497 | portable.
498 |
499 | - 2 bytes for partitions not enough for your needs? Use a fixed byte as the metabyte -- you have extended the
500 | fixed partition to 3 bytes. Wrap a generator with a custom one to apply that metabyte for you each time you use it.
501 | The metabyte is, after all, part of the partition. It's just separated out for semantic purposes but its actual
502 | semantics are left to you.
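
As a sketch of the versioning idea from the list above (the metabyte sits at byte index 5, right after the
timestamp block; `nextVersion` is a hypothetical helper, not part of **sno**):

```go
func nextVersion(prev sno.ID) sno.ID {
	next := prev // IDs are plain byte arrays, so assignment copies.
	next[5]++    // Bump the metabyte - wraps around after 256 versions.
	return next
}
```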
503 |
504 |
505 |
506 |
507 |
508 | ## Encoding
509 |
510 | The encoding is a **custom base32** variant stemming from base32hex. Let's *not* call it *sno32*.
511 | A canonically encoded **sno** matches the regexp `[2-9a-x]{16}`.
512 |
513 | The following alphabet is used:
514 |
515 | ```
516 | 23456789abcdefghijklmnopqrstuvwx
517 | ```
518 |
519 | This is 2 contiguous ASCII ranges: `50..57` (digits) and `97..120` (*strictly* lowercase letters).
520 |
521 | On `amd64` encoding/decoding is vectorized and **[extremely fast](./benchmark#encodingdecoding)**.
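
For reference, a naive and unoptimized sketch of the scheme - the 80 bits are taken MSB-first, 5 bits per
character, and mapped onto the alphabet above. This only illustrates the encoding, it does not mirror the
library's implementation:

```go
const alphabet = "23456789abcdefghijklmnopqrstuvwx"

func encode(id [10]byte) string {
	var out [16]byte
	for i := 0; i < 16; i++ {
		bit := i * 5
		// Gather a 16-bit window so the 5 bits may straddle a byte boundary.
		v := uint16(id[bit/8]) << 8
		if bit/8+1 < len(id) {
			v |= uint16(id[bit/8+1])
		}
		out[i] = alphabet[(v>>(11-bit%8))&0x1f]
	}
	return string(out[:])
}
```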
522 |
523 |
524 |
525 | ## Alternatives
526 |
527 | | Name | Binary (bytes) | Encoded (chars)* | Sortable | Random** | Metadata | nsec/ID
528 | |------------:|:--------------:|:----------------:|:---------:|:---------:|:--------:|--------:
529 | | [UUID] | 16 | 36 | ![no] | ![yes] | ![no] | ≥36.3
530 | | [KSUID] | 20 | 27 | ![yes] | ![yes] | ![no] | 206.0
531 | | [ULID] | 16 | 26 | ![yes] | ![yes] | ![no] | ≥50.3
532 | | [Sandflake] | 16 | 26 | ![yes] | ![meh] | ![no] | 224.0
533 | | [cuid] | ![no] | 25 | ![yes] | ![meh] | ![no] | 342.0
534 | | [xid] | 12 | 20 | ![yes] | ![no] | ![no] | 19.4
535 | | **sno** | 10 | **16** | ![yes] | ![no] | ![yes] | **8.8**
536 | | [Snowflake] | **8** | ≤20 | ![yes] | ![no] | ![no] | 28.9
537 |
538 |
539 | [UUID]: https://github.com/gofrs/uuid
540 | [KSUID]: https://github.com/segmentio/ksuid
541 | [cuid]: https://github.com/lucsky/cuid
542 | [Snowflake]: https://github.com/bwmarrin/snowflake
543 | [Sonyflake]: https://github.com/sony/sonyflake
544 | [Sandflake]: https://github.com/celrenheit/sandflake
545 | [ULID]: https://github.com/oklog/ulid
546 | [xid]: https://github.com/rs/xid
547 |
548 | [yes]: ./.github/ico-yes.svg
549 | [meh]: ./.github/ico-meh.svg
550 | [no]: ./.github/ico-no.svg
551 |
552 | \* Using canonical encoding.
553 | \** When used with a proper CSPRNG. The more important aspect is the distinction between entropy-based and
554 | coordination-based IDs. [Sandflake] and [cuid] do contain entropy, but not sufficient to rely on entropy
555 | alone to avoid collisions (3 bytes and 4 bytes respectively).
556 |
557 | For performance results see ➜ [Benchmark](./benchmark). `≥` values are given for libraries which provide more
558 | than one variant, in which case the fastest one is listed.
559 |
560 |
561 |
562 |
563 | ## Attributions
564 |
565 | **sno** is both based on and inspired by [xid] - more so than by the original Snowflake - but the changes it
566 | introduces are unfortunately incompatible with xid's spec.
567 |
568 | ## Further reading
569 |
570 | - [Original Snowflake implementation](https://github.com/twitter-archive/snowflake/tree/snowflake-2010) and
571 | [related post](https://blog.twitter.com/engineering/en_us/a/2010/announcing-snowflake.html)
572 | - [Mongo ObjectIds](https://docs.mongodb.com/manual/reference/method/ObjectId/)
573 | - [Instagram: Sharding & IDs at Instagram](https://instagram-engineering.com/sharding-ids-at-instagram-1cf5a71e5a5c)
574 | - [Flickr: Ticket Servers: Distributed Unique Primary Keys on the Cheap](http://code.flickr.net/2010/02/08/ticket-servers-distributed-unique-primary-keys-on-the-cheap/)
575 | - [Segment: A brief history of the UUID](https://segment.com/blog/a-brief-history-of-the-uuid/) - about KSUID and the shortcomings of UUIDs.
576 | - [Farfetch: Unique integer generation in distributed systems](https://www.farfetchtechblog.com/en/blog/post/unique-integer-generation-in-distributed-systems) - uint32 utilizing Cassandra to coordinate.
577 |
578 | Also potentially of interest:
579 | - [Lamport timestamps](https://en.wikipedia.org/wiki/Lamport_timestamps) (vector/logical clocks)
580 | - [The Bloom Clock](https://arxiv.org/pdf/1905.13064.pdf) by Lum Ramabaja
581 |
--------------------------------------------------------------------------------