├── go.mod ├── .github ├── layout.png ├── logo_200x200.png ├── ico-meh.svg ├── ico-yes.svg └── ico-no.svg ├── internal ├── cpu_test.go ├── internal.go ├── internal_test.go ├── time_linux_amd64.go ├── cpu_amd64.s ├── time_test.go ├── time_windows_amd64.go ├── encoding_amd64.go ├── cpu_amd64.go ├── encoding_test.go ├── time.go ├── time_windows_amd64.s ├── encoding.go ├── cpu_amd64_test.go └── encoding_amd64.s ├── cmd └── sno │ ├── version.go │ ├── go.mod │ ├── go.sum │ ├── inspect.go │ ├── usage.go │ ├── main.go │ └── generate.go ├── benchmark ├── benchmark_test.go ├── go.mod ├── encoding.go ├── generation.go └── README.md ├── time.go ├── .travis.yml ├── LICENSE ├── errors.go ├── global.go ├── partition_test.go ├── partition.go ├── global_test.go ├── id.go ├── id_test.go ├── generator.go ├── generator_test.go └── README.md /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/muyo/sno 2 | 3 | go 1.14 4 | -------------------------------------------------------------------------------- /.github/layout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muyo/sno/HEAD/.github/layout.png -------------------------------------------------------------------------------- /.github/logo_200x200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muyo/sno/HEAD/.github/logo_200x200.png -------------------------------------------------------------------------------- /internal/cpu_test.go: -------------------------------------------------------------------------------- 1 | // +build !amd64 2 | 3 | package internal 4 | 5 | import "testing" 6 | 7 | func testCPU(t *testing.T) {} 8 | -------------------------------------------------------------------------------- /.github/ico-meh.svg: -------------------------------------------------------------------------------- 1 | 4 | -------------------------------------------------------------------------------- /internal/internal.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | const ( 4 | // Duplicates root package due to circular imports. 5 | epochNsec = 1262304000 * 1e9 6 | timeUnit = 4e6 7 | ) 8 | -------------------------------------------------------------------------------- /cmd/sno/version.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | ) 6 | 7 | const ( 8 | ver = "1.1.0" 9 | ) 10 | 11 | func version() { 12 | _, _ = os.Stdout.Write([]byte(ver)) 13 | os.Exit(0) 14 | } 15 | -------------------------------------------------------------------------------- /internal/internal_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import "testing" 4 | 5 | func Test(t *testing.T) { 6 | t.Run("cpu", testCPU) 7 | t.Run("time", testSnotime) 8 | t.Run("encoding", testEncoding) 9 | } 10 | -------------------------------------------------------------------------------- /.github/ico-yes.svg: -------------------------------------------------------------------------------- 1 | 4 | -------------------------------------------------------------------------------- /benchmark/benchmark_test.go: -------------------------------------------------------------------------------- 1 | package benchmark 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func Benchmark(b *testing.B) { 8 | b.Run("generation", benchmarkGeneration) 9 | b.Run("encoding", benchmarkEncoding) 10 | } 11 | -------------------------------------------------------------------------------- /cmd/sno/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/muyo/sno/cmd/sno 2 | 3 | go 1.14 4 | 5 | require github.com/muyo/sno v1.2.0 6 | require github.com/muyo/rush v0.0.0-20200227160314-2d77be2a525b 7 | 8 | replace github.com/muyo/sno => ../../ 9 | -------------------------------------------------------------------------------- /cmd/sno/go.sum: -------------------------------------------------------------------------------- 1 | github.com/muyo/rush v0.0.0-20200227160314-2d77be2a525b h1:PfRvR7nHUUuz5hf9m0ironGBFOC4lyApPtkp0Lh5GF4= 2 | github.com/muyo/rush v0.0.0-20200227160314-2d77be2a525b/go.mod h1:4zPkqWZT3vdQAqQtSTbhb1GXRMF0ZRioeH+6ffeyI/c= 3 | -------------------------------------------------------------------------------- /.github/ico-no.svg: -------------------------------------------------------------------------------- 1 | 4 | -------------------------------------------------------------------------------- /internal/time_linux_amd64.go: -------------------------------------------------------------------------------- 1 | //go:build go1.17 2 | // +build go1.17 3 | 4 | package internal 5 | 6 | import "time" 7 | 8 | // Snotime returns the current wall clock time reported by the OS as adjusted to our internal epoch. 9 | func Snotime() uint64 { 10 | return uint64(time.Now().UnixNano()-epochNsec) / timeUnit 11 | } 12 | -------------------------------------------------------------------------------- /internal/cpu_amd64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | #include "funcdata.h" 3 | 4 | // func cpuidReal(op uint32) (eax, ebx, ecx, edx uint32) 5 | TEXT ·cpuidReal(SB), NOSPLIT, $0-24 6 | MOVL op+0(FP), AX 7 | XORQ CX, CX 8 | CPUID 9 | MOVL AX, eax+8(FP) 10 | MOVL BX, ebx+12(FP) 11 | MOVL CX, ecx+16(FP) 12 | MOVL DX, edx+20(FP) 13 | RET 14 | -------------------------------------------------------------------------------- /benchmark/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/muyo/sno/benchmark 2 | 3 | go 1.14 4 | 5 | require ( 6 | github.com/bwmarrin/snowflake v0.3.0 7 | github.com/celrenheit/sandflake v0.0.0-20190410195419-50a943690bc2 8 | github.com/gofrs/uuid v3.2.0+incompatible 9 | github.com/lucsky/cuid v1.0.2 10 | github.com/muyo/sno v1.1.0 11 | github.com/oklog/ulid v1.3.1 12 | github.com/rs/xid v1.2.1 13 | github.com/segmentio/ksuid v1.0.2 14 | github.com/sony/sonyflake v1.0.0 15 | ) 16 | 17 | replace github.com/muyo/sno => ../ 18 | -------------------------------------------------------------------------------- /time.go: -------------------------------------------------------------------------------- 1 | // +build !test 2 | 3 | package sno 4 | 5 | import "github.com/muyo/sno/internal" 6 | 7 | // snotime returns the current wall clock time reported by the OS as adjusted to our internal epoch. 8 | // 9 | // It is a thin wrapper over actual implementations provided separately by os/arch dependent code. 10 | // 11 | // Note: tests use a different implementation of snotime() which is dynamically dispatched 12 | // and does not necessarily call internal.Snotime(). 13 | func snotime() uint64 { 14 | return internal.Snotime() 15 | } 16 | -------------------------------------------------------------------------------- /internal/time_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func testSnotime(t *testing.T) { 9 | // Covers all arch/os combinations since they are expected to provide the snotime() function 10 | // to the rest of the package. 11 | // 12 | // Strictly speaking this test can be flaky if the time.Now() call happens to cross 13 | // the boundary between different TimeUnits, but that would just be really bad luck. 14 | actual := Snotime() 15 | expected := uint64(time.Now().UnixNano()-epochNsec) / timeUnit 16 | 17 | if actual != expected { 18 | t.Errorf("expected [%v], got [%v]", expected, actual) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /internal/time_windows_amd64.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | //go:noescape 4 | func ostime() uint64 5 | 6 | // Snotime returns the current wall clock time reported by the OS as adjusted to our internal epoch. 7 | func Snotime() uint64 { 8 | // Note: Division is left here instead of being impl in asm since the compiler optimizes this 9 | // into mul+shift, which is easier to read when left in as simple division. 10 | // This doesn't affect performance. The asm won't get inlined anyway while this function 11 | // will. 12 | // 13 | // 4e4 instead of TimeUnit (4e6) because the time we get from the OS is in units of 100ns. 14 | return ostime() / 4e4 15 | } 16 | -------------------------------------------------------------------------------- /cmd/sno/inspect.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/muyo/sno" 8 | ) 9 | 10 | const inspectFmt = ` 11 | -- Representations 12 | 13 | Encoded: %v 14 | Bytes: %v 15 | 16 | -- Components 17 | 18 | Time: %v 19 | Timestamp: %v 20 | Meta: %v 21 | Partition: %v 22 | Sequence: %v 23 | 24 | ` 25 | 26 | func inspect(in string) { 27 | id, err := sno.FromEncodedString(in) 28 | if err != nil { 29 | _, _ = os.Stderr.Write([]byte(fmt.Sprintf("Failed to inspect: [%s] does not appear to be a valid sno.\n", in))) 30 | os.Exit(1) 31 | } 32 | 33 | fmt.Printf(inspectFmt, 34 | id.String(), 35 | id[:], 36 | id.Time().UTC(), 37 | id.Timestamp(), 38 | id.Meta(), 39 | id.Partition().AsUint16(), 40 | id.Sequence(), 41 | ) 42 | 43 | os.Exit(0) 44 | } 45 | -------------------------------------------------------------------------------- /cmd/sno/usage.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | ) 6 | 7 | const usageFmt = ` 8 | sno generates compact, unique IDs with embedded metadata. 9 | 10 | Usage: 11 | 12 | sno [parameters ...] 13 | 14 | Commands: 15 | 16 | inspect Displays information about an ID and its components 17 | 18 | sno inspect 19 | 20 | generate Generates one or more IDs 21 | 22 | sno generate [options...] [number of IDs to generate] 23 | --meta= The metabyte to set on generated IDs, in decimal, max 255 24 | --partition= The partition to set on generated IDs, in decimal, max 65535 25 | 26 | version Displays the version of this program 27 | help Displays this information 28 | ` 29 | 30 | func usage() { 31 | _, _ = os.Stdout.Write([]byte(usageFmt)) 32 | os.Exit(0) 33 | } 34 | -------------------------------------------------------------------------------- /internal/encoding_amd64.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | // Encode returns the sno32-encoded representation of src as an array of 16 bytes. 4 | //go:noescape 5 | func Encode(src *[10]byte) (dst [16]byte) 6 | 7 | // Decode returns the binary representation of a sno32-encoded src as an array of bytes. 8 | // 9 | // Src does not get validated and must have a length of 16 - otherwise Decode will panic. 10 | //go:noescape 11 | func Decode(src []byte) (dst [10]byte) 12 | 13 | // One-shot to determine whether we've got SSE2 at all - and the SSE4.2 and BMI2 sets 14 | // that we need for the vectorized codecs. 15 | // 16 | // The fallbacks currently rely on SSE2 - while it's available on just about 17 | // any modern amd64 platform, *just in case* it's not, the check will fail loudly 18 | // and immediately (panic) instead of faulting on the first encode/decode attempt. 19 | var hasVectorSupport = checkVectorSupport() 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.x 5 | - "1.14" 6 | - master 7 | 8 | arch: 9 | - amd64 10 | 11 | os: 12 | - linux 13 | - osx 14 | - windows 15 | 16 | dist: bionic 17 | 18 | before_install: 19 | - go get github.com/mattn/goveralls 20 | 21 | script: 22 | - go test -v -tags=test -race -coverprofile=coverage.txt -covermode=atomic ./... 23 | 24 | after_success: 25 | - goveralls -coverprofile=coverage.txt -service=travis-ci 26 | - bash <(curl -s https://codecov.io/bash) -f coverage.txt 27 | 28 | notifications: 29 | webhooks: https://coveralls.io/webhook 30 | 31 | jobs: 32 | include: 33 | - os: linux 34 | arch: arm64 35 | go: "1.14" 36 | - os: linux 37 | arch: ppc64le 38 | go: "1.14" 39 | - os: linux 40 | arch: s390x 41 | go: "1.14" 42 | script: 43 | # Go's race detector does not work on s390x 44 | - go test -v -tags=test -coverprofile=coverage.txt -covermode=atomic ./... 45 | 46 | allow_failures: 47 | - go: "master" 48 | 49 | fast_finish: true 50 | -------------------------------------------------------------------------------- /internal/cpu_amd64.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | const cpuLacksSSE2ErrMsg = "sno: CPU does not seem to support SSE2 instructions required on amd64 platforms" 4 | 5 | func checkVectorSupport() bool { 6 | // We need a highest function parameter of at least 7 since we need 7 | // to check for BMI2 support as well. 8 | eax, _, _, _ := cpuid(0) 9 | if eax < 7 { 10 | if eax < 1 { 11 | panic(cpuLacksSSE2ErrMsg) 12 | } 13 | 14 | return false 15 | } 16 | 17 | _, _, ecx, edx := cpuid(1) 18 | if (edx & (1 << 26)) == 0 { 19 | panic(cpuLacksSSE2ErrMsg) 20 | } 21 | 22 | // c & 0x00000001 -> SSE3 23 | // c & 0x00000200 -> SSSE3 24 | // c & 0x00080000 -> SSE4 25 | // c & 0x00100000 -> SSE4.2 26 | if (ecx & 0x00180201) != 0x00180201 { 27 | return false 28 | } 29 | 30 | // b & 0x00000008 -> BMI1 31 | // b & 0x00000100 -> BMI2 32 | _, ebx, _, _ := cpuid(7) 33 | 34 | return (ebx & 0x00000108) == 0x00000108 35 | } 36 | 37 | // Gets temporarily swapped out with a mock during tests. 38 | var cpuid = cpuidReal 39 | 40 | //go:noescape 41 | func cpuidReal(op uint32) (eax, ebx, ecx, edx uint32) 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Michał Chojnacki 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is furnished 8 | to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /cmd/sno/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | ) 6 | 7 | const ( 8 | cmdGenerate = "generate" 9 | cmdInspect = "inspect" 10 | cmdVersion = "version" 11 | cmdHelp = "help" 12 | ) 13 | 14 | var ( 15 | meta string 16 | part string 17 | ) 18 | 19 | func init() { 20 | flag.StringVar(&meta, "meta", "", "The metabyte to set on generated IDs, given in decimal (base10)") 21 | flag.StringVar(&part, "partition", "", "The partition to set on generated IDs, given in decimal (base10)") 22 | flag.Parse() 23 | } 24 | 25 | func main() { 26 | var ( 27 | args = flag.Args() 28 | argsN = len(args) 29 | ) 30 | 31 | if argsN < 2 { 32 | // No args at all or "generate" without arg simply passes on to generate one sno. 33 | // Opts will still get passed through, if they were given. 34 | if argsN == 0 || args[0] == cmdGenerate { 35 | generate("1") 36 | } 37 | 38 | switch args[0] { 39 | case cmdVersion: 40 | version() 41 | case cmdHelp: 42 | usage() 43 | } 44 | } else if argsN == 2 { 45 | switch args[0] { 46 | case cmdGenerate: 47 | generate(args[1]) 48 | case cmdInspect: 49 | inspect(args[1]) 50 | } 51 | } 52 | 53 | usage() 54 | } 55 | -------------------------------------------------------------------------------- /internal/encoding_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | ) 7 | 8 | func testEncoding(t *testing.T) { 9 | runEncodingWithFallback("encode", t, testEncodingEncode) 10 | runEncodingWithFallback("decode", t, testEncodingDecode) 11 | } 12 | 13 | var encdec = [...]struct { 14 | dec string 15 | enc [10]byte 16 | }{ 17 | {"2222222222222222", [10]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, 18 | {"brpk4q72xwf2m63l", [10]byte{78, 111, 33, 96, 160, 255, 154, 10, 16, 51}}, 19 | {"xxxxxxxxxxxxxxxx", [10]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}, 20 | } 21 | 22 | func testEncodingEncode(t *testing.T) { 23 | for _, c := range encdec { 24 | var ( 25 | actual = Encode(&c.enc) 26 | expected = []byte(c.dec) 27 | ) 28 | 29 | if !bytes.Equal(actual[:], expected) { 30 | t.Errorf("expected [%s], got [%s]", expected, actual) 31 | } 32 | } 33 | } 34 | 35 | func testEncodingDecode(t *testing.T) { 36 | for _, c := range encdec { 37 | var ( 38 | actual = Decode([]byte(c.dec)) 39 | expected = c.enc 40 | ) 41 | 42 | if actual != expected { 43 | t.Errorf("expected [%v], got [%v]", expected, actual) 44 | } 45 | } 46 | } 47 | 48 | func runEncodingWithFallback(name string, t *testing.T, f func(t *testing.T)) { 49 | t.Run(name, func(t *testing.T) { 50 | var actualVectorSupport = hasVectorSupport 51 | if actualVectorSupport { 52 | t.Run("vectorized", f) 53 | } 54 | 55 | hasVectorSupport = false 56 | t.Run("fallback", f) 57 | hasVectorSupport = actualVectorSupport 58 | }) 59 | } 60 | -------------------------------------------------------------------------------- /cmd/sno/generate.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/muyo/rush/chars" 7 | "github.com/muyo/sno" 8 | ) 9 | 10 | func generate(in string) { 11 | c, ok := chars.ParseUint64(in) 12 | if !ok { 13 | _, _ = os.Stderr.Write([]byte("Need a valid number of IDs to generate.\n")) 14 | os.Exit(1) 15 | } 16 | 17 | metabyte, snapshot := parseGenerateOpts() 18 | 19 | g, err := sno.NewGenerator(snapshot, nil) 20 | if err != nil { 21 | _, _ = os.Stderr.Write([]byte("Failed to create a generator.\n")) 22 | os.Exit(1) 23 | } 24 | 25 | ids := make([]sno.ID, c) 26 | for i := 0; i < int(c); i++ { 27 | ids[i] = g.New(metabyte) 28 | } 29 | 30 | buf := make([]byte, sno.SizeEncoded+1) 31 | buf[sno.SizeEncoded] = '\n' 32 | 33 | for i := 0; i < int(c); i++ { 34 | enc, _ := ids[i].MarshalText() 35 | copy(buf, enc) 36 | if _, err := os.Stdout.Write(buf); err != nil { 37 | os.Exit(1) 38 | } 39 | } 40 | 41 | os.Exit(0) 42 | } 43 | 44 | func parseGenerateOpts() (metabyte byte, snapshot *sno.GeneratorSnapshot) { 45 | var ok bool 46 | 47 | if meta != "" { 48 | if metabyte, ok = chars.ParseUint8(meta); !ok { 49 | _, _ = os.Stderr.Write([]byte("-meta must be a valid base10 number smaller than 256\n")) 50 | os.Exit(1) 51 | } 52 | } 53 | 54 | if part != "" { 55 | pu16, ok := chars.ParseUint16(part) 56 | if !ok { 57 | _, _ = os.Stderr.Write([]byte("-partition must be a valid base10 number smaller than 65536\n")) 58 | os.Exit(1) 59 | } 60 | 61 | var partition sno.Partition 62 | partition.PutUint16(pu16) 63 | 64 | snapshot = &sno.GeneratorSnapshot{ 65 | Partition: partition, 66 | } 67 | } 68 | 69 | return 70 | } 71 | -------------------------------------------------------------------------------- /internal/time.go: -------------------------------------------------------------------------------- 1 | //go:build !(windows && amd64) && !(linux && amd64 && go1.17) 2 | // +build !windows !amd64 3 | // +build !linux !amd64 !go1.17 4 | 5 | package internal 6 | 7 | import _ "unsafe" // Required for go:linkname 8 | 9 | // ostime returns the current wall clock time reported by the OS. 10 | // 11 | // The function is linked against runtime.walltime() directly, which is only available since the 12 | // introduction of faketime in Go 1.14 (which is the version sno depends on at minimum). This being 13 | // linked to an internal function instead of a semi-stable one like time.now() is somewhat brittle, 14 | // but the rationale is explained below. 15 | // 16 | // POSIXy arch/OS combinations use some form of clock_gettime with CLOCK_REALTIME, either through 17 | // a syscall, libc call (Darwin) or vDSO (Linux). 18 | // These calls are relatively slow, even using vDSO. Not using time.Now() allows us to bypass getting 19 | // the monotonic clock readings which is a separate invocation of the underlying kernel facility and 20 | // roughly doubles the execution time. 21 | // 22 | // As a result, doing sno.New(0).Time() tends to be actually faster on those platforms than time.Now(), 23 | // despite an entire ID being generated alongside. That is, if you're fine with the precision reduced to 4ms. 24 | // 25 | // On Windows/amd64 we use an even more efficient implementation which allows us to also bypass 26 | // some unnecessary unit conversions, which isn't as trivially possible on POSIXy systems (as their 27 | // kernels keep track of time and provide secs and fractional secs instead of a singular higher 28 | // resolution source). 29 | // 30 | // See https://lore.kernel.org/linux-arm-kernel/20190621095252.32307-1-vincenzo.frascino@arm.com 31 | // to get an overview of the perf numbers involved on Linux-based distros. 32 | // 33 | //go:linkname ostime runtime.walltime 34 | func ostime() (sec int64, nsec int32) 35 | 36 | // Snotime returns the current wall clock time reported by the OS as adjusted to our internal epoch. 37 | func Snotime() uint64 { 38 | wallSec, wallNsec := ostime() 39 | 40 | return (uint64(wallSec)*1e9 + uint64(wallNsec) - epochNsec) / timeUnit 41 | } 42 | -------------------------------------------------------------------------------- /internal/time_windows_amd64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | #include "funcdata.h" 3 | 4 | // Uses the same approach as Go's runtime to get the current OS time as documented on: 5 | // https://www.dcl.hpi.uni-potsdam.de/research/WRK/2007/08/getting-os-information-the-kuser_shared_data-structure 6 | // https://github.com/golang/go/blob/450d0b2f30e820f402a638799de0b886c1da8dbe/src/runtime/sys_windows_amd64.s#L499 7 | // 8 | // However, we skip a few things the runtime does to provide the facility to time.Now(): 9 | // - There is no fallback to QPC, which means this won't work on Wine except the most recent versions; 10 | // - We offset the time straight into the sno epoch instead of into Unix first; 11 | // - We do not perform a unit conversion from 100nsec (as returned by the OS) into 1nsec. Instead we 12 | // return this as is and the unit conversion is done in the wrapping snotime() function, where the 13 | // division gets optimized by the compiler; 14 | // - There is no split into seconds and fractional nsecs, since - unlike time.Now() - this is the opposite 15 | // of what we want; 16 | // 17 | // All in all this lets us shave off about a dozen instructions - including a fairly expensive back-and-forth 18 | // conversion between time units. 19 | // 20 | // func ostime() uint64 21 | TEXT ·ostime(SB), NOSPLIT, $0-8 22 | MOVQ $2147352596, DI // 0x7ffe0014 -> 2147352596 23 | time: 24 | MOVL 4(DI), AX // time_hi1 25 | MOVL 0(DI), BX // time_lo 26 | MOVL 8(DI), CX // time_hi2 27 | CMPL AX, CX 28 | JNE time 29 | 30 | SHLQ $32, AX 31 | ORQ BX, AX 32 | 33 | // Windows time as stored within _KUSER_SHARED_DATA starts at Jan 1st 1601. 34 | // The offset in the Windows units (100ns) to Unix epoch is a SUBQ by 116 444 736 000 000 000. 35 | // 36 | // Our internal epoch is: 37 | // 1 262 304 000 seconds on top of Unix. 38 | // 12 623 040 000 000 000‬ in units of 100nsec (secs * 1e7) 39 | // 40 | // As such we SUBQ 116444736000000000 (Windows to Unix diff) + 12623040000000000‬ (Sno to Unix diff) 41 | // 116 444 736 000 000 000 42 | // 12 623 040 000 000 000‬ 43 | // ---- 44 | // 129 067 776 000 000 000 45 | 46 | MOVQ $129067776000000000, DI 47 | SUBQ DI, AX 48 | 49 | MOVQ AX, ret+0(FP) 50 | RET 51 | -------------------------------------------------------------------------------- /errors.go: -------------------------------------------------------------------------------- 1 | package sno 2 | 3 | import "fmt" 4 | 5 | const ( 6 | errInvalidDataSizeMsg = "sno: unrecognized data size" 7 | errInvalidTypeFmt = "sno: unrecognized data type: %T" 8 | errInvalidSequenceBoundsFmt = "sno: %s; min: %d, sequence: %d, max: %d, pool: %d" 9 | errSequenceBoundsIdenticalMsg = "sno: sequence bounds are identical - need a sequence pool with a capacity of at least 4" 10 | errSequenceUnderflowsBound = "sno: current sequence underflows the given lower bound" 11 | errSequencePoolTooSmallMsg = "sno: generators require a sequence pool with a capacity of at least 4" 12 | errPartitionPoolExhaustedMsg = "sno: process exceeded maximum number of possible defaults-configured generators" 13 | ) 14 | 15 | // InvalidDataSizeError gets returned when attempting to unmarshal or decode an ID from data that 16 | // is not nil and not of a size of: SizeBinary, SizeEncoded nor 0. 17 | type InvalidDataSizeError struct { 18 | Size int 19 | } 20 | 21 | func (e *InvalidDataSizeError) Error() string { return errInvalidDataSizeMsg } 22 | 23 | // InvalidTypeError gets returned when attempting to scan a value that is neither... 24 | // - a string 25 | // - a byte slice 26 | // - nil 27 | // ... into an ID via ID.Scan(). 28 | type InvalidTypeError struct { 29 | Value interface{} 30 | } 31 | 32 | func (e *InvalidTypeError) Error() string { 33 | return fmt.Sprintf(errInvalidTypeFmt, e.Value) 34 | } 35 | 36 | // InvalidSequenceBoundsError gets returned when a Generator gets seeded with sequence boundaries 37 | // which are invalid, e.g. the pool is too small or the current sequence overflows the bounds. 38 | type InvalidSequenceBoundsError struct { 39 | Cur uint32 40 | Min uint16 41 | Max uint16 42 | Msg string 43 | } 44 | 45 | func (e *InvalidSequenceBoundsError) Error() string { 46 | return fmt.Sprintf(errInvalidSequenceBoundsFmt, e.Msg, e.Min, e.Cur, e.Max, e.Max-e.Min+1) 47 | } 48 | 49 | // PartitionPoolExhaustedError gets returned when attempting to create more than MaxPartition (65535) 50 | // Generators using the default configuration (eg. without snapshots). 51 | // 52 | // Should you ever run into this, please consult the docs on the genPartition() internal function. 53 | type PartitionPoolExhaustedError struct{} 54 | 55 | func (e *PartitionPoolExhaustedError) Error() string { return errPartitionPoolExhaustedMsg } 56 | -------------------------------------------------------------------------------- /global.go: -------------------------------------------------------------------------------- 1 | package sno 2 | 3 | import ( 4 | "sort" 5 | "time" 6 | "unsafe" 7 | 8 | "github.com/muyo/sno/internal" 9 | ) 10 | 11 | var ( 12 | generator *Generator 13 | zero ID 14 | ) 15 | 16 | func init() { 17 | doInit() 18 | } 19 | 20 | func doInit() { 21 | g, err := NewGenerator(nil, nil) 22 | if err != nil { 23 | panic(err) 24 | } 25 | 26 | generator = g 27 | } 28 | 29 | // New uses the package-level generator to generate a new ID using the current system 30 | // time for its timestamp. 31 | func New(meta byte) ID { 32 | return generator.New(meta) 33 | } 34 | 35 | // NewWithTime uses the package-level generator to generate a new ID using the given time 36 | // for the timestamp. 37 | // 38 | // IDs generated using this method are subject to several caveats. 39 | // See generator.NewWithTime() for their documentation. 40 | func NewWithTime(meta byte, t time.Time) ID { 41 | return generator.NewWithTime(meta, t) 42 | } 43 | 44 | // FromBinaryBytes takes a byte slice and copies its contents into an ID, returning the bytes as an ID. 45 | // 46 | // The slice must have a length of 10. Returns a InvalidDataSizeError if it does not. 47 | func FromBinaryBytes(src []byte) (id ID, err error) { 48 | return id, id.UnmarshalBinary(src) 49 | } 50 | 51 | // FromEncodedBytes decodes a canonically base32-encoded byte slice representation of an ID 52 | // into its binary representation and returns it. 53 | // 54 | // The slice must have a length of 16. Returns a InvalidDataSizeError if it does not. 55 | func FromEncodedBytes(src []byte) (id ID, err error) { 56 | return id, id.UnmarshalText(src) 57 | } 58 | 59 | // FromEncodedString decodes a canonically base32-encoded string representation of an ID 60 | // into its binary representation and returns it. 61 | // 62 | // The string must have a length of 16. Returns a InvalidDataSizeError if it does not. 63 | func FromEncodedString(src string) (id ID, err error) { 64 | if len(src) != SizeEncoded { 65 | return zero, &InvalidDataSizeError{Size: len(src)} 66 | } 67 | 68 | // We only read in the data pointer (and input is read-only), so this does the job. 69 | return internal.Decode(*(*[]byte)(unsafe.Pointer(&src))), nil 70 | } 71 | 72 | type collection []ID 73 | 74 | func (ids collection) Len() int { return len(ids) } 75 | func (ids collection) Less(i, j int) bool { return ids[i].Compare(ids[j]) < 0 } 76 | func (ids collection) Swap(i, j int) { ids[i], ids[j] = ids[j], ids[i] } 77 | 78 | // Sort performs an in-place lexicographic sort of a slice of sno IDs. 79 | func Sort(s []ID) { 80 | sort.Sort(collection(s)) 81 | } 82 | 83 | // Zero returns the zero value of an ID, which is 10 zero bytes and equivalent to: 84 | // 85 | // id := sno.ID{} 86 | // ... e.g. ... 87 | // id := sno.ID{0, 0, 0, 0, 0, 0, 0, 0, 0, 0} 88 | func Zero() ID { 89 | return zero 90 | } 91 | -------------------------------------------------------------------------------- /partition_test.go: -------------------------------------------------------------------------------- 1 | package sno 2 | 3 | import ( 4 | "sync/atomic" 5 | "testing" 6 | ) 7 | 8 | func TestPartition_Public_Conversions(t *testing.T) { 9 | t.Run("AsUint16", func(t *testing.T) { 10 | src := Partition{255, 255} 11 | expected := uint16(MaxPartition) 12 | actual := src.AsUint16() 13 | 14 | if actual != expected { 15 | t.Errorf("expected [%d], got [%d]", expected, actual) 16 | } 17 | }) 18 | 19 | t.Run("PutUint16", func(t *testing.T) { 20 | expected := Partition{255, 255} 21 | actual := Partition{} 22 | actual.PutUint16(MaxPartition) 23 | 24 | if actual != expected { 25 | t.Errorf("expected [%s], got [%s]", expected, actual) 26 | } 27 | }) 28 | } 29 | 30 | func TestPartition_Internal_Conversions(t *testing.T) { 31 | public := Partition{255, 255} 32 | internal := uint32(MaxPartition) << 16 33 | 34 | t.Run("to-internal", func(t *testing.T) { 35 | expected := internal 36 | actual := partitionToInternalRepr(public) 37 | 38 | if actual != expected { 39 | t.Errorf("expected [%d], got [%d]", expected, actual) 40 | } 41 | }) 42 | 43 | t.Run("to-public", func(t *testing.T) { 44 | expected := public 45 | actual := partitionToPublicRepr(internal) 46 | 47 | if actual != expected { 48 | t.Errorf("expected [%d], got [%d]", expected, actual) 49 | } 50 | }) 51 | } 52 | 53 | func TestPartition_Internal_Generation(t *testing.T) { 54 | t.Run("monotonic-increments", func(t *testing.T) { 55 | // Reset global count (leaving seed as is). 56 | atomic.StoreUint32(&partitions, 0) 57 | 58 | var prevPartition = uint32(seed) << 16 59 | 60 | for i := 0; i < 100; i++ { 61 | p, err := genPartition() 62 | if err != nil { 63 | t.Fatal(err) 64 | } 65 | 66 | // Note: genPartition() shifts to make space for the sequence, 67 | // so we can't simply check for an increment of 1 within the resulting 68 | // uint32. The below is a tiny bit faster than converting back 69 | // to an uint16. 70 | if p-prevPartition != 1<<16 { 71 | t.Errorf("expected [%d], got [%d]", prevPartition+1<<16, p) 72 | break 73 | } 74 | 75 | prevPartition = p 76 | } 77 | }) 78 | 79 | t.Run("pool-exhaustion", func(t *testing.T) { 80 | // Reset global count (leaving seed as is). 81 | atomic.StoreUint32(&partitions, 0) 82 | 83 | for i := 0; i < 2*MaxPartition; i++ { 84 | _, err := genPartition() 85 | 86 | if err != nil { 87 | verr, ok := err.(*PartitionPoolExhaustedError) 88 | if !ok { 89 | t.Fatalf("expected error type [%T], got [%T]", &PartitionPoolExhaustedError{}, err) 90 | return 91 | } 92 | 93 | if i < MaxPartition { 94 | t.Fatalf("expected errors no sooner than after [%d] iterations, got to [%d]", MaxPartition, i) 95 | return 96 | } 97 | 98 | errMsgActual := verr.Error() 99 | errMsgExpected := errPartitionPoolExhaustedMsg 100 | 101 | if errMsgActual != errMsgExpected { 102 | t.Fatalf("expected error msg [%s], got [%s]", errMsgExpected, errMsgActual) 103 | } 104 | } 105 | 106 | if i >= MaxPartition { 107 | if err == nil { 108 | t.Fatalf("expected constant errors after [%d] iterations, got no error at [%d]", MaxPartition, i) 109 | return 110 | } 111 | } 112 | } 113 | }) 114 | 115 | // Clean up. 116 | atomic.StoreUint32(&partitions, 0) 117 | } 118 | -------------------------------------------------------------------------------- /internal/encoding.go: -------------------------------------------------------------------------------- 1 | // +build !amd64 2 | 3 | package internal 4 | 5 | const ( 6 | // The encoding is a custom base32 variant stemming from base32hex. 7 | // The alphabet is 2 contiguous ASCII ranges: `50..57` (digits) and `97..120` (lowercase letters). 8 | // A canonically encoded ID can be validated with a regexp of `[2-9a-x]{16}`. 9 | enc = "23456789abcdefghijklmnopqrstuvwx" 10 | ) 11 | 12 | var ( 13 | // Decoding LUT. 14 | dec = [256]byte{ 15 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 16 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 17 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 18 | 0xFF, 0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 19 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 20 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 21 | 0xFF, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 22 | 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 23 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 24 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 25 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 26 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 27 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 28 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 29 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 30 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 31 | } 32 | 33 | // Dummy flag to be set by the respective build (used by tests). 34 | hasVectorSupport bool 35 | ) 36 | 37 | // Encode returns the sno32-encoded representation of src as an array of 16 bytes. 38 | func Encode(src *[10]byte) (dst [16]byte) { 39 | dst[15] = enc[src[9]&0x1F] 40 | dst[14] = enc[(src[9]>>5|src[8]<<3)&0x1F] 41 | dst[13] = enc[src[8]>>2&0x1F] 42 | dst[12] = enc[(src[8]>>7|src[7]<<1)&0x1F] 43 | dst[11] = enc[(src[7]>>4|src[6]<<4)&0x1F] 44 | dst[10] = enc[src[6]>>1&0x1F] 45 | dst[9] = enc[(src[6]>>6|src[5]<<2)&0x1F] 46 | dst[8] = enc[src[5]>>3] 47 | 48 | dst[7] = enc[src[4]&0x1F] 49 | dst[6] = enc[(src[4]>>5|src[3]<<3)&0x1F] 50 | dst[5] = enc[src[3]>>2&0x1F] 51 | dst[4] = enc[(src[3]>>7|src[2]<<1)&0x1F] 52 | dst[3] = enc[(src[2]>>4|src[1]<<4)&0x1F] 53 | dst[2] = enc[src[1]>>1&0x1F] 54 | dst[1] = enc[(src[1]>>6|src[0]<<2)&0x1F] 55 | dst[0] = enc[src[0]>>3] 56 | 57 | return 58 | } 59 | 60 | // Decode returns the binary representation of a sno32-encoded src as an array of bytes. 61 | // 62 | // Src does not get validated and must have a length of 16 - otherwise Decode will panic. 63 | func Decode(src []byte) (dst [10]byte) { 64 | _ = src[15] // BCE hint. 65 | 66 | dst[9] = dec[src[14]]<<5 | dec[src[15]] 67 | dst[8] = dec[src[12]]<<7 | dec[src[13]]<<2 | dec[src[14]]>>3 68 | dst[7] = dec[src[11]]<<4 | dec[src[12]]>>1 69 | dst[6] = dec[src[9]]<<6 | dec[src[10]]<<1 | dec[src[11]]>>4 70 | dst[5] = dec[src[8]]<<3 | dec[src[9]]>>2 71 | 72 | dst[4] = dec[src[6]]<<5 | dec[src[7]] 73 | dst[3] = dec[src[4]]<<7 | dec[src[5]]<<2 | dec[src[6]]>>3 74 | dst[2] = dec[src[3]]<<4 | dec[src[4]]>>1 75 | dst[1] = dec[src[1]]<<6 | dec[src[2]]<<1 | dec[src[3]]>>4 76 | dst[0] = dec[src[0]]<<3 | dec[src[1]]>>2 77 | 78 | return 79 | } 80 | -------------------------------------------------------------------------------- /partition.go: -------------------------------------------------------------------------------- 1 | package sno 2 | 3 | import "sync/atomic" 4 | 5 | // Partition represents the fixed identifier of a Generator. 6 | // 7 | // If you'd rather define Partitions as integers instead of as byte arrays, then: 8 | // var p sno.Partition 9 | // p.PutUint16(65535) 10 | type Partition [2]byte 11 | 12 | // AsUint16 returns the Partition as a uint16. 13 | func (p Partition) AsUint16() uint16 { 14 | return uint16(p[0])<<8 | uint16(p[1]) 15 | } 16 | 17 | // PutUint16 sets Partition to the given uint16 in big-endian order. 18 | func (p *Partition) PutUint16(u uint16) { 19 | p[0] = byte(u >> 8) 20 | p[1] = byte(u) 21 | } 22 | 23 | // genPartition generates a Partition in its internal representation from a time based seed. 24 | // 25 | // While this alone would be enough if we only used this once (for the global generator), 26 | // generators created with the default configuration also use generated partitions - a case 27 | // for which we want to avoid collisions, at the very least within our process. 28 | // 29 | // Considering we only have a tiny period of 2**16 available, and that predictability of 30 | // the partitions is a non-factor, using even a 16-bit Xorshift PRNG would be overkill. 31 | // 32 | // If we used a PRNG without adjustment, we'd have the following pitfalls: 33 | // - we'd need to maintain its state and synchronize access to it. As it can't run atomically, 34 | // this would require maintaining a global lock separately; 35 | // - our space is limited to barely 65535 partitions, making collisions quite likely 36 | // and we have no way of determining them without maintaining yet additional state, 37 | // at the very least as a bit set (potentially growing to 8192 bytes for the entire 38 | // space). It'd also need to be synchronized. With collisions becoming more and 39 | // and more likely as we hand out partitions, we'd need a means of determining free 40 | // partitions in the set to be efficient. 41 | // 42 | // And others. At which point the complexity becomes unreasonable for what we're aiming 43 | // to do, so instead of all of that, we go back to the fact that predictability is a non-factor 44 | // and our goal being only the prevention of collisions, we simply start off with 45 | // a time based seed... which we then atomically increment. 46 | // 47 | // This way access is safely synchronized and we're guaranteed to get 65535 partitions 48 | // without collisions in-process with just a tiny bit of code in comparison. 49 | // 50 | // Should we ever exceed that number, we however panic. If your usage pattern is weird enough 51 | // to hit this edge case, please consider managing the partition space yourself and starting 52 | // the Generators using configuration snapshots, instead. 53 | // 54 | // Note: This being entirely predictable has the upside that the order of creation and the count 55 | // of in-process generators created without snapshots can be simply inferred by comparing their 56 | // partitions (including comparing to the global generator, which starts at 0 - i.e. at the seed). 57 | func genPartition() (uint32, error) { 58 | n := atomic.AddUint32(&partitions, 1) 59 | 60 | if n > MaxPartition { 61 | return 0, &PartitionPoolExhaustedError{} 62 | } 63 | 64 | // Convert to our internal representation leaving 2 bytes empty 65 | // for the sequence to simply get ORed at runtime. 66 | return uint32(seed+uint16(n)) << 16, nil 67 | } 68 | 69 | var ( 70 | // Counter starts at -1 since genPartition() will increase it on each call, including 71 | // the first. This means the global generator gets an N of 0 and always has a Partition = seed. 72 | partitions = ^uint32(0) 73 | seed = func() uint16 { 74 | t := snotime() 75 | 76 | return uint16((t >> 32) ^ t) 77 | }() 78 | ) 79 | 80 | func partitionToInternalRepr(p Partition) uint32 { 81 | return uint32(p[0])<<24 | uint32(p[1])<<16 82 | } 83 | 84 | func partitionToPublicRepr(p uint32) Partition { 85 | return Partition{byte(p >> 24), byte(p >> 16)} 86 | } 87 | -------------------------------------------------------------------------------- /internal/cpu_amd64_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func testCPU(t *testing.T) { 8 | t.Run("real", testCPUReal) 9 | t.Run("mocked", testCPUMocked) 10 | } 11 | 12 | // First tests are run against the real hardware and actual cpuid instruction. 13 | // While we can't reliably assume the availability of the instruction sets, 14 | // at the very least we may catch anomalies when the highest function parameter 15 | // returned is not sane - or when SSE2 instructions are not available where we 16 | // assume they should be. 17 | func testCPUReal(t *testing.T) { 18 | t.Run("highest-function-parameter-valid", testCPURealMFIValid) 19 | t.Run("has-base-set", testCPURealHasBaseSet) 20 | t.Run("has-vector-support-attempt", testCPURealHasVectorSupportAttempt) 21 | } 22 | 23 | func testCPURealMFIValid(t *testing.T) { 24 | eax, _, _, _ := cpuid(0) 25 | if eax < 1 { 26 | t.Errorf("expected a non-zero highest function parameter, got [%d]", eax) 27 | } 28 | } 29 | 30 | func testCPURealHasBaseSet(t *testing.T) { 31 | _, _, _, edx := cpuid(1) 32 | if (edx & (1 << 26)) == 0 { 33 | t.Error("expected the SSE2 instruction set to be available, does not appear to be") 34 | } 35 | } 36 | 37 | func testCPURealHasVectorSupportAttempt(t *testing.T) { 38 | defer func() { 39 | catch(t, recover(), "") 40 | }() 41 | 42 | // Note: We don't care about the result as we can't assume to get a 'true'. 43 | // We only care for this to not panic. 44 | checkVectorSupport() 45 | } 46 | 47 | // Note: Those tests must not run in parallel to any tests that rely 48 | // on real hardware and the actual cpuid implementation (vide enc/dec), 49 | // as the cpuid function gets swapped out for mocks. 50 | func testCPUMocked(t *testing.T) { 51 | cpuid = cpu.id 52 | 53 | t.Run("highest-function-parameter-invalid", testCPUHasVectorSupportMFIInvalid) 54 | t.Run("highest-function-parameter-too-low", testCPUHasVectorSupportMFILow) 55 | t.Run("lacks-base-set", testCPUHasVectorSupportLacksBaseSet) 56 | t.Run("lacks-extended-sets", testCPUHasVectorSupportLacksExtendedSets) 57 | t.Run("passes", testCPUHasVectorPasses) 58 | 59 | // Restore real implementation. 60 | cpuid = cpuidReal 61 | } 62 | 63 | func testCPUHasVectorSupportMFIInvalid(t *testing.T) { 64 | defer func() { 65 | catch(t, recover(), cpuLacksSSE2ErrMsg) 66 | }() 67 | 68 | cpu.reset() 69 | cpu.eax = 0 70 | expectVectorSupport(t, false) 71 | } 72 | 73 | func testCPUHasVectorSupportMFILow(t *testing.T) { 74 | defer func() { 75 | catch(t, recover(), "") 76 | }() 77 | 78 | cpu.reset() 79 | cpu.eax = 6 80 | expectVectorSupport(t, false) 81 | } 82 | 83 | func testCPUHasVectorSupportLacksBaseSet(t *testing.T) { 84 | defer func() { 85 | catch(t, recover(), cpuLacksSSE2ErrMsg) 86 | }() 87 | 88 | cpu.reset() 89 | cpu.edx ^= 1 << 26 // SSE2 is featured as 1 << 26, so we simply set everything *but*. 90 | expectVectorSupport(t, false) 91 | } 92 | 93 | func testCPUHasVectorSupportLacksExtendedSets(t *testing.T) { 94 | defer func() { 95 | catch(t, recover(), "") 96 | }() 97 | 98 | for _, c := range []struct { 99 | name string 100 | ebx uint32 101 | ecx uint32 102 | }{ 103 | {"SSE3", 0, ^uint32(0x00000001)}, 104 | {"SSSE3", 0, ^uint32(0x00000200)}, 105 | {"SSE4", 0, ^uint32(0x00080000)}, 106 | {"SSE4.2", 0, ^uint32(0x00100000)}, 107 | {"BMI1", ^uint32(0x00000008), 0}, 108 | {"BMI2", ^uint32(0x00000100), 0}, 109 | } { 110 | t.Run(c.name, func(t *testing.T) { 111 | cpu.reset() 112 | if c.ebx != 0 { 113 | cpu.ebx = c.ebx 114 | } 115 | 116 | if c.ecx != 0 { 117 | cpu.ecx = c.ecx 118 | } 119 | 120 | expectVectorSupport(t, false) 121 | }) 122 | } 123 | } 124 | 125 | func testCPUHasVectorPasses(t *testing.T) { 126 | defer func() { 127 | catch(t, recover(), "") 128 | }() 129 | 130 | cpu.reset() 131 | expectVectorSupport(t, true) 132 | } 133 | 134 | var cpu = func() *cpuMock { 135 | c := &cpuMock{} 136 | c.reset() 137 | 138 | return c 139 | }() 140 | 141 | type cpuMock struct { 142 | eax, ebx, ecx, edx uint32 143 | } 144 | 145 | func (c *cpuMock) reset() { 146 | c.eax = 7 147 | c.ebx = 0x00000108 148 | c.ecx = 0x00180201 149 | c.edx = 1 << 26 150 | } 151 | 152 | func (c *cpuMock) id(_ uint32) (eax, ebx, ecx, edx uint32) { 153 | return c.eax, c.ebx, c.ecx, c.edx 154 | } 155 | 156 | func catch(t *testing.T, err interface{}, expected string) { 157 | if expected != "" { 158 | if err == nil { 159 | t.Fatalf("expected a panic with message [%s]", expected) 160 | } 161 | 162 | if err != expected { 163 | t.Errorf("expected a panic with message [%s], got [%s]", expected, err) 164 | } 165 | 166 | return 167 | } 168 | 169 | if err != nil { 170 | t.Fatalf("expected to not panic, panicked with [%s]", err) 171 | } 172 | } 173 | 174 | func expectVectorSupport(t *testing.T, expected bool) { 175 | if actual := checkVectorSupport(); actual != expected { 176 | t.Errorf("expected [%t], got [%t]", expected, actual) 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /benchmark/encoding.go: -------------------------------------------------------------------------------- 1 | package benchmark 2 | 3 | import ( 4 | "crypto/rand" 5 | "testing" 6 | "time" 7 | 8 | "github.com/bwmarrin/snowflake" 9 | "github.com/celrenheit/sandflake" 10 | "github.com/gofrs/uuid" 11 | "github.com/muyo/sno" 12 | "github.com/oklog/ulid" 13 | "github.com/rs/xid" 14 | "github.com/segmentio/ksuid" 15 | ) 16 | 17 | func benchmarkEncoding(b *testing.B) { 18 | println("\n-- Encoding ----------------------------------------------------------------------------------\n") 19 | b.Run("enc", benchmarkEncode) 20 | println("\n-- Decoding ----------------------------------------------------------------------------------\n") 21 | b.Run("dec", benchmarkDecode) 22 | } 23 | 24 | func benchmarkEncode(b *testing.B) { 25 | b.Run("sno", benchmarkEncodeSno) 26 | b.Run("xid", benchmarkEncodeXid) 27 | b.Run("snowflake", benchmarkEncodeSnowflake) 28 | b.Run("sandflake", benchmarkEncodeSandflake) 29 | b.Run("uuid", benchmarkEncodeUUID) 30 | b.Run("ulid", benchmarkEncodeULID) 31 | b.Run("ksuid", benchmarkEncodeKSUID) 32 | } 33 | 34 | func benchmarkDecode(b *testing.B) { 35 | b.Run("sno", benchmarkDecodeSno) 36 | b.Run("xid", benchmarkDecodeXid) 37 | b.Run("snowflake", benchmarkDecodeSnowflake) 38 | b.Run("sandflake", benchmarkDecodeSandflake) 39 | b.Run("uuid", benchmarkDecodeUUID) 40 | b.Run("ulid", benchmarkDecodeULID) 41 | b.Run("ksuid", benchmarkDecodeKSUID) 42 | } 43 | 44 | func benchmarkEncodeSno(b *testing.B) { 45 | id := sno.New(255) 46 | b.ResetTimer() 47 | 48 | b.RunParallel(func(pb *testing.PB) { 49 | for pb.Next() { 50 | _ = id.String() 51 | } 52 | }) 53 | } 54 | 55 | func benchmarkEncodeXid(b *testing.B) { 56 | id := xid.New() 57 | b.ResetTimer() 58 | 59 | b.RunParallel(func(pb *testing.PB) { 60 | for pb.Next() { 61 | _ = id.String() 62 | } 63 | }) 64 | } 65 | 66 | func benchmarkEncodeSnowflake(b *testing.B) { 67 | n, _ := snowflake.NewNode(255) 68 | id := n.Generate() 69 | b.ResetTimer() 70 | 71 | b.RunParallel(func(pb *testing.PB) { 72 | for pb.Next() { 73 | _ = id.String() 74 | } 75 | }) 76 | } 77 | 78 | func benchmarkEncodeSandflake(b *testing.B) { 79 | var g sandflake.Generator 80 | id := g.Next() 81 | b.ResetTimer() 82 | 83 | b.RunParallel(func(pb *testing.PB) { 84 | for pb.Next() { 85 | _ = id.String() 86 | } 87 | }) 88 | } 89 | 90 | func benchmarkEncodeUUID(b *testing.B) { 91 | b.Run("v1", benchmarkEncodeUUIDv1) 92 | b.Run("v4", benchmarkEncodeUUIDv4) 93 | } 94 | 95 | func benchmarkEncodeUUIDv1(b *testing.B) { 96 | id, _ := uuid.NewV1() 97 | b.ResetTimer() 98 | 99 | b.RunParallel(func(pb *testing.PB) { 100 | for pb.Next() { 101 | _ = id.String() 102 | } 103 | }) 104 | } 105 | 106 | func benchmarkEncodeUUIDv4(b *testing.B) { 107 | id, _ := uuid.NewV4() 108 | b.ResetTimer() 109 | 110 | b.RunParallel(func(pb *testing.PB) { 111 | for pb.Next() { 112 | _ = id.String() 113 | } 114 | }) 115 | } 116 | 117 | func benchmarkEncodeULID(b *testing.B) { 118 | id, _ := ulid.New(ulid.Timestamp(time.Now()), rand.Reader) 119 | b.ResetTimer() 120 | 121 | b.RunParallel(func(pb *testing.PB) { 122 | for pb.Next() { 123 | _ = id.String() 124 | } 125 | }) 126 | } 127 | 128 | func benchmarkEncodeKSUID(b *testing.B) { 129 | id, _ := ksuid.NewRandom() 130 | b.ResetTimer() 131 | 132 | b.RunParallel(func(pb *testing.PB) { 133 | for pb.Next() { 134 | _ = id.String() 135 | } 136 | }) 137 | } 138 | 139 | func benchmarkDecodeSno(b *testing.B) { 140 | id := sno.New(255).String() 141 | b.ResetTimer() 142 | 143 | b.RunParallel(func(pb *testing.PB) { 144 | for pb.Next() { 145 | _, _ = sno.FromEncodedString(id) 146 | } 147 | }) 148 | } 149 | 150 | func benchmarkDecodeXid(b *testing.B) { 151 | id := xid.New().String() 152 | b.ResetTimer() 153 | 154 | b.RunParallel(func(pb *testing.PB) { 155 | for pb.Next() { 156 | _, _ = xid.FromString(id) 157 | } 158 | }) 159 | } 160 | 161 | func benchmarkDecodeSnowflake(b *testing.B) { 162 | n, _ := snowflake.NewNode(255) 163 | id := n.Generate().String() 164 | b.ResetTimer() 165 | 166 | b.RunParallel(func(pb *testing.PB) { 167 | for pb.Next() { 168 | _, _ = snowflake.ParseString(id) 169 | } 170 | }) 171 | } 172 | 173 | func benchmarkDecodeSandflake(b *testing.B) { 174 | var g sandflake.Generator 175 | id := g.Next().String() 176 | b.ResetTimer() 177 | 178 | b.RunParallel(func(pb *testing.PB) { 179 | for pb.Next() { 180 | _, _ = sandflake.Parse(id) 181 | } 182 | }) 183 | } 184 | 185 | func benchmarkDecodeUUID(b *testing.B) { 186 | b.Run("v1", benchmarkDecodeUUIDv1) 187 | b.Run("v4", benchmarkDecodeUUIDv4) 188 | } 189 | 190 | func benchmarkDecodeUUIDv1(b *testing.B) { 191 | id, _ := uuid.NewV1() 192 | s := id.String() 193 | b.ResetTimer() 194 | 195 | b.RunParallel(func(pb *testing.PB) { 196 | for pb.Next() { 197 | _, _ = uuid.FromString(s) 198 | } 199 | }) 200 | } 201 | 202 | func benchmarkDecodeUUIDv4(b *testing.B) { 203 | id, _ := uuid.NewV4() 204 | s := id.String() 205 | b.ResetTimer() 206 | 207 | b.RunParallel(func(pb *testing.PB) { 208 | for pb.Next() { 209 | _, _ = uuid.FromString(s) 210 | } 211 | }) 212 | } 213 | 214 | func benchmarkDecodeULID(b *testing.B) { 215 | id, _ := ulid.New(ulid.Timestamp(time.Now()), rand.Reader) 216 | s := id.String() 217 | b.ResetTimer() 218 | 219 | b.RunParallel(func(pb *testing.PB) { 220 | for pb.Next() { 221 | _, _ = ulid.Parse(s) 222 | } 223 | }) 224 | } 225 | 226 | func benchmarkDecodeKSUID(b *testing.B) { 227 | id, _ := ksuid.NewRandom() 228 | s := id.String() 229 | b.ResetTimer() 230 | 231 | b.RunParallel(func(pb *testing.PB) { 232 | for pb.Next() { 233 | _, _ = ksuid.Parse(s) 234 | } 235 | }) 236 | } 237 | -------------------------------------------------------------------------------- /internal/encoding_amd64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | #include "funcdata.h" 3 | 4 | DATA shuffleVec<>+0(SB)/8, $0x0001020304050607 5 | DATA shuffleVec<>+8(SB)/8, $0x08090A0B0C0D0E0F 6 | GLOBL shuffleVec<>(SB), (NOPTR+RODATA), $16 7 | 8 | DATA offsetCharset<>+0(SB)/8, $0x3232323232323232 // 50 9 | DATA offsetCharset<>+8(SB)/8, $0x3232323232323232 10 | GLOBL offsetCharset<>(SB), (NOPTR+RODATA), $16 11 | 12 | DATA selectLetters<>+0(SB)/8, $0x0707070707070707 13 | DATA selectLetters<>+8(SB)/8, $0x0707070707070707 14 | GLOBL selectLetters<>(SB), (NOPTR+RODATA), $16 15 | 16 | DATA subLetters<>+0(SB)/8, $0xD8D8D8D8D8D8D8D8 // 216 17 | DATA subLetters<>+8(SB)/8, $0xD8D8D8D8D8D8D8D8 18 | GLOBL subLetters<>(SB), (NOPTR+RODATA), $16 19 | 20 | DATA interleave<>+0(SB)/8, $0x1f1f1f1f1f1f1f1f 21 | DATA interleave<>+8(SB)/8, $0x1f1f1f1f1f1f1f1f 22 | GLOBL interleave<>(SB), (NOPTR+RODATA), $16 23 | 24 | // func Encode(src *[10]byte) (dst [16]byte) 25 | TEXT ·Encode(SB), NOSPLIT, $0-24 26 | MOVQ src+0(FP), BX 27 | 28 | MOVQ 0(BX), AX 29 | BSWAPQ AX 30 | SHRQ $24, AX 31 | 32 | MOVQ 5(BX), BX 33 | BSWAPQ BX 34 | SHRQ $24, BX 35 | 36 | CMPB ·hasVectorSupport(SB), $1 37 | JEQ encodeVec 38 | 39 | LEAQ dst+8(FP), DX 40 | 41 | MOVB AX, 7(DX) 42 | SHRQ $5, AX 43 | MOVB AX, 6(DX) 44 | SHRQ $5, AX 45 | MOVB AX, 5(DX) 46 | SHRQ $5, AX 47 | MOVB AX, 4(DX) 48 | SHRQ $5, AX 49 | MOVB AX, 3(DX) 50 | SHRQ $5, AX 51 | MOVB AX, 2(DX) 52 | SHRQ $5, AX 53 | MOVB AX, 1(DX) 54 | SHRQ $5, AX 55 | MOVB AX, 0(DX) 56 | 57 | MOVB BX, 15(DX) 58 | SHRQ $5, BX 59 | MOVB BX, 14(DX) 60 | SHRQ $5, BX 61 | MOVB BX, 13(DX) 62 | SHRQ $5, BX 63 | MOVB BX, 12(DX) 64 | SHRQ $5, BX 65 | MOVB BX, 11(DX) 66 | SHRQ $5, BX 67 | MOVB BX, 10(DX) 68 | SHRQ $5, BX 69 | MOVB BX, 9(DX) 70 | SHRQ $5, BX 71 | MOVB BX, 8(DX) 72 | 73 | MOVOU (DX), X0 74 | PAND interleave<>+0(SB), X0 75 | 76 | JMP encodeFinish 77 | 78 | encodeVec: 79 | PDEPQ interleave<>+0(SB), AX, AX 80 | PDEPQ interleave<>+0(SB), BX, BX 81 | 82 | MOVQ AX, X0 83 | PINSRQ $1, BX, X0 84 | PSHUFB shuffleVec<>+0(SB), X0 85 | 86 | encodeFinish: 87 | MOVOA X0, X1 88 | PADDB offsetCharset<>+0(SB), X0 // Add 50, where 50 is the beginning of our alphabet (ASCII '2') 89 | // That takes care of all digits. We need to offset letters, though, 90 | // as they start at char('a'), which is 97 in dec. 91 | PCMPGTB selectLetters<>+0(SB), X1 // PCMPGTB will set all bytes with letters to 255. 92 | PSUBUSB subLetters<>+0(SB), X1 // We need to add 39 to each letter in X0 to move them into the right range. 93 | // Note: Not 47 (50 + 47 = 97), as our letters are in the [8..31] range. 94 | // And so we simply do a (unsigned) subtraction of 216 and as a result 95 | // get a mask of 39 (the offset) in dec where all the letters are. 96 | PADDB X1, X0 // Add them together and done. 97 | 98 | MOVOU X0, dst+8(FP) 99 | 100 | RET 101 | 102 | 103 | //func Decode(src []byte) (dst [10]byte) 104 | TEXT ·Decode(SB), NOSPLIT, $0-34 105 | // The entirety of this function is simply the inverse of encode. 106 | MOVQ src+0(FP), BX 107 | LEAQ dst+24(FP), DX 108 | MOVOU (BX), X0 109 | 110 | PSUBB offsetCharset<>+0(SB), X0 111 | MOVOA X0, X1 112 | 113 | PCMPGTB selectLetters<>+0(SB), X1 114 | PSUBUSB subLetters<>+0(SB), X1 115 | PSUBB X1, X0 116 | 117 | CMPB ·hasVectorSupport(SB), $0 118 | JEQ decodeFallback 119 | 120 | PSHUFB shuffleVec<>+0(SB), X0 121 | 122 | MOVQ X0, R8 123 | PEXTRQ $1, X0, R9 124 | 125 | PEXTQ interleave<>+0(SB), R8, R8 126 | BSWAPQ R8 127 | SHRQ $24, R8 128 | 129 | PEXTQ interleave<>+0(SB), R9, R9 130 | BSWAPQ R9 131 | SHRQ $24, R9 132 | 133 | MOVQ R8, 0(DX) 134 | MOVQ R9, 5(DX) 135 | 136 | RET 137 | 138 | decodeFallback: 139 | // TODO(alcore) Subject to an optimization pass. 140 | MOVQ X0, R8 141 | PSRLO $8, X0 142 | MOVQ X0, R9 143 | 144 | // Timestamp block - 0 145 | MOVB R8, BX 146 | SHLB $3, BX 147 | 148 | SHRQ $8, R8 // 1 149 | MOVB R8, AX 150 | SHRB $2, AX 151 | ORB AX, BX 152 | 153 | MOVB BX, 0(DX) 154 | 155 | MOVB R8, BX 156 | SHLB $6, BX 157 | 158 | SHRQ $8, R8 // 2 159 | MOVB R8, AX 160 | SHLB $1, AX 161 | ORB AX, BX 162 | 163 | SHRQ $8, R8 // 3 164 | MOVB R8, CX 165 | SHRB $4, CX 166 | ORB CX, BX 167 | 168 | MOVB BX, 1(DX) 169 | 170 | MOVB R8, BX 171 | SHLB $4, BX 172 | 173 | SHRQ $8, R8 // 4 174 | MOVB R8, AX 175 | SHRB $1, AX 176 | ORB AX, BX 177 | 178 | MOVB BX, 2(DX) 179 | 180 | MOVB R8, BX 181 | SHLB $7, BX 182 | 183 | SHRQ $8, R8 // 5 184 | MOVB R8, CX 185 | SHLB $2, CX 186 | ORB CX, BX 187 | 188 | SHRQ $8, R8 // 6 189 | MOVB R8, AX 190 | SHRB $3, AX 191 | ORB AX, BX 192 | 193 | MOVB BX, 3(DX) 194 | 195 | MOVB R8, BX 196 | SHLB $5, BX 197 | 198 | SHRQ $8, R8 // 7 199 | ORB R8, BX 200 | 201 | MOVB BX, 4(DX) 202 | 203 | // Payload block - 8 204 | MOVB R9, BX 205 | SHLB $3, BX 206 | 207 | SHRQ $8, R9 // 9 208 | MOVB R9, AX 209 | SHRB $2, AX 210 | ORB AX, BX 211 | 212 | MOVB BX, 5(DX) 213 | 214 | MOVB R9, BX 215 | SHLB $6, BX 216 | 217 | SHRQ $8, R9 // 10 218 | MOVB R9, AX 219 | SHLB $1, AX 220 | ORB AX, BX 221 | 222 | SHRQ $8, R9 // 11 223 | MOVB R9, CX 224 | SHRB $4, CX 225 | ORB CX, BX 226 | 227 | MOVB BX, 6(DX) 228 | 229 | MOVB R9, BX 230 | SHLB $4, BX 231 | 232 | SHRQ $8, R9 // 12 233 | MOVB R9, AX 234 | SHRB $1, AX 235 | ORB AX, BX 236 | 237 | MOVB BX, 7(DX) 238 | 239 | MOVB R9, BX 240 | SHLB $7, BX 241 | 242 | SHRQ $8, R9 // 13 243 | MOVB R9, CX 244 | SHLB $2, CX 245 | ORB CX, BX 246 | 247 | SHRQ $8, R9 // 14 248 | MOVB R9, AX 249 | SHRB $3, AX 250 | ORB AX, BX 251 | 252 | MOVB BX, 8(DX) 253 | 254 | MOVB R9, BX 255 | SHLB $5, BX 256 | 257 | SHRQ $8, R9 // 15 258 | ORB R9, BX 259 | 260 | MOVB BX, 9(DX) 261 | 262 | RET 263 | -------------------------------------------------------------------------------- /global_test.go: -------------------------------------------------------------------------------- 1 | package sno 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func TestGlobal_Init(t *testing.T) { 9 | t.Run("sane", func(t *testing.T) { 10 | defer func() { 11 | if err := recover(); err != nil { 12 | t.Fatal("expected init to not panic") 13 | } 14 | }() 15 | 16 | // Must never panic. 17 | doInit() 18 | }) 19 | 20 | t.Run("panics", func(t *testing.T) { 21 | defer func() { 22 | err := recover() 23 | if err == nil { 24 | t.Fatal("expected init to panic") 25 | } 26 | 27 | if _, ok := err.(*PartitionPoolExhaustedError); !ok { 28 | t.Errorf("expected panic with type [%T], got [%T]", &PartitionPoolExhaustedError{}, err) 29 | return 30 | } 31 | }() 32 | 33 | // Theoretically impossible to happen but ensure that we cover all "potential" cases 34 | // where the global generator could fail to get constructed and we need to panic. 35 | // 36 | // At present only one branch even has an error return, so we simulate that... impossibility 37 | // by trying to create more Generators without snapshots than we have a Partition pool for. 38 | // Note that we are invoking doInit() instead of NewGenerator() directly. 39 | for i := 0; i < 2*MaxPartition; i++ { 40 | doInit() 41 | } 42 | }) 43 | } 44 | 45 | func TestGlobal_FromEncodedString_Valid(t *testing.T) { 46 | src := "brpk4q72xwf2m63l" 47 | expected := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51} 48 | 49 | actual, err := FromEncodedString(src) 50 | if err != nil { 51 | t.Fatal(err) 52 | } 53 | 54 | if actual != expected { 55 | t.Errorf("expected [%v], got [%v]", expected, actual) 56 | } 57 | } 58 | 59 | func TestGlobal_FromEncodedString_Invalid(t *testing.T) { 60 | _, err := FromEncodedString("012brpk4q72xwf2m63l1245453gfdgxz") 61 | 62 | if _, ok := err.(*InvalidDataSizeError); !ok { 63 | t.Errorf("expected error with type [%T], got [%T]", &InvalidDataSizeError{}, err) 64 | } 65 | 66 | if err != nil && err.Error() != errInvalidDataSizeMsg { 67 | t.Errorf("expected error [%s], got [%s]", errInvalidDataSizeMsg, err.Error()) 68 | } 69 | } 70 | 71 | func TestGlobal_FromEncodedBytes_Valid(t *testing.T) { 72 | src := []byte("brpk4q72xwf2m63l") 73 | expected := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51} 74 | 75 | actual, err := FromEncodedBytes(src) 76 | if err != nil { 77 | t.Fatal(err) 78 | } 79 | 80 | if actual != expected { 81 | t.Errorf("expected [%v], got [%v]", expected, actual) 82 | } 83 | } 84 | 85 | func TestGlobal_FromEncodedBytes_Invalid(t *testing.T) { 86 | _, err := FromEncodedBytes([]byte("012brpk4q72xwf2m63l1245453gfdgxz")) 87 | 88 | if _, ok := err.(*InvalidDataSizeError); !ok { 89 | t.Errorf("expected error with type [%T], got [%T]", &InvalidDataSizeError{}, err) 90 | } 91 | 92 | if err != nil && err.Error() != errInvalidDataSizeMsg { 93 | t.Errorf("expected error [%s], got [%s]", errInvalidDataSizeMsg, err.Error()) 94 | } 95 | } 96 | 97 | func TestGlobal_FromBinaryBytes_Valid(t *testing.T) { 98 | src := []byte{78, 111, 33, 96, 160, 255, 154, 10, 16, 51} 99 | expected := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51} 100 | 101 | actual, err := FromBinaryBytes(src) 102 | if err != nil { 103 | t.Fatal(err) 104 | } 105 | 106 | if actual != expected { 107 | t.Errorf("expected [%v], got [%v]", expected, actual) 108 | } 109 | } 110 | 111 | func TestGlobal_FromBinaryBytes_Invariant(t *testing.T) { 112 | expected := New(255) 113 | actual, err := FromBinaryBytes(expected[:]) 114 | if err != nil { 115 | t.Fatal(err) 116 | } 117 | 118 | if actual != expected { 119 | t.Errorf("expected [%v], got [%v]", expected, actual) 120 | } 121 | } 122 | 123 | func TestGlobal_FromBinaryBytes_Invalid(t *testing.T) { 124 | for _, c := range []struct { 125 | n int 126 | invalid bool 127 | }{ 128 | {4, true}, 129 | {8, true}, 130 | {10, false}, 131 | {12, true}, 132 | {16, true}, 133 | } { 134 | b := make([]byte, c.n) 135 | _, err := FromBinaryBytes(b) 136 | 137 | if actual, expected := err != nil, c.invalid; actual != expected { 138 | t.Errorf("expected error [%v], got [%v]", expected, actual) 139 | } 140 | } 141 | } 142 | 143 | func TestGlobal_Collection(t *testing.T) { 144 | var ids = []ID{{1}, {2}, {3}, {4}, {5}, {6}} 145 | 146 | t.Run("len", makeCollectionLenTest(ids)) 147 | t.Run("less", makeCollectionLessTest(ids)) 148 | t.Run("swap", makeCollectionSwapTest(ids)) 149 | t.Run("sort", makeCollectionSortTest(ids)) 150 | } 151 | 152 | func makeCollectionLenTest(ids []ID) func(t *testing.T) { 153 | n := len(ids) 154 | return func(t *testing.T) { 155 | if actual, expected := collection([]ID{}).Len(), 0; actual != expected { 156 | t.Errorf("Len() %v, want %v", expected, actual) 157 | } 158 | 159 | if actual, expected := collection(ids).Len(), n; actual != expected { 160 | t.Errorf("expected [%v], got [%v]", expected, actual) 161 | } 162 | } 163 | } 164 | 165 | func makeCollectionLessTest(ids []ID) func(t *testing.T) { 166 | return func(t *testing.T) { 167 | c := collection(ids) 168 | if c.Less(0, 0) { 169 | t.Errorf("expected [false], got [true]") 170 | } 171 | 172 | if !c.Less(0, 1) { 173 | t.Errorf("expected [true], got [false]") 174 | } 175 | 176 | if !c.Less(1, 2) { 177 | t.Errorf("expected [true], got [false]") 178 | } 179 | } 180 | } 181 | 182 | func makeCollectionSwapTest(ids []ID) func(t *testing.T) { 183 | return func(t *testing.T) { 184 | b := make([]ID, len(ids)) 185 | copy(b, ids) 186 | 187 | c := collection(b) 188 | c.Swap(1, 2) 189 | if actual, expected := c[1], ids[2]; actual != expected { 190 | t.Errorf("expected [%v], got [%v]", expected, actual) 191 | } 192 | if actual, expected := c[2], ids[1]; actual != expected { 193 | t.Errorf("expected [%v], got [%v]", expected, actual) 194 | } 195 | c.Swap(3, 3) 196 | if actual, expected := c[3], ids[3]; actual != expected { 197 | t.Errorf("expected [%v], got [%v]", expected, actual) 198 | } 199 | } 200 | } 201 | 202 | func makeCollectionSortTest(ids []ID) func(t *testing.T) { 203 | return func(t *testing.T) { 204 | src := make([]ID, len(ids)) 205 | copy(src, ids) 206 | 207 | // Input IDs are sorted, so a comparison will do the trick. 208 | src[2], src[1] = src[1], src[2] 209 | src[4], src[3] = src[3], src[4] 210 | 211 | Sort(src) 212 | 213 | if actual, expected := src, ids; !reflect.DeepEqual(actual, expected) { 214 | t.Errorf("expected [%v], got [%v]", expected, actual) 215 | } 216 | } 217 | } 218 | 219 | func TestGlobal_Zero(t *testing.T) { 220 | if actual := Zero(); actual != (ID{}) { 221 | t.Error("Zero() not equal to ID{}") 222 | } 223 | } 224 | 225 | func TestGlobal_Zero_IsZero(t *testing.T) { 226 | if !Zero().IsZero() { 227 | t.Error("Zero().IsZero() is not true") 228 | } 229 | } 230 | -------------------------------------------------------------------------------- /benchmark/generation.go: -------------------------------------------------------------------------------- 1 | package benchmark 2 | 3 | import ( 4 | crand "crypto/rand" 5 | mrand "math/rand" 6 | "sync" 7 | "testing" 8 | "time" 9 | 10 | "github.com/bwmarrin/snowflake" 11 | "github.com/celrenheit/sandflake" 12 | "github.com/gofrs/uuid" 13 | "github.com/lucsky/cuid" 14 | "github.com/muyo/sno" 15 | "github.com/oklog/ulid" 16 | "github.com/rs/xid" 17 | "github.com/segmentio/ksuid" 18 | "github.com/sony/sonyflake" 19 | ) 20 | 21 | func benchmarkGeneration(b *testing.B) { 22 | println("\n-- Generation (sequential) -------------------------------------------------------------------\n") 23 | b.Run("s", benchmarkGenerateSequential) 24 | println("\n-- Generation (parallel) ---------------------------------------------------------------------\n") 25 | b.Run("p", benchmarkGenerateParallel) 26 | } 27 | 28 | func benchmarkGenerateSequential(b *testing.B) { 29 | b.Run("sno", benchmarkGenerateSequentialSno) // Bounded 30 | b.Run("xid", benchmarkGenerateSequentialXid) // Unbounded 31 | b.Run("snowflake", benchmarkGenerateSequentialSnowflake) // Bounded 32 | b.Run("sonyflake", benchmarkGenerateSequentialSonyflake) // Bounded 33 | b.Run("sandflake", benchmarkGenerateSequentialSandflake) // Unbounded 34 | b.Run("cuid", benchmarkGenerateSequentialCuid) // Unbounded 35 | b.Run("uuid", benchmarkGenerateSequentialUUID) // Unbounded 36 | b.Run("ulid", benchmarkGenerateSequentialULID) // Unbounded 37 | b.Run("ksuid", benchmarkGenerateSequentialKSUID) // Unbounded 38 | } 39 | 40 | func benchmarkGenerateParallel(b *testing.B) { 41 | b.Run("sno", benchmarkGenerateParallelSno) // Bounded 42 | b.Run("xid", benchmarkGenerateParallelXid) // Unbounded 43 | b.Run("snowflake", benchmarkGenerateParallelSnowflake) // Bounded 44 | b.Run("sonyflake", benchmarkGenerateParallelSonyflake) // Bounded 45 | b.Run("sandflake", benchmarkGenerateParallelSandflake) // Unbounded 46 | b.Run("cuid", benchmarkGenerateParallelCuid) // Unbounded 47 | b.Run("uuid", benchmarkGenerateParallelUUID) // Unbounded 48 | b.Run("ulid", benchmarkGenerateParallelULID) // Unbounded 49 | b.Run("ksuid", benchmarkGenerateParallelKSUID) // Unbounded 50 | } 51 | 52 | func benchmarkGenerateSequentialSno(b *testing.B) { 53 | for i := 0; i < b.N; i++ { 54 | _ = sno.New(255) 55 | } 56 | } 57 | 58 | func benchmarkGenerateSequentialXid(b *testing.B) { 59 | for i := 0; i < b.N; i++ { 60 | _ = xid.New() 61 | } 62 | } 63 | 64 | func benchmarkGenerateSequentialSnowflake(b *testing.B) { 65 | n, _ := snowflake.NewNode(255) 66 | b.ResetTimer() 67 | 68 | for i := 0; i < b.N; i++ { 69 | _ = n.Generate() 70 | } 71 | } 72 | 73 | func benchmarkGenerateSequentialSonyflake(b *testing.B) { 74 | g := sonyflake.NewSonyflake(sonyflake.Settings{}) 75 | b.ResetTimer() 76 | 77 | for i := 0; i < b.N; i++ { 78 | _, _ = g.NextID() 79 | } 80 | } 81 | 82 | func benchmarkGenerateSequentialSandflake(b *testing.B) { 83 | var g sandflake.Generator 84 | b.ResetTimer() 85 | 86 | for i := 0; i < b.N; i++ { 87 | _ = g.Next() 88 | } 89 | } 90 | 91 | func benchmarkGenerateSequentialCuid(b *testing.B) { 92 | for i := 0; i < b.N; i++ { 93 | _ = cuid.New() 94 | } 95 | } 96 | 97 | func benchmarkGenerateSequentialUUID(b *testing.B) { 98 | b.Run("v1", benchmarkGenerateSequentialUUIDv1) 99 | b.Run("v4", benchmarkGenerateSequentialUUIDv4) 100 | } 101 | 102 | func benchmarkGenerateSequentialUUIDv1(b *testing.B) { 103 | for i := 0; i < b.N; i++ { 104 | _, _ = uuid.NewV1() 105 | } 106 | } 107 | 108 | func benchmarkGenerateSequentialUUIDv4(b *testing.B) { 109 | for i := 0; i < b.N; i++ { 110 | _, _ = uuid.NewV4() 111 | } 112 | } 113 | 114 | // A note about the included ULID runs. 115 | // 116 | // ULIDs generators expect time to be passed in as a timestamp with msec precision. All of the other 117 | // libraries being tested handle time sourcing themselves, which is reflected in their results. 118 | // Therefore the time fetching (via time.Now()) including the unit conversion (via ulid.Timestamp()) 119 | // is included in each iteration. If the time had been fetched outside the benchmark loop, the results 120 | // would be roughly 7nsec/op lower (@go 1.14.1, Windows 10, i7 4770k 4.4GHz). 121 | // 122 | // The ULID package benchmarks itself when no entropy source is provided, which in a run resulted 123 | // at 29.8ns/op (relative to unbounded Sno at 8.8ns/op, for reference). However, this test is 124 | // excluded in this benchmark. While it may measure ULID's raw overhead, it does not measure 125 | // a end-user usable case since ULIDs without entropy are essentially a 48bit timestamp and... 126 | // 10 zero bytes, which defeats the purpose of the spec. 127 | func benchmarkGenerateSequentialULID(b *testing.B) { 128 | b.Run("crypto", benchmarkGenerateSequentialULIDCrypto) 129 | b.Run("math", benchmarkSequentialNewULIDMath) 130 | } 131 | 132 | func benchmarkGenerateSequentialULIDCrypto(b *testing.B) { 133 | rng := crand.Reader 134 | b.ResetTimer() 135 | 136 | for i := 0; i < b.N; i++ { 137 | _, _ = ulid.New(ulid.Timestamp(time.Now()), rng) 138 | } 139 | } 140 | 141 | func benchmarkSequentialNewULIDMath(b *testing.B) { 142 | rng := mrand.New(mrand.NewSource(time.Now().UnixNano())) 143 | b.ResetTimer() 144 | 145 | for i := 0; i < b.N; i++ { 146 | _, _ = ulid.New(ulid.Timestamp(time.Now()), rng) 147 | } 148 | } 149 | 150 | func benchmarkGenerateSequentialKSUID(b *testing.B) { 151 | for i := 0; i < b.N; i++ { 152 | _, _ = ksuid.NewRandom() 153 | } 154 | } 155 | 156 | func benchmarkGenerateParallelSno(b *testing.B) { 157 | b.RunParallel(func(pb *testing.PB) { 158 | for pb.Next() { 159 | _ = sno.New(255) 160 | } 161 | }) 162 | } 163 | 164 | func benchmarkGenerateParallelXid(b *testing.B) { 165 | b.RunParallel(func(pb *testing.PB) { 166 | for pb.Next() { 167 | _ = xid.New() 168 | } 169 | }) 170 | } 171 | 172 | func benchmarkGenerateParallelSnowflake(b *testing.B) { 173 | n, _ := snowflake.NewNode(255) 174 | b.ResetTimer() 175 | 176 | b.RunParallel(func(pb *testing.PB) { 177 | for pb.Next() { 178 | _ = n.Generate() 179 | } 180 | }) 181 | } 182 | 183 | func benchmarkGenerateParallelSonyflake(b *testing.B) { 184 | g := sonyflake.NewSonyflake(sonyflake.Settings{}) 185 | b.ResetTimer() 186 | 187 | b.RunParallel(func(pb *testing.PB) { 188 | for pb.Next() { 189 | _, _ = g.NextID() 190 | } 191 | }) 192 | } 193 | 194 | func benchmarkGenerateParallelSandflake(b *testing.B) { 195 | var g sandflake.Generator 196 | b.ResetTimer() 197 | 198 | b.RunParallel(func(pb *testing.PB) { 199 | for pb.Next() { 200 | _ = g.Next() 201 | } 202 | }) 203 | } 204 | 205 | func benchmarkGenerateParallelCuid(b *testing.B) { 206 | b.RunParallel(func(pb *testing.PB) { 207 | for pb.Next() { 208 | _ = cuid.New() 209 | } 210 | }) 211 | } 212 | 213 | func benchmarkGenerateParallelUUID(b *testing.B) { 214 | b.Run("v1", benchmarkGenerateParallelUUIDv1) 215 | b.Run("v4", benchmarkGenerateParallelUUIDv4) 216 | } 217 | 218 | func benchmarkGenerateParallelUUIDv1(b *testing.B) { 219 | b.RunParallel(func(pb *testing.PB) { 220 | for pb.Next() { 221 | _, _ = uuid.NewV1() 222 | } 223 | }) 224 | } 225 | 226 | func benchmarkGenerateParallelUUIDv4(b *testing.B) { 227 | b.RunParallel(func(pb *testing.PB) { 228 | for pb.Next() { 229 | _, _ = uuid.NewV4() 230 | } 231 | }) 232 | } 233 | 234 | func benchmarkGenerateParallelULID(b *testing.B) { 235 | b.Run("crypto", benchmarkGenerateParallelULIDCrypto) 236 | b.Run("math", benchmarkGenerateParallelULIDMath) 237 | } 238 | 239 | func benchmarkGenerateParallelULIDCrypto(b *testing.B) { 240 | rng := crand.Reader 241 | b.ResetTimer() 242 | 243 | b.RunParallel(func(pb *testing.PB) { 244 | for pb.Next() { 245 | _, _ = ulid.New(ulid.Timestamp(time.Now()), rng) 246 | } 247 | }) 248 | } 249 | 250 | func benchmarkGenerateParallelULIDMath(b *testing.B) { 251 | // Note: Requires manual locking for this run to complete. 252 | rng := mrand.New(mrand.NewSource(time.Now().UnixNano())) 253 | mu := sync.Mutex{} 254 | b.ResetTimer() 255 | 256 | b.RunParallel(func(pb *testing.PB) { 257 | for pb.Next() { 258 | mu.Lock() 259 | _, _ = ulid.New(ulid.Timestamp(time.Now()), rng) 260 | mu.Unlock() 261 | } 262 | }) 263 | } 264 | 265 | func benchmarkGenerateParallelKSUID(b *testing.B) { 266 | b.RunParallel(func(pb *testing.PB) { 267 | for pb.Next() { 268 | _, _ = ksuid.NewRandom() 269 | } 270 | }) 271 | } 272 | -------------------------------------------------------------------------------- /id.go: -------------------------------------------------------------------------------- 1 | package sno 2 | 3 | import ( 4 | "bytes" 5 | "database/sql/driver" 6 | "encoding/binary" 7 | "time" 8 | "unsafe" 9 | 10 | "github.com/muyo/sno/internal" 11 | ) 12 | 13 | const ( 14 | // SizeBinary is the length of an ID in its binary array representation. 15 | SizeBinary = 10 16 | 17 | // SizeEncoded is the length of an ID in its canonical base-32 encoded representation. 18 | SizeEncoded = 16 19 | 20 | // Epoch is the offset to the Unix epoch, in seconds, that ID timestamps are embedded with. 21 | // Corresponds to 2010-01-01 00:00:00 UTC. 22 | Epoch = 1262304000 23 | epochNsec = Epoch * 1e9 24 | 25 | // TimeUnit is the time unit timestamps are embedded with - 4msec, as expressed in nanoseconds. 26 | TimeUnit = 4e6 27 | 28 | // MaxTimestamp is the max number of time units that can be embedded in an ID's timestamp. 29 | // Corresponds to 2079-09-07 15:47:35.548 UTC in our custom epoch. 30 | MaxTimestamp = 1<<39 - 1 31 | 32 | // MaxPartition is the max Partition number when represented as a uint16. 33 | // It equals max uint16 (65535) and is the equivalent of Partition{255, 255}. 34 | MaxPartition = 1<<16 - 1 35 | 36 | // MaxSequence is the max sequence number supported by generators. As bounds can be set 37 | // individually - this is the upper cap and equals max uint16 (65535). 38 | MaxSequence = 1<<16 - 1 39 | ) 40 | 41 | // ID is the binary representation of a sno ID. 42 | // 43 | // It is comprised of 10 bytes in 2 blocks of 40 bits, with its components stored in big-endian order. 44 | // 45 | // The timestamp: 46 | // 39 bits - unsigned milliseconds since epoch with a 4msec resolution 47 | // 1 bit - the tick-tock toggle 48 | // 49 | // The payload: 50 | // 8 bits - metabyte 51 | // 16 bits - partition 52 | // 16 bits - sequence 53 | // 54 | type ID [SizeBinary]byte 55 | 56 | // Time returns the timestamp of the ID as a time.Time struct. 57 | func (id ID) Time() time.Time { 58 | var ( 59 | units = int64(binary.BigEndian.Uint64(id[:]) >> 25) 60 | s = units/250 + Epoch 61 | ns = (units % 250) * TimeUnit 62 | ) 63 | 64 | return time.Unix(s, ns) 65 | } 66 | 67 | // Timestamp returns the timestamp of the ID as nanoseconds relative to the Unix epoch. 68 | func (id ID) Timestamp() int64 { 69 | return int64(binary.BigEndian.Uint64(id[:])>>25)*TimeUnit + epochNsec 70 | } 71 | 72 | // Meta returns the metabyte of the ID. 73 | func (id ID) Meta() byte { 74 | return id[5] 75 | } 76 | 77 | // Partition returns the partition of the ID. 78 | func (id ID) Partition() Partition { 79 | return Partition{id[6], id[7]} 80 | } 81 | 82 | // Sequence returns the sequence of the ID. 83 | func (id ID) Sequence() uint16 { 84 | return uint16(id[8])<<8 | uint16(id[9]) 85 | } 86 | 87 | // IsZero checks whether the ID is a zero value. 88 | func (id ID) IsZero() bool { 89 | return id == zero 90 | } 91 | 92 | // String implements fmt.Stringer by returning the base32-encoded representation of the ID 93 | // as a string. 94 | func (id ID) String() string { 95 | enc := internal.Encode((*[10]byte)(&id)) 96 | dst := enc[:] 97 | 98 | return *(*string)(unsafe.Pointer(&dst)) 99 | } 100 | 101 | // Bytes returns the ID as a byte slice. 102 | func (id ID) Bytes() []byte { 103 | return id[:] 104 | } 105 | 106 | // MarshalBinary implements encoding.BinaryMarshaler by returning the ID as a byte slice. 107 | func (id ID) MarshalBinary() ([]byte, error) { 108 | return id[:], nil 109 | } 110 | 111 | // UnmarshalBinary implements encoding.BinaryUnmarshaler by copying src into the receiver. 112 | func (id *ID) UnmarshalBinary(src []byte) error { 113 | if len(src) != SizeBinary { 114 | return &InvalidDataSizeError{Size: len(src)} 115 | } 116 | 117 | copy(id[:], src) 118 | 119 | return nil 120 | } 121 | 122 | // MarshalText implements encoding.TextMarshaler by returning the base32-encoded representation 123 | // of the ID as a byte slice. 124 | func (id ID) MarshalText() ([]byte, error) { 125 | b := internal.Encode((*[10]byte)(&id)) 126 | 127 | return b[:], nil 128 | } 129 | 130 | // UnmarshalText implements encoding.TextUnmarshaler by decoding a base32-encoded representation 131 | // of the ID from src into the receiver. 132 | func (id *ID) UnmarshalText(src []byte) error { 133 | if len(src) != SizeEncoded { 134 | return &InvalidDataSizeError{Size: len(src)} 135 | } 136 | 137 | *id = internal.Decode(src) 138 | 139 | return nil 140 | } 141 | 142 | // MarshalJSON implements encoding.json.Marshaler by returning the base32-encoded and quoted 143 | // representation of the ID as a byte slice. 144 | // 145 | // If the ID is a zero value, MarshalJSON will return a byte slice containing 'null' (unquoted) instead. 146 | // 147 | // Note that ID's are byte arrays and Go's std (un)marshaler is unable to distinguish 148 | // the zero values of custom structs as "empty", so the 'omitempty' tag has the same caveats 149 | // as, for example, time.Time. 150 | // 151 | // See https://github.com/golang/go/issues/11939 for tracking purposes as changes are being 152 | // discussed. 153 | func (id ID) MarshalJSON() ([]byte, error) { 154 | if id == zero { 155 | return []byte("null"), nil 156 | } 157 | 158 | dst := []byte("\" \"") 159 | enc := internal.Encode((*[10]byte)(&id)) 160 | copy(dst[1:], enc[:]) 161 | 162 | return dst, nil 163 | } 164 | 165 | // UnmarshalJSON implements encoding.json.Unmarshaler by decoding a base32-encoded and quoted 166 | // representation of an ID from src into the receiver. 167 | // 168 | // If the byte slice is an unquoted 'null', the receiving ID will instead be set 169 | // to a zero ID. 170 | func (id *ID) UnmarshalJSON(src []byte) error { 171 | n := len(src) 172 | if n != SizeEncoded+2 { 173 | if n == 4 && src[0] == 'n' && src[1] == 'u' && src[2] == 'l' && src[3] == 'l' { 174 | *id = zero 175 | return nil 176 | } 177 | 178 | return &InvalidDataSizeError{Size: n} 179 | } 180 | 181 | *id = internal.Decode(src[1 : n-1]) 182 | 183 | return nil 184 | } 185 | 186 | // Compare returns an integer comparing this and that ID lexicographically. 187 | // 188 | // Returns: 189 | // 0 - if this and that are equal, 190 | // -1 - if this is smaller than that, 191 | // 1 - if this is greater than that. 192 | // 193 | // Note that IDs are byte arrays - if all you need is to check for equality, a simple... 194 | // if thisID == thatID {...} 195 | // ... will do the trick. 196 | func (id ID) Compare(that ID) int { 197 | return bytes.Compare(id[:], that[:]) 198 | } 199 | 200 | // Value implements the sql.driver.Valuer interface by returning the ID as a byte slice. 201 | // If you'd rather receive a string, wrapping an ID is a possible solution... 202 | // 203 | // // stringedID wraps a sno ID to provide a driver.Valuer implementation which 204 | // // returns strings. 205 | // type stringedID sno.ID 206 | // 207 | // func (id stringedID) Value() (driver.Value, error) { 208 | // return sno.ID(id).String(), nil 209 | // } 210 | // 211 | // // ... and use it via: 212 | // db.Exec(..., stringedID(id)) 213 | func (id ID) Value() (driver.Value, error) { 214 | return id.MarshalBinary() 215 | } 216 | 217 | // Scan implements the sql.Scanner interface by attempting to convert the given value 218 | // into an ID. 219 | // 220 | // When given a byte slice: 221 | // - with a length of SizeBinary (10), its contents will be copied into ID. 222 | // - with a length of 0, ID will be set to a zero ID. 223 | // - with any other length, sets ID to a zero ID and returns InvalidDataSizeError. 224 | // 225 | // When given a string: 226 | // - with a length of SizeEncoded (16), its contents will be decoded into ID. 227 | // - with a length of 0, ID will be set to a zero ID. 228 | // - with any other length, sets ID to a zero ID and returns InvalidDataSizeError. 229 | // 230 | // When given nil, ID will be set to a zero ID. 231 | // 232 | // When given any other type, returns a InvalidTypeError. 233 | func (id *ID) Scan(value interface{}) error { 234 | switch v := value.(type) { 235 | case []byte: 236 | switch len(v) { 237 | case SizeBinary: 238 | copy(id[:], v) 239 | case 0: 240 | *id = zero 241 | default: 242 | *id = zero 243 | return &InvalidDataSizeError{Size: len(v)} 244 | } 245 | 246 | case string: 247 | switch len(v) { 248 | case SizeEncoded: 249 | *id = internal.Decode(*(*[]byte)(unsafe.Pointer(&v))) 250 | case 0: 251 | *id = zero 252 | default: 253 | *id = zero 254 | return &InvalidDataSizeError{Size: len(v)} 255 | } 256 | 257 | case nil: 258 | *id = zero 259 | 260 | default: 261 | return &InvalidTypeError{Value: value} 262 | } 263 | 264 | return nil 265 | } 266 | -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | # Benchmark 2 | 3 | Running the benchmark yourself: 4 | 5 | ``` 6 | go test -run=^$ -bench=. -benchmem 7 | ``` 8 | 9 | ## Results 10 | 11 | Platform: `Go 1.14.1 | i7 4770K (Haswell; 4 physical, 8 logical cores) @ 4.4GHz | Win 10`, ran on `2020/04/06`. 12 | 13 | All libraries being compared are listed as ➜ [Alternatives](../README.md#alternatives) in the root package. 14 | 15 |
16 | 17 | ### Generation 18 | 19 | These results must **not** be taken for their raw numbers. See the explanation 20 | (primarily about the `unbounded` suffix) afterwards. 21 | 22 | **Sequential** 23 | ``` 24 | sno/unbounded 136208883 8.80 ns/op 0 B/op 0 allocs/op 25 | xid 59964620 19.4 ns/op 0 B/op 0 allocs/op 26 | uuid/v1 33327685 36.3 ns/op 0 B/op 0 allocs/op 27 | ulid/math 23083492 50.3 ns/op 16 B/op 1 allocs/op 28 | sno/bounded 21022425 61.0 ns/op 0 B/op 0 allocs/op 29 | ulid/crypto 5797293 204 ns/op 16 B/op 1 allocs/op 30 | uuid/v4 5660026 205 ns/op 16 B/op 1 allocs/op 31 | ksuid 5430244 206 ns/op 0 B/op 0 allocs/op 32 | sandflake 5427452 224 ns/op 3 B/op 1 allocs/op 33 | snowflake 4917784 244 ns/op 0 B/op 0 allocs/op 34 | cuid 3507404 342 ns/op 55 B/op 4 allocs/op 35 | sonyflake 31000 38938 ns/op 0 B/op 0 allocs/op 36 | ``` 37 | 38 | **Parallel** (8 threads) 39 | 40 | ``` 41 | sno/unbounded 65161461 17.8 ns/op 0 B/op 0 allocs/op 42 | xid 63163545 18.1 ns/op 0 B/op 0 allocs/op 43 | sno/bounded 21022425 61.0 ns/op 0 B/op 0 allocs/op 44 | uuid/v1 8695777 137 ns/op 0 B/op 0 allocs/op 45 | uuid/v4 7947076 151 ns/op 16 B/op 1 allocs/op 46 | ulid/crypto 7947030 151 ns/op 16 B/op 1 allocs/op 47 | sandflake 6521745 184 ns/op 3 B/op 1 allocs/op 48 | ulid/math 5825053 206 ns/op 16 B/op 1 allocs/op 49 | snowflake 4917774 244 ns/op 0 B/op 0 allocs/op 50 | ksuid 3692324 316 ns/op 0 B/op 0 allocs/op 51 | cuid 3200022 371 ns/op 55 B/op 4 allocs/op 52 | sonyflake 30896 38740 ns/op 0 B/op 0 allocs/op 53 | ``` 54 | 55 | **Snowflakes** 56 | 57 | What does `unbounded` mean? [xid], for example, is unbounded, i.e. it does not prevent you from generating more IDs 58 | than it has a pool for (nor does it account for time). In other words - at high enough throughput you simply and 59 | silently start overwriting already generated IDs. *Realistically* you are not going to fill its pool of 60 | 16,777,216 items per second. But it does reflect in synthetic benchmarks. [Sandflake] does not bind nor handle clock 61 | drifts either. In both cases their results are `WYSIWYG`. 62 | 63 | The implementations that do bind, approach this issue (and clock drifts) differently. [Sonyflake] goes to sleep, 64 | [Snowflake] spins aggressively to get the OS time. **sno**, when about to overflow, starts a single timer and 65 | locks all overflowing requests on a condition, waking them up when the sequence resets, i.e. time changes. 66 | 67 | Both of the above are edge cases, *realistically* - Go's benchmarks happen to saturate the capacities and hit 68 | those cases. **Most of the time what you get is the unbounded overhead**. Expect said overhead of the 69 | implementations to be considerably lower, but still higher than [xid] and **sno** due to their locking nature. 70 | Similarily, expect some of the generation calls to **sno** to be considerably *slower* when they drop into an 71 | edge case branch, but still very much in the same logarithmic ballpark. 72 | 73 | Note: the `61.0ns/op` is our **throughput upper bound** - `1s / 61.0 ns`, yields `16,393,442`. It's an imprecise 74 | measure, but it actually reflects `16,384,000` - our pool per second. If you shrink that capacity using custom 75 | sequence bounds, that number - `61.0ns/op` - will start growing exponentially, but only if/as your burst through 76 | the available capacity. 77 | 78 | [Sonyflake], for example, is limited to 256 IDs per 10msec (25 600 per second), which is why its numbers *appear* so 79 | high - and why the comparison has a disclaimer. 80 | 81 | **`sno/unbounded`** 82 | 83 | In order to get the `unbounded` results in **sno**'s case, `Generator.New()` must be modified locally 84 | and the... 85 | ``` 86 | if g.seqMax >= seq {...} 87 | ``` 88 | ...condition removed. 89 | 90 | **Entropy** 91 | 92 | All entropy-based implementations lock - and will naturally be slower as they need to read from a entropy source and 93 | have more bits to fiddle with. [ULID] implementation required manual locking of rand.Reader for the parallel test. 94 | 95 | 96 |

97 | 98 | ### Encoding/decoding 99 | 100 | The comparisons below are preceded by some baseline measures for sno relative to std's base32 package 101 | as a reference. 102 | 103 | - `sno/vector` - amd64 SIMD code, 104 | - `sno/scalar` - assembly based fallback on amd64 without SIMD, 105 | - `sno/pure-go` - non-assembly, pure Go implementation used by sno on non-amd64 platforms. 106 | 107 | The actual comparison results utilized `sno/vector` in our case, but `sno/pure-go` - albeit slower - 108 | places just as high. 109 | 110 | **Excluded** 111 | - [Sonyflake] has no canonical encoding; 112 | - [cuid] is base36 only (no binary representation); 113 | 114 | **Notes** 115 | - Expect JSON (un)marshal performance to be nearly identical in most if not all cases; 116 | 117 | 118 | #### Encoding 119 | 120 | **Baseline** 121 | ``` 122 | sno/vector 2000000000 0.85 ns/op 0 B/op 0 allocs/op 123 | sno/scalar 1000000000 2.21 ns/op 0 B/op 0 allocs/op 124 | sno/pure-go 1000000000 2.70 ns/op 0 B/op 0 allocs/op 125 | std 30000000 12.5 ns/op 0 B/op 0 allocs/op 126 | ``` 127 | 128 | **Comparison** 129 | 130 | ``` 131 | sno 963900753 1.18 ns/op 0 B/op 0 allocs/op 132 | xid 240481202 4.94 ns/op 0 B/op 0 allocs/op 133 | ulid 211640920 5.67 ns/op 0 B/op 0 allocs/op 134 | snowflake 71941237 16.5 ns/op 32 B/op 1 allocs/op 135 | sandflake 58868926 21.5 ns/op 32 B/op 1 allocs/op 136 | uuid/v4 55494362 22.1 ns/op 48 B/op 1 allocs/op 137 | uuid/v1 51785808 22.2 ns/op 48 B/op 1 allocs/op 138 | ksuid 19672356 54.7 ns/op 0 B/op 0 allocs/op 139 | ``` 140 | 141 | Using: `String()`, provided by all packages. 142 | 143 | 144 | #### Decoding 145 | 146 | **Baseline** 147 | ``` 148 | sno/vector 2000000000 1.02 ns/op 0 B/op 0 allocs/op 149 | sno/scalar 500000000 2.41 ns/op 0 B/op 0 allocs/op 150 | sno/pure-go 500000000 2.79 ns/op 0 B/op 0 allocs/op 151 | std 50000000 31.8 ns/op 0 B/op 0 allocs/op 152 | ``` 153 | 154 | **Comparison** 155 | 156 | ``` 157 | sno 863313699 1.30 ns/op 0 B/op 0 allocs/op 158 | ulid 239884370 4.98 ns/op 0 B/op 0 allocs/op 159 | xid 156291760 7.62 ns/op 0 B/op 0 allocs/op 160 | snowflake 127603538 9.32 ns/op 0 B/op 0 allocs/op 161 | uuid/v1 30000150 35.7 ns/op 48 B/op 1 allocs/op 162 | uuid/v4 30000300 35.7 ns/op 48 B/op 1 allocs/op 163 | ksuid 27908728 37.5 ns/op 0 B/op 0 allocs/op 164 | sandflake 25533001 40.6 ns/op 32 B/op 1 allocs/op 165 | ``` 166 | 167 | Using: `sno.FromEncodedString`, `ulid.Parse`, `xid.FromString`, `snowflake.ParseString`, `sandflake.Parse`, `ksuid.Parse`, 168 | `uuid.FromString` 169 | 170 | 171 | [UUID]: https://github.com/gofrs/uuid 172 | [KSUID]: https://github.com/segmentio/ksuid 173 | [cuid]: https://github.com/lucsky/cuid 174 | [Snowflake]: https://github.com/bwmarrin/snowflake 175 | [Sonyflake]: https://github.com/sony/sonyflake 176 | [Sandflake]: https://github.com/celrenheit/sandflake 177 | [ULID]: https://github.com/oklog/ulid 178 | [xid]: https://github.com/rs/xid -------------------------------------------------------------------------------- /id_test.go: -------------------------------------------------------------------------------- 1 | package sno 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "reflect" 7 | "sync/atomic" 8 | "testing" 9 | "time" 10 | ) 11 | 12 | func TestID_Time(t *testing.T) { 13 | tn := time.Now() 14 | id := New(255) 15 | 16 | // As we prune the fraction, actual cmp needs to be adjusted. This *may* also fail 17 | // in the rare condition that a new timeframe started between time.Now() and New() 18 | // since we're not using a deterministic time source currently. 19 | expected := tn.UnixNano() / TimeUnit 20 | actual := id.Time().UnixNano() / TimeUnit 21 | 22 | if actual != expected { 23 | t.Errorf("expected [%v], got [%v]", expected, actual) 24 | } 25 | 26 | id = NewWithTime(255, tn) 27 | actual = id.Time().UnixNano() / TimeUnit 28 | 29 | if actual != expected { 30 | t.Errorf("expected [%v], got [%v]", expected, actual) 31 | } 32 | } 33 | 34 | func TestID_Timestamp(t *testing.T) { 35 | tn := time.Now() 36 | id := New(255) 37 | 38 | expected := tn.UnixNano() / TimeUnit * TimeUnit // Drop precision for the comparison. 39 | actual := id.Timestamp() 40 | 41 | if actual != expected { 42 | t.Errorf("expected [%v], got [%v]", expected, actual) 43 | } 44 | 45 | id = NewWithTime(255, tn) 46 | actual = id.Timestamp() 47 | 48 | if actual != expected { 49 | t.Errorf("expected [%v], got [%v]", expected, actual) 50 | } 51 | } 52 | 53 | func TestID_Meta(t *testing.T) { 54 | var expected byte = 255 55 | id := New(expected) 56 | actual := id.Meta() 57 | 58 | if actual != expected { 59 | t.Errorf("expected [%v], got [%v]", expected, actual) 60 | } 61 | } 62 | 63 | func TestID_Partition(t *testing.T) { 64 | expected := generator.Partition() 65 | actual := generator.New(255).Partition() 66 | 67 | if actual != expected { 68 | t.Errorf("expected [%v], got [%v]", expected, actual) 69 | } 70 | } 71 | 72 | func TestID_Sequence(t *testing.T) { 73 | expected := atomic.LoadUint32(&generator.seq) + 1 74 | actual := generator.New(255).Sequence() 75 | 76 | if actual != uint16(expected) { 77 | t.Errorf("expected [%v], got [%v]", expected, actual) 78 | } 79 | } 80 | 81 | func TestID_String(t *testing.T) { 82 | src := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51} 83 | expected := "brpk4q72xwf2m63l" 84 | actual := src.String() 85 | 86 | if actual != expected { 87 | t.Errorf("expected [%s], got [%s]", expected, actual) 88 | } 89 | } 90 | 91 | func TestID_Bytes(t *testing.T) { 92 | src := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51} 93 | expected := make([]byte, SizeBinary) 94 | copy(expected, src[:]) 95 | 96 | actual := src.Bytes() 97 | if !bytes.Equal(actual, expected) { 98 | t.Errorf("expected [%s], got [%s]", expected, actual) 99 | } 100 | 101 | actual[SizeBinary-1]++ 102 | if bytes.Equal(expected, actual) { 103 | t.Error("returned a reference to underlying array") 104 | } 105 | } 106 | 107 | func TestID_MarshalText(t *testing.T) { 108 | src := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51} 109 | expected := []byte("brpk4q72xwf2m63l") 110 | 111 | actual, err := src.MarshalText() 112 | if err != nil { 113 | t.Fatal(err) 114 | } 115 | 116 | if !bytes.Equal(actual, expected) { 117 | t.Errorf("expected [%s], got [%s]", expected, actual) 118 | } 119 | } 120 | 121 | func TestID_UnmarshalText_Valid(t *testing.T) { 122 | actual := ID{} 123 | expected := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51} 124 | 125 | if err := actual.UnmarshalText([]byte("brpk4q72xwf2m63l")); err != nil { 126 | t.Fatal(err) 127 | } 128 | 129 | if actual != expected { 130 | t.Errorf("expected [%s], got [%s]", expected, actual) 131 | } 132 | } 133 | 134 | func TestID_UnmarshalText_Invalid(t *testing.T) { 135 | id := ID{} 136 | err := id.UnmarshalText([]byte("012brpk4q72xwf2m63l1245453gfdgxz")) 137 | 138 | if _, ok := err.(*InvalidDataSizeError); !ok { 139 | t.Errorf("expected error with type [%T], got [%T]", &InvalidDataSizeError{}, err) 140 | } 141 | } 142 | 143 | func TestID_MarshalJSON_Valid(t *testing.T) { 144 | src := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51} 145 | expected := []byte("\"brpk4q72xwf2m63l\"") 146 | 147 | actual, err := src.MarshalJSON() 148 | if err != nil { 149 | t.Fatal(err) 150 | } 151 | 152 | if !bytes.Equal(actual, expected) { 153 | t.Errorf("expected [%s], got [%s]", expected, actual) 154 | } 155 | } 156 | 157 | func TestID_MarshalJSON_Null(t *testing.T) { 158 | src := ID{} 159 | expected := []byte("null") 160 | actual, err := src.MarshalJSON() 161 | if err != nil { 162 | t.Fatal(err) 163 | } 164 | 165 | if !bytes.Equal(actual, expected) { 166 | t.Errorf("expected [%s], got [%s]", expected, actual) 167 | } 168 | } 169 | 170 | func TestID_UnmarshalJSON_Valid(t *testing.T) { 171 | actual := ID{} 172 | expected := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51} 173 | 174 | if err := actual.UnmarshalJSON([]byte("\"brpk4q72xwf2m63l\"")); err != nil { 175 | t.Fatal(err) 176 | } 177 | 178 | if actual != expected { 179 | t.Errorf("expected [%s], got [%s]", expected, actual) 180 | } 181 | } 182 | 183 | func TestID_UnmarshalJSON_Invalid(t *testing.T) { 184 | id := ID{} 185 | err := id.UnmarshalJSON([]byte("\"012brpk4q72xwf2m63l1245453gfdgxz\"")) 186 | 187 | if _, ok := err.(*InvalidDataSizeError); !ok { 188 | t.Errorf("expected error with type [%T], got [%T]", &InvalidDataSizeError{}, err) 189 | } 190 | 191 | if err != nil && err.Error() != errInvalidDataSizeMsg { 192 | t.Errorf("expected error [%s], got [%s]", errInvalidDataSizeMsg, err.Error()) 193 | } 194 | } 195 | 196 | func TestID_UnmarshalJSON_Null(t *testing.T) { 197 | actual := ID{} 198 | expected := ID{} 199 | 200 | if err := actual.UnmarshalJSON([]byte("null")); err != nil { 201 | t.Fatal(err) 202 | } 203 | 204 | if actual != expected { 205 | t.Errorf("expected [%s], got [%s]", expected, actual) 206 | } 207 | } 208 | 209 | func TestID_IsZero(t *testing.T) { 210 | for _, c := range []struct { 211 | id ID 212 | want bool 213 | }{ 214 | { 215 | id: New(255), 216 | want: false, 217 | }, 218 | { 219 | id: ID{}, 220 | want: true, 221 | }, 222 | } { 223 | if actual, expected := c.id.IsZero(), c.want; actual != expected { 224 | t.Errorf("expected [%v], got [%v]", expected, actual) 225 | } 226 | } 227 | } 228 | 229 | func TestID_Compare(t *testing.T) { 230 | a := New(100) 231 | l := a 232 | l[5]++ 233 | e := a 234 | b := a 235 | b[5]-- 236 | 237 | if actual := a.Compare(l); actual != -1 { 238 | t.Errorf("expected [-1], got [%d]", actual) 239 | } 240 | 241 | if actual := a.Compare(e); actual != 0 { 242 | t.Errorf("expected [0], got [%d]", actual) 243 | } 244 | 245 | if actual := a.Compare(b); actual != 1 { 246 | t.Errorf("expected [1], got [%d]", actual) 247 | } 248 | } 249 | 250 | func TestID_Value(t *testing.T) { 251 | src := ID{78, 111, 33, 96, 160, 255, 154, 10, 16, 51} 252 | expected := make([]byte, SizeBinary) 253 | copy(expected, src[:]) 254 | 255 | v, err := src.Value() 256 | if err != nil { 257 | t.Errorf("got unexpected error: %s", err) 258 | } 259 | 260 | actual, ok := v.([]byte) 261 | if !ok { 262 | t.Errorf("expected type [%T], got [%T]", expected, actual) 263 | } 264 | 265 | if !bytes.Equal(actual, expected) { 266 | t.Errorf("expected [%s], got [%s]", expected, actual) 267 | } 268 | 269 | actual[SizeBinary-1]++ 270 | if bytes.Equal(expected, actual) { 271 | t.Error("returned a reference to underlying array") 272 | } 273 | } 274 | 275 | func TestID_Scan(t *testing.T) { 276 | id := New(255) 277 | 278 | for _, c := range []struct { 279 | name string 280 | in interface{} 281 | out ID 282 | err error 283 | errMsg string 284 | }{ 285 | {"nil", nil, ID{}, nil, ""}, 286 | {"bytes-valid", id[:], id, nil, ""}, 287 | {"bytes-invalid", make([]byte, 3), zero, &InvalidDataSizeError{Size: 3}, errInvalidDataSizeMsg}, 288 | {"bytes-zero", []byte{}, zero, nil, ""}, 289 | {"string-valid", id.String(), id, nil, ""}, 290 | {"string-invalid", "123", zero, &InvalidDataSizeError{Size: 3}, errInvalidDataSizeMsg}, 291 | {"string-zero", "", zero, nil, ""}, 292 | {"invalid", 69, ID{}, &InvalidTypeError{Value: 69}, fmt.Sprintf(errInvalidTypeFmt, 69)}, 293 | } { 294 | c := c 295 | t.Run(c.name, func(t *testing.T) { 296 | t.Parallel() 297 | 298 | var out ID 299 | err := out.Scan(c.in) 300 | 301 | if actual, expected := out, c.out; actual != expected { 302 | t.Errorf("expected [%s], got [%s]", expected, actual) 303 | } 304 | 305 | if err != nil && c.err == nil { 306 | t.Errorf("got unexpected error: %s", err) 307 | } else if actual, expected := reflect.TypeOf(err), reflect.TypeOf(c.err); actual != expected { 308 | t.Errorf("expected error type [%s], got [%s]", expected, actual) 309 | } else if err != nil && c.errMsg != "" && err.Error() != c.errMsg { 310 | t.Errorf("expected error message [%s], got [%s]", c.errMsg, err.Error()) 311 | } 312 | }) 313 | } 314 | } 315 | -------------------------------------------------------------------------------- /generator.go: -------------------------------------------------------------------------------- 1 | // Package sno provides fast generators of compact, sortable, unique IDs with embedded metadata. 2 | package sno 3 | 4 | import ( 5 | "encoding/binary" 6 | "sync" 7 | "sync/atomic" 8 | "time" 9 | ) 10 | 11 | // GeneratorSnapshot represents the bookkeeping data of a Generator at some point in time. 12 | // 13 | // Snapshots serve both as configuration and a means of restoring generators across restarts, 14 | // to ensure newly generated IDs don't overwrite IDs generated before going offline. 15 | type GeneratorSnapshot struct { 16 | // The Partition the generator is scoped to. A zero value ({0, 0}) is valid and will be used. 17 | Partition Partition `json:"partition"` 18 | 19 | // Sequence pool bounds (inclusive). Can be given in either order - lower value will become lower bound. 20 | // When SequenceMax is 0 and SequenceMin != 65535, SequenceMax will be set to 65535. 21 | SequenceMin uint16 `json:"sequenceMin"` 22 | SequenceMax uint16 `json:"sequenceMax"` 23 | 24 | // Current sequence number. When 0, it will be set to SequenceMin. May overflow SequenceMax, 25 | // but not underflow SequenceMin. 26 | Sequence uint32 `json:"sequence"` 27 | 28 | Now int64 `json:"now"` // Wall time the snapshot was taken at in sno time units and in our epoch. 29 | WallHi int64 `json:"wallHi"` // 30 | WallSafe int64 `json:"wallSafe"` // 31 | Drifts uint32 `json:"drifts"` // Count of wall clock regressions the generator tick-tocked at. 32 | } 33 | 34 | // SequenceOverflowNotification contains information pertaining to the current state of a Generator 35 | // while it is overflowing. 36 | type SequenceOverflowNotification struct { 37 | Now time.Time // Time of tick. 38 | Count uint32 // Number of currently overflowing generation calls. 39 | Ticks uint32 // Total count of ticks while dealing with the *current* overflow. 40 | } 41 | 42 | // Generator is responsible for generating new IDs scoped to a given fixed Partition and 43 | // managing their sequence. 44 | // 45 | // A Generator must be constructed using NewGenerator - the zero value of a Generator is 46 | // an unusable state. 47 | // 48 | // A Generator must not be copied after first use. 49 | type Generator struct { 50 | partition uint32 // Immutable. 51 | 52 | drifts uint32 // Uses the LSB for the tick-tock and serves as a counter. 53 | wallHi uint64 // Atomic. 54 | wallSafe uint64 // Atomic. 55 | regression sync.Mutex // Regression branch lock. 56 | 57 | seq uint32 // Atomic. 58 | seqMin uint32 // Immutable. 59 | seqMax uint32 // Immutable. 60 | seqStatic uint32 // Atomic. See NewWithTime. Not included in snapshots (does not get restored). 61 | 62 | seqOverflowCond *sync.Cond 63 | seqOverflowTicker *time.Ticker 64 | seqOverflowCount uint32 // Behind seqOverflowCond lock. 65 | seqOverflowChan chan<- *SequenceOverflowNotification 66 | } 67 | 68 | // NewGenerator returns a new generator based on the optional Snapshot. 69 | func NewGenerator(snapshot *GeneratorSnapshot, c chan<- *SequenceOverflowNotification) (*Generator, error) { 70 | if snapshot != nil { 71 | return newGeneratorFromSnapshot(*snapshot, c) 72 | } 73 | 74 | return newGeneratorFromDefaults(c) 75 | } 76 | 77 | func newGeneratorFromSnapshot(snapshot GeneratorSnapshot, c chan<- *SequenceOverflowNotification) (*Generator, error) { 78 | if err := sanitizeSnapshotBounds(&snapshot); err != nil { 79 | return nil, err 80 | } 81 | 82 | return &Generator{ 83 | partition: partitionToInternalRepr(snapshot.Partition), 84 | seq: snapshot.Sequence, 85 | seqMin: uint32(snapshot.SequenceMin), 86 | seqMax: uint32(snapshot.SequenceMax), 87 | seqStatic: uint32(snapshot.SequenceMin - 1), // Offset by -1 since NewWithTime starts this with an incr. 88 | seqOverflowCond: sync.NewCond(&sync.Mutex{}), 89 | seqOverflowChan: c, 90 | drifts: snapshot.Drifts, 91 | wallHi: uint64(snapshot.WallHi), 92 | wallSafe: uint64(snapshot.WallSafe), 93 | }, nil 94 | } 95 | 96 | func newGeneratorFromDefaults(c chan<- *SequenceOverflowNotification) (*Generator, error) { 97 | // Realistically safe, but has an edge case resulting in PartitionPoolExhaustedError. 98 | partition, err := genPartition() 99 | if err != nil { 100 | return nil, err 101 | } 102 | 103 | return &Generator{ 104 | partition: partition, 105 | seqMax: MaxSequence, 106 | seqStatic: ^uint32(0), // Offset by -1 since NewWithTime starts this with an incr. 107 | seqOverflowCond: sync.NewCond(&sync.Mutex{}), 108 | seqOverflowChan: c, 109 | }, nil 110 | } 111 | 112 | // New generates a new ID using the current system time for its timestamp. 113 | func (g *Generator) New(meta byte) (id ID) { 114 | retry: 115 | var ( 116 | // Note: Single load of wallHi for the evaluations is correct (as we only grab wallNow 117 | // once as well). 118 | wallHi = atomic.LoadUint64(&g.wallHi) 119 | wallNow = snotime() 120 | ) 121 | 122 | // Fastest branch if we're still within the most recent time unit. 123 | if wallNow == wallHi { 124 | seq := atomic.AddUint32(&g.seq, 1) 125 | 126 | if g.seqMax >= seq { 127 | g.applyTimestamp(&id, wallNow, atomic.LoadUint32(&g.drifts)&1) 128 | g.applyPayload(&id, meta, seq) 129 | 130 | return 131 | } 132 | 133 | // This is to be considered an edge case if seqMax actually gets exceeded, but since bounds 134 | // can be set arbitrarily, in a small pool (or in stress tests) this can happen. 135 | // We don't *really* handle this gracefully - we currently clog up and wait until the sequence 136 | // gets reset by a time change *hoping* we'll finally get our turn. If requests to generate 137 | // don't decrease enough, eventually this will starve out resources. 138 | // 139 | // The reason we don't simply plug the broadcast into the time progression branch is precisely 140 | // because that one is going to be the most common branch for many uses realistically (1 or 0 ID per 4msec) 141 | // while this one is for scales on another level. At the same time if we *ever* hit this case, we need 142 | // a periodic flush anyways, because even a single threaded process can easily exhaust the max default 143 | // sequence pool, let alone a smaller one, meaning it could potentially deadlock if all routines get 144 | // locked in on a sequence overflow and no new routine comes to their rescue at a higher time to reset 145 | // the sequence and notify them. 146 | g.seqOverflowCond.L.Lock() 147 | g.seqOverflowCount++ 148 | 149 | if g.seqOverflowTicker == nil { 150 | // Tick *roughly* each 1ms during overflows. 151 | g.seqOverflowTicker = time.NewTicker(TimeUnit / 4) 152 | go g.seqOverflowLoop() 153 | } 154 | 155 | for atomic.LoadUint32(&g.seq) > g.seqMax { 156 | // We spin pessimistically here instead of a straight lock -> wait -> unlock because that'd 157 | // put us back on the New(). At extreme contention we could end up back here anyways. 158 | g.seqOverflowCond.Wait() 159 | } 160 | 161 | g.seqOverflowCount-- 162 | g.seqOverflowCond.L.Unlock() 163 | 164 | goto retry 165 | } 166 | 167 | // Time progression branch. 168 | if wallNow > wallHi && atomic.CompareAndSwapUint64(&g.wallHi, wallHi, wallNow) { 169 | atomic.StoreUint32(&g.seq, g.seqMin) 170 | 171 | g.applyTimestamp(&id, wallNow, atomic.LoadUint32(&g.drifts)&1) 172 | g.applyPayload(&id, meta, g.seqMin) 173 | 174 | return 175 | } 176 | 177 | // Time regression branch. 178 | g.regression.Lock() 179 | 180 | // Check-again. It's possible that another thread applied the drift while we were spinning (if we were). 181 | if wallHi = atomic.LoadUint64(&g.wallHi); wallNow >= wallHi { 182 | g.regression.Unlock() 183 | 184 | goto retry 185 | } 186 | 187 | if wallNow > g.wallSafe { 188 | // Branch for the one routine that gets to apply the drift. 189 | // wallHi is bidirectional (gets updated whenever the wall clock time progresses - or when a drift 190 | // gets applied, which is when it regresses). In contrast, wallSafe only ever gets updated when 191 | // a drift gets applied and always gets set to the highest time recorded, meaning it 192 | // increases monotonically. 193 | atomic.StoreUint64(&g.wallSafe, wallHi) 194 | atomic.StoreUint64(&g.wallHi, wallNow) 195 | atomic.StoreUint32(&g.seq, g.seqMin) 196 | 197 | g.applyTimestamp(&id, wallNow, atomic.AddUint32(&g.drifts, 1)&1) 198 | g.applyPayload(&id, meta, g.seqMin) 199 | 200 | g.regression.Unlock() 201 | 202 | return 203 | } 204 | 205 | // Branch for all routines that are in an "unsafe" past (e.g. multiple time regressions happened 206 | // before we reached wallSafe again). 207 | g.regression.Unlock() 208 | 209 | time.Sleep(time.Duration(g.wallSafe - wallNow)) 210 | 211 | goto retry 212 | } 213 | 214 | // NewWithTime generates a new ID using the given time for the timestamp. 215 | // 216 | // IDs generated with user-specified timestamps are exempt from the tick-tock mechanism and 217 | // use a sequence separate from New() - one that is independent from time, as time provided to 218 | // this method can be arbitrary. The sequence increases strictly monotonically up to hitting 219 | // the generator's SequenceMax, after which it rolls over silently back to SequenceMin. 220 | // 221 | // That means bounds are respected, but unlike New(), NewWithTime() will not block the caller 222 | // when the (separate) sequence rolls over as the Generator would be unable to determine when 223 | // to resume processing within the constraints of this method. 224 | // 225 | // Managing potential collisions due to the arbitrary time is left to the user. 226 | // 227 | // This utility is primarily meant to enable porting of old IDs to sno and assumed to be ran 228 | // before an ID scheme goes online. 229 | func (g *Generator) NewWithTime(meta byte, t time.Time) (id ID) { 230 | retry: 231 | var seq = atomic.AddUint32(&g.seqStatic, 1) 232 | 233 | if seq > g.seqMax { 234 | if !atomic.CompareAndSwapUint32(&g.seqStatic, seq, g.seqMin) { 235 | goto retry 236 | } 237 | 238 | seq = g.seqMin 239 | } 240 | 241 | g.applyTimestamp(&id, uint64(t.UnixNano()-epochNsec)/TimeUnit, 0) 242 | g.applyPayload(&id, meta, seq) 243 | 244 | return 245 | } 246 | 247 | // Partition returns the fixed identifier of the Generator. 248 | func (g *Generator) Partition() Partition { 249 | return partitionToPublicRepr(g.partition) 250 | } 251 | 252 | // Sequence returns the current sequence the Generator is at. 253 | // 254 | // This does *not* mean that if one were to call New() right now, the generated ID 255 | // will necessarily get this sequence, as other things may happen before. 256 | // 257 | // If the next call to New() would result in a reset of the sequence, SequenceMin 258 | // is returned instead of the current internal sequence. 259 | // 260 | // If the generator is currently overflowing, the sequence returned will be higher than 261 | // the generator's SequenceMax (thus a uint32 return type), meaning it can be used to 262 | // determine the current overflow via: 263 | // overflow := int(uint32(generator.SequenceMax()) - generator.Sequence()) 264 | func (g *Generator) Sequence() uint32 { 265 | if wallNow := snotime(); wallNow == atomic.LoadUint64(&g.wallHi) { 266 | return atomic.LoadUint32(&g.seq) 267 | } 268 | 269 | return g.seqMin 270 | } 271 | 272 | // SequenceMin returns the lower bound of the sequence pool of this generator. 273 | func (g *Generator) SequenceMin() uint16 { 274 | return uint16(g.seqMin) 275 | } 276 | 277 | // SequenceMax returns the upper bound of the sequence pool of this generator. 278 | func (g *Generator) SequenceMax() uint16 { 279 | return uint16(g.seqMax) 280 | } 281 | 282 | // Len returns the number of IDs generated in the current timeframe. 283 | func (g *Generator) Len() int { 284 | if wallNow := snotime(); wallNow == atomic.LoadUint64(&g.wallHi) { 285 | if seq := atomic.LoadUint32(&g.seq); g.seqMax > seq { 286 | return int(seq-g.seqMin) + 1 287 | } 288 | 289 | return g.Cap() 290 | } 291 | 292 | return 0 293 | } 294 | 295 | // Cap returns the total capacity of the Generator. 296 | // 297 | // To get its current capacity (e.g. number of possible additional IDs in the current 298 | // timeframe), simply: 299 | // spare := generator.Cap() - generator.Len() 300 | // The result will always be non-negative. 301 | func (g *Generator) Cap() int { 302 | return int(g.seqMax-g.seqMin) + 1 303 | } 304 | 305 | // Snapshot returns a copy of the Generator's current bookkeeping data. 306 | func (g *Generator) Snapshot() GeneratorSnapshot { 307 | var ( 308 | wallNow = snotime() 309 | wallHi = atomic.LoadUint64(&g.wallHi) 310 | seq uint32 311 | ) 312 | 313 | // Be consistent with g.Sequence() and return seqMin if the next call to New() 314 | // would reset the sequence. 315 | if wallNow == wallHi { 316 | seq = atomic.LoadUint32(&g.seq) 317 | } else { 318 | seq = g.seqMin 319 | } 320 | 321 | return GeneratorSnapshot{ 322 | Partition: partitionToPublicRepr(g.partition), 323 | SequenceMin: uint16(g.seqMin), 324 | SequenceMax: uint16(g.seqMax), 325 | Sequence: seq, 326 | Now: int64(wallNow), 327 | WallHi: int64(wallHi), 328 | WallSafe: int64(atomic.LoadUint64(&g.wallSafe)), 329 | Drifts: atomic.LoadUint32(&g.drifts), 330 | } 331 | } 332 | 333 | func (g *Generator) applyTimestamp(id *ID, units uint64, tick uint32) { 334 | // Equivalent to... 335 | // 336 | // id[0] = byte(units >> 31) 337 | // id[1] = byte(units >> 23) 338 | // id[2] = byte(units >> 15) 339 | // id[3] = byte(units >> 7) 340 | // id[4] = byte(units << 1) | byte(tick) 341 | // 342 | // ... and slightly wasteful as we're storing 3 bytes that will get overwritten 343 | // via applyPartition but unlike the code above, the calls to binary.BigEndian.PutUintXX() 344 | // are compiler assisted and boil down to essentially a load + shift + bswap (+ a nop due 345 | // to midstack inlining), which we prefer over the roughly 16 instructions otherwise. 346 | // If applyTimestamp() was implemented straight in assembly, we'd not get it inline. 347 | binary.BigEndian.PutUint64(id[:], units<<25|uint64(tick)<<24) 348 | } 349 | 350 | func (g *Generator) applyPayload(id *ID, meta byte, seq uint32) { 351 | id[5] = meta 352 | binary.BigEndian.PutUint32(id[6:], g.partition|seq) 353 | } 354 | 355 | func (g *Generator) seqOverflowLoop() { 356 | var ( 357 | retryNotify bool 358 | ticks uint32 359 | ) 360 | 361 | for t := range g.seqOverflowTicker.C { 362 | g.seqOverflowCond.L.Lock() 363 | 364 | if g.seqOverflowChan != nil { 365 | // We only ever count ticks when we've got a notification channel up. 366 | // Even if we're at a count of 0 but on our first tick, it means the generator declogged already, 367 | // but we still notify that it happened. 368 | ticks++ 369 | if retryNotify || g.seqOverflowCount == 0 || ticks%4 == 1 { 370 | select { 371 | case g.seqOverflowChan <- &SequenceOverflowNotification{ 372 | Now: t, 373 | Ticks: ticks, 374 | Count: g.seqOverflowCount, 375 | }: 376 | retryNotify = false 377 | 378 | default: 379 | // Simply drop the message for now but try again the next tick already 380 | // instead of waiting for the full interval. 381 | retryNotify = true 382 | } 383 | } 384 | } 385 | 386 | if g.seqOverflowCount == 0 { 387 | g.seqOverflowTicker.Stop() 388 | g.seqOverflowTicker = nil 389 | g.seqOverflowCond.L.Unlock() 390 | 391 | return 392 | } 393 | 394 | // At this point we can unlock already because we don't touch any shared data anymore. 395 | // The broadcasts further don't require us to hold the lock. 396 | g.seqOverflowCond.L.Unlock() 397 | 398 | // Under normal behaviour high load would trigger an overflow and load would remain roughly 399 | // steady, so a seq reset will simply get triggered by a time change happening in New(). 400 | // The actual callers are in a pessimistic loop and will check the condition themselves again. 401 | if g.seqMax >= atomic.LoadUint32(&g.seq) { 402 | g.seqOverflowCond.Broadcast() 403 | 404 | continue 405 | } 406 | 407 | // Handles an edge case where we've got calls locked on an overflow and suddenly no more 408 | // calls to New() come in, meaning there's no one to actually reset the sequence. 409 | var ( 410 | wallNow = uint64(t.UnixNano()-epochNsec) / TimeUnit 411 | wallHi = atomic.LoadUint64(&g.wallHi) 412 | ) 413 | 414 | if wallNow > wallHi { 415 | atomic.StoreUint32(&g.seq, g.seqMin) 416 | g.seqOverflowCond.Broadcast() 417 | 418 | continue // Left for readability of flow. 419 | } 420 | } 421 | } 422 | 423 | // Arbitrary min pool size of 4 per time unit (that is 1000 per sec). 424 | // Separated out as a constant as this value is being tested against. 425 | const minSequencePoolSize = 4 426 | 427 | func sanitizeSnapshotBounds(s *GeneratorSnapshot) error { 428 | // Zero value of SequenceMax will pass as the default max if and only if SequenceMin is not already 429 | // default max (as the range can be defined in either order). 430 | if s.SequenceMax == 0 && s.SequenceMin != MaxSequence { 431 | s.SequenceMax = MaxSequence 432 | } 433 | 434 | if s.SequenceMin == s.SequenceMax { 435 | return invalidSequenceBounds(s, errSequenceBoundsIdenticalMsg) 436 | } 437 | 438 | // Allow bounds to be given in any order. 439 | if s.SequenceMax < s.SequenceMin { 440 | s.SequenceMin, s.SequenceMax = s.SequenceMax, s.SequenceMin 441 | } 442 | 443 | if s.SequenceMax-s.SequenceMin-1 < minSequencePoolSize { 444 | return invalidSequenceBounds(s, errSequencePoolTooSmallMsg) 445 | } 446 | 447 | // Allow zero value to pass as a default of the lower bound. 448 | if s.Sequence == 0 { 449 | s.Sequence = uint32(s.SequenceMin) 450 | } 451 | 452 | if s.Sequence < uint32(s.SequenceMin) { 453 | return invalidSequenceBounds(s, errSequenceUnderflowsBound) 454 | } 455 | 456 | return nil 457 | } 458 | 459 | func invalidSequenceBounds(s *GeneratorSnapshot, msg string) *InvalidSequenceBoundsError { 460 | return &InvalidSequenceBoundsError{ 461 | Cur: s.Sequence, 462 | Min: s.SequenceMin, 463 | Max: s.SequenceMax, 464 | Msg: msg, 465 | } 466 | } 467 | -------------------------------------------------------------------------------- /generator_test.go: -------------------------------------------------------------------------------- 1 | // +build test 2 | 3 | package sno 4 | 5 | import ( 6 | "fmt" 7 | "sync" 8 | "sync/atomic" 9 | "testing" 10 | "time" 11 | _ "unsafe" 12 | 13 | "github.com/muyo/sno/internal" 14 | ) 15 | 16 | // snotime is the actual time source used by Generators during tests. 17 | // 18 | // We split on build tags ("test") to swap out the snotime() implementations provided by platform specific 19 | // code so that tests can use mocked time sources without in any way impacting a Generator's runtime performance 20 | // in production builds. 21 | // 22 | // Note: Attempting to run the test suite without the "test" build tag will fail, resulting in several 23 | // compilation errors. 24 | var snotime = internal.Snotime 25 | 26 | // monotime provides real monotonic clock readings to several tests. 27 | //go:linkname monotime runtime.nanotime 28 | func monotime() int64 29 | 30 | // staticTime provides tests with a fake time source which returns a fixed time on each call. 31 | // The time returned can be changed by directly (atomically) mutating the underlying variable. 32 | func staticTime() uint64 { 33 | return atomic.LoadUint64(staticWallNow) 34 | } 35 | 36 | // staticIncTime provides tests with a fake time source which returns a time based on a fixed time 37 | // monotonically increasing by 1 TimeUnit on each call. 38 | func staticIncTime() uint64 { 39 | wall := atomic.LoadUint64(staticWallNow) + atomic.LoadUint64(staticInc)*TimeUnit 40 | 41 | atomic.AddUint64(staticInc, 1) 42 | 43 | return wall 44 | } 45 | 46 | var ( 47 | staticInc = new(uint64) 48 | staticWallNow = func() *uint64 { 49 | wall := snotime() 50 | return &wall 51 | }() 52 | ) 53 | 54 | func TestGenerator_NewNoOverflow(t *testing.T) { 55 | var ( 56 | part = Partition{255, 255} 57 | seqPool = uint16(MaxSequence / 2) 58 | seqMin = seqPool 59 | seqMax = 2*seqPool - 1 60 | 61 | // Scaled to not exceed bounds, otherwise we run into the seqOverflow race and order - which we 62 | // test for in here - becomes non-deterministic. 63 | sampleSize = int(seqPool) 64 | g, err = NewGenerator(&GeneratorSnapshot{ 65 | Partition: part, 66 | SequenceMin: seqMin, 67 | SequenceMax: seqMax, 68 | }, nil) 69 | ) 70 | 71 | if err != nil { 72 | t.Fatal(err) 73 | } 74 | 75 | ids := make([]ID, sampleSize) 76 | for i := 0; i < sampleSize; i++ { 77 | ids[i] = g.New(byte(i)) 78 | } 79 | 80 | for i := 1; i < sampleSize; i++ { 81 | curID, prevID := ids[i], ids[i-1] 82 | 83 | seq := ids[i].Sequence() 84 | if seq > seqMax { 85 | t.Errorf("%d: sequence overflowing max boundary; max [%d], got [%d]", i, seqMin, seq) 86 | } 87 | 88 | if seq < seqMin { 89 | t.Errorf("%d: sequence underflowing min boundary; min [%d], got [%d]", i, seqMin, seq) 90 | } 91 | 92 | // We're expecting the time to increment and never more than by one time unit, since 93 | // we generated them in sequence. 94 | timeDiff := curID.Timestamp() - prevID.Timestamp() 95 | 96 | // Check if drift got applied in this edge case. 97 | if timeDiff < 0 && curID[4]&1 == 0 { 98 | t.Error("timestamp of next ID lower than previous and no tick-tock applied") 99 | } 100 | 101 | if timeDiff > TimeUnit { 102 | t.Error("timestamp diff between IDs is higher than by one time unit") 103 | } 104 | 105 | if prevID.Partition() != part { 106 | t.Errorf("%d: partition differs from generator's partition; expected [%d], got [%d]", i, part, prevID.Partition()) 107 | } 108 | } 109 | } 110 | 111 | func TestGenerator_NewOverflows(t *testing.T) { 112 | var ( 113 | part = Partition{255, 255} 114 | seqPool = 512 115 | seqOverflows = 16 116 | seqMin = uint16(seqPool) 117 | seqMax = uint16(2*seqPool - 1) 118 | sampleSize = seqPool * seqOverflows 119 | 120 | c = make(chan *SequenceOverflowNotification) 121 | cc = make(chan struct{}) 122 | notesHi = new(int64) 123 | 124 | g, err = NewGenerator(&GeneratorSnapshot{ 125 | Partition: part, 126 | SequenceMin: seqMin, 127 | SequenceMax: seqMax, 128 | }, c) 129 | ) 130 | 131 | if err != nil { 132 | t.Fatal(err) 133 | } 134 | 135 | go func() { 136 | for { 137 | select { 138 | case note := <-c: 139 | if note.Count > 0 { 140 | atomic.AddInt64(notesHi, 1) 141 | } 142 | case <-cc: 143 | return 144 | } 145 | } 146 | }() 147 | 148 | ids := make([]ID, sampleSize) 149 | for i := 0; i < sampleSize; i++ { 150 | ids[i] = g.New(byte(i)) 151 | } 152 | 153 | close(cc) 154 | 155 | // TODO(alcore) The non-blocking writes are far from reliable. The notifications need a rework with 156 | // deep profiling. 157 | if atomic.LoadInt64(notesHi) < int64(seqOverflows)/4 { 158 | t.Errorf("expected at least [%d] overflow notification, got [%d]", seqOverflows/4, atomic.LoadInt64(notesHi)) 159 | } 160 | 161 | timeDist := make(map[int64]int) 162 | 163 | for i := 0; i < sampleSize; i++ { 164 | id := ids[i] 165 | timeDist[id.Timestamp()]++ 166 | 167 | seq := id.Sequence() 168 | if seq > seqMax { 169 | t.Errorf("%d: sequence overflowing max boundary; max [%d], got [%d]", i, seqMin, seq) 170 | } 171 | 172 | if seq < seqMin { 173 | t.Errorf("%d: sequence underflowing min boundary; min [%d], got [%d]", i, seqMin, seq) 174 | } 175 | 176 | if id.Partition() != part { 177 | t.Errorf("%d: partition differs from generator's partition; expected [%d], got [%d]", i, part, id.Partition()) 178 | } 179 | } 180 | 181 | for tf, c := range timeDist { 182 | if c > seqPool { 183 | t.Errorf("count of IDs in the given timeframe exceeds pool; timestamp [%d], pool [%d], count [%d]", tf, seqPool, c) 184 | } 185 | } 186 | } 187 | 188 | func TestGenerator_NewTickTocks(t *testing.T) { 189 | g, ids := testGeneratorNewTickTocksSetup(t) 190 | t.Run("Tick", testGeneratorNewTickTocksTick(g, ids)) 191 | t.Run("SafetySlumber", testGeneratorNewTickTocksSafetySlumber(g, ids)) 192 | t.Run("Tock", testGeneratorNewTickTocksTock(g, ids)) 193 | t.Run("Race", testGeneratorNewTickTocksRace(g, ids)) 194 | } 195 | 196 | func testGeneratorNewTickTocksSetup(t *testing.T) (*Generator, []ID) { 197 | var ( 198 | seqPool = 8096 199 | g, err = NewGenerator(&GeneratorSnapshot{ 200 | Partition: Partition{255, 255}, 201 | SequenceMin: uint16(seqPool), 202 | SequenceMax: uint16(2*seqPool - 1), 203 | }, nil) 204 | ) 205 | if err != nil { 206 | t.Fatal(err) 207 | } 208 | 209 | return g, make([]ID, g.Cap()) 210 | } 211 | 212 | func testGeneratorNewTickTocksTick(g *Generator, ids []ID) func(*testing.T) { 213 | return func(t *testing.T) { 214 | // First batch follows normal time progression. 215 | for i := 0; i < 512; i++ { 216 | ids[i] = g.New(255) 217 | } 218 | 219 | wall := snotime() 220 | atomic.StoreUint64(staticWallNow, wall-TimeUnit) 221 | 222 | // Swap out the time source. Next batch is supposed to set a drift, have their tick-tock bit 223 | // set to 1, and wallSafe on the generator must be set accordingly. 224 | snotime = staticTime 225 | 226 | if atomic.LoadUint32(&g.drifts) != 0 { 227 | t.Errorf("expected [0] drifts recorded, got [%d]", atomic.LoadUint32(&g.drifts)) 228 | } 229 | 230 | if atomic.LoadUint64(&g.wallSafe) != 0 { 231 | t.Errorf("expected wallSafe to be [0], is [%d]", atomic.LoadUint64(&g.wallSafe)) 232 | } 233 | 234 | for j := 512; j < 1024; j++ { 235 | ids[j] = g.New(255) 236 | } 237 | 238 | if atomic.LoadUint32(&g.drifts) != 1 { 239 | t.Errorf("expected [1] drift recorded, got [%d]", atomic.LoadUint32(&g.drifts)) 240 | } 241 | 242 | if atomic.LoadUint64(&g.wallSafe) == atomic.LoadUint64(staticWallNow) { 243 | t.Errorf("expected wallSafe to be [%d], was [%d]", atomic.LoadUint64(staticWallNow), atomic.LoadUint64(&g.wallSafe)) 244 | } 245 | 246 | for i := 0; i < 512; i++ { 247 | if ids[i][4]&1 != 0 { 248 | t.Errorf("%d: expected tick-tock bit to not be set, was set", i) 249 | } 250 | } 251 | 252 | for j := 512; j < 1024; j++ { 253 | if ids[j][4]&1 != 1 { 254 | t.Errorf("%d: expected tick-tock bit to be set, was not", j) 255 | } 256 | } 257 | 258 | snotime = internal.Snotime 259 | } 260 | } 261 | 262 | func testGeneratorNewTickTocksSafetySlumber(g *Generator, ids []ID) func(*testing.T) { 263 | return func(t *testing.T) { 264 | // Multi-regression, checking on a single goroutine. 265 | atomic.AddUint64(staticWallNow, ^uint64(TimeUnit-1)) 266 | 267 | // Use a clock where the first call will return the static clock times 268 | // but subsequent calls will return higher times. Since we didn't adjust the mono clock 269 | // at all insofar, it's currently 1 TimeUnit (first drift) behind wallSafe, which got set 270 | // during the initial drift. This is the time the next generation call(s) are supposed 271 | // to sleep, as we are simulating a multi-regression (into an unsafe past where can't 272 | // tick-tock again until reaching wallSafe). 273 | snotime = staticIncTime 274 | 275 | mono1 := monotime() 276 | id := g.New(255) 277 | if id[4]&1 != 1 { 278 | t.Errorf("expected tick-tock bit to be set, was not") 279 | } 280 | mono2 := monotime() 281 | 282 | monoDiff := mono2 - mono1 283 | 284 | // We had 2 regressions by 1 TimeUnit each, so sleep duration should've been roughly 285 | // the same since time was static (got incremented only after the sleep). 286 | if monoDiff < 2*TimeUnit { 287 | t.Errorf("expected to sleep for at least [%f]ns, took [%d] instead", 2*TimeUnit, monoDiff) 288 | } else if monoDiff > 5*TimeUnit { 289 | t.Errorf("expected to sleep for no more than [%f]ns, took [%d] instead", 5*TimeUnit, monoDiff) 290 | } 291 | 292 | if atomic.LoadUint32(&g.drifts) != 1 { 293 | t.Errorf("expected [1] drift recorded, got [%d]", atomic.LoadUint32(&g.drifts)) 294 | } 295 | 296 | snotime = internal.Snotime 297 | } 298 | } 299 | 300 | func testGeneratorNewTickTocksTock(g *Generator, ids []ID) func(*testing.T) { 301 | return func(t *testing.T) { 302 | // At this point we are going to simulate another drift, somewhere in the 'far' future, 303 | // with parallel load. 304 | snotime = staticTime 305 | atomic.AddUint64(staticWallNow, 100*TimeUnit) 306 | 307 | g.New(255) // Updates wallHi 308 | 309 | // Regress again. Not adjusting mono clock - calls below are supposed to simply drift - drift 310 | // count is supposed to end at 2 (since we're still using the same generator) and tick-tock 311 | // bit is supposed to be unset. 312 | atomic.AddUint64(staticWallNow, ^uint64(2*TimeUnit-1)) 313 | 314 | var ( 315 | batchCount = 4 316 | batchSize = g.Cap() / batchCount 317 | wg sync.WaitGroup 318 | ) 319 | 320 | wg.Add(batchCount) 321 | 322 | for i := 0; i < batchCount; i++ { 323 | go func(mul int) { 324 | for i := mul * batchSize; i < mul*batchSize+batchSize; i++ { 325 | ids[i] = g.New(255) 326 | } 327 | wg.Done() 328 | }(i) 329 | } 330 | 331 | wg.Wait() 332 | 333 | if atomic.LoadUint32(&g.drifts) != 2 { 334 | t.Errorf("expected [2] drifts recorded, got [%d]", atomic.LoadUint32(&g.drifts)) 335 | } 336 | 337 | for i := 0; i < g.Cap(); i++ { 338 | if ids[i][4]&1 != 0 { 339 | t.Errorf("%d: expected tick-tock bit to not be set, was set", i) 340 | } 341 | } 342 | 343 | snotime = internal.Snotime 344 | } 345 | } 346 | 347 | func testGeneratorNewTickTocksRace(g *Generator, ids []ID) func(*testing.T) { 348 | return func(*testing.T) { 349 | snotime = staticTime 350 | 351 | atomic.AddUint64(staticWallNow, 100*TimeUnit) 352 | g.New(255) 353 | atomic.AddUint64(staticWallNow, ^uint64(TimeUnit-1)) 354 | 355 | var ( 356 | wgOuter sync.WaitGroup 357 | wgInner sync.WaitGroup 358 | ) 359 | wgOuter.Add(1000) 360 | 361 | wgInner.Add(1000) 362 | for i := 0; i < 1000; i++ { 363 | go func() { 364 | wgInner.Done() 365 | wgInner.Wait() 366 | for i := 0; i < 2; i++ { 367 | _ = g.New(byte(i)) 368 | } 369 | wgOuter.Done() 370 | }() 371 | } 372 | wgOuter.Wait() 373 | 374 | snotime = internal.Snotime 375 | } 376 | } 377 | 378 | func TestGenerator_NewGeneratorRestoreRegressions(t *testing.T) { 379 | // First one we simply check that the times get applied at all. We get rid of the time 380 | // added while simulating the last drift. 381 | g, err := NewGenerator(nil, nil) 382 | if err != nil { 383 | t.Fatal(err) 384 | } 385 | 386 | // Reset the static clock. 387 | wall := snotime() 388 | snotime = staticTime 389 | atomic.StoreUint64(staticWallNow, wall) 390 | 391 | // Simulate a regression. 392 | g.New(255) 393 | atomic.AddUint64(staticWallNow, ^uint64(TimeUnit-1)) 394 | g.New(255) 395 | 396 | snapshot := g.Snapshot() 397 | 398 | g, err = NewGenerator(&snapshot, nil) 399 | if err != nil { 400 | t.Fatal(err) 401 | } 402 | 403 | if uint64(snapshot.WallSafe) != atomic.LoadUint64(&g.wallSafe) { 404 | t.Errorf("expected [%d], got [%d]", snapshot.WallSafe, atomic.LoadUint64(&g.wallSafe)) 405 | } 406 | 407 | if uint64(snapshot.WallHi) != atomic.LoadUint64(&g.wallHi) { 408 | t.Errorf("expected [%d], got [%d]", snapshot.WallHi, atomic.LoadUint64(&g.wallHi)) 409 | } 410 | 411 | // Second test, with a snapshot taken "in the future" (relative to current wall clock time). 412 | wall = internal.Snotime() 413 | atomic.StoreUint64(staticWallNow, wall+100*TimeUnit) 414 | 415 | // Simulate another regression. Takes place in the future - we are going to take a snapshot 416 | // and create a generator using that snapshot, where the generator will use snotime (current time) 417 | // as comparison and is supposed to handle this as if it is in the past relative to the snapshot. 418 | g.New(255) 419 | atomic.AddUint64(staticWallNow, ^uint64(TimeUnit-1)) 420 | g.New(255) 421 | 422 | snotime = internal.Snotime 423 | 424 | snapshot = g.Snapshot() 425 | 426 | g, err = NewGenerator(&snapshot, nil) 427 | if err != nil { 428 | t.Fatal(err) 429 | } 430 | 431 | if uint64(snapshot.WallSafe) != atomic.LoadUint64(&g.wallSafe) { 432 | t.Errorf("expected [%d], got [%d]", snapshot.WallSafe, atomic.LoadUint64(&g.wallSafe)) 433 | } 434 | 435 | if wall > atomic.LoadUint64(&g.wallHi) { 436 | t.Errorf("expected smaller than [%d], got [%d]", wall, atomic.LoadUint64(&g.wallHi)) 437 | } 438 | } 439 | 440 | func TestGenerator_NewWithTimeOverflows(t *testing.T) { 441 | var ( 442 | part = Partition{255, 255} 443 | seqPool = 12 444 | seqOverflows = 4 445 | seqMin = uint16(seqPool) 446 | seqMax = uint16(2*seqPool - 1) 447 | sampleSize = seqPool * seqOverflows 448 | 449 | g, err = NewGenerator(&GeneratorSnapshot{ 450 | Partition: part, 451 | SequenceMin: seqMin, 452 | SequenceMax: seqMax, 453 | }, nil) 454 | ) 455 | 456 | if err != nil { 457 | t.Fatal(err) 458 | } 459 | 460 | tn := time.Now() 461 | pool := g.Cap() 462 | 463 | ids := make([]ID, sampleSize) 464 | for i := 0; i < sampleSize; i++ { 465 | ids[i] = g.NewWithTime(byte(i), tn) 466 | } 467 | 468 | timeDist := make(map[int64]int) 469 | 470 | for i, s := 0, 0; i < sampleSize; i, s = i+1, s+1 { 471 | id := ids[i] 472 | timeDist[id.Timestamp()]++ 473 | 474 | seq := id.Sequence() 475 | if seq > seqMax { 476 | t.Errorf("%d: sequence overflowing max boundary; max [%d], got [%d]", i, seqMin, seq) 477 | } 478 | 479 | if seq < seqMin { 480 | t.Errorf("%d: sequence underflowing min boundary; min [%d], got [%d]", i, seqMin, seq) 481 | } 482 | 483 | // When we overflow with NewWithTime, the static sequence is supposed to roll over silently. 484 | if s == pool { 485 | s = 0 486 | } else if i > 0 && seq-ids[i-1].Sequence() != 1 { 487 | t.Errorf("%d: expected sequence to increment by 1, got [%d]", i, seq-ids[i-1].Sequence()) 488 | } 489 | 490 | expectedSeq := uint16(s) + seqMin 491 | if seq != expectedSeq { 492 | t.Errorf("%d: expected sequence [%d], got [%d]", i, expectedSeq, seq) 493 | } 494 | 495 | if id.Partition() != part { 496 | t.Errorf("%d: partition differs from generator's partition; expected [%d], got [%d]", i, part, id.Partition()) 497 | } 498 | } 499 | 500 | if len(timeDist) > 1 { 501 | t.Error("IDs generated with the same time ended up with different timestamps") 502 | } 503 | 504 | // Race test. 505 | var wg sync.WaitGroup 506 | wg.Add(1000) 507 | for i := 0; i < 1000; i++ { 508 | go func() { 509 | for i := 0; i < sampleSize; i++ { 510 | _ = g.NewWithTime(byte(i), tn) 511 | } 512 | wg.Done() 513 | }() 514 | } 515 | wg.Wait() 516 | } 517 | 518 | func TestGenerator_Uniqueness(t *testing.T) { 519 | var ( 520 | collisions int 521 | setSize = 4 * MaxSequence 522 | ) 523 | 524 | ids := make(map[ID]struct{}, setSize) 525 | 526 | for i := 1; i < setSize; i++ { 527 | id := generator.New(255) 528 | if _, found := ids[id]; found { 529 | collisions++ 530 | } else { 531 | ids[id] = struct{}{} 532 | } 533 | } 534 | 535 | if collisions > 0 { 536 | t.Errorf("generated %d colliding IDs in a set of %d", collisions, setSize) 537 | } 538 | } 539 | 540 | func TestGenerator_Partition(t *testing.T) { 541 | expected := Partition{'A', 255} 542 | g, err := NewGenerator(&GeneratorSnapshot{ 543 | Partition: expected, 544 | }, nil) 545 | if err != nil { 546 | t.Fatal(err) 547 | } 548 | 549 | actual := g.Partition() 550 | if actual != expected { 551 | t.Errorf("expected [%s], got [%s]", expected, actual) 552 | } 553 | } 554 | 555 | func TestGenerator_SequenceBounds(t *testing.T) { 556 | min := uint16(1024) 557 | max := uint16(2047) 558 | g, err := NewGenerator(&GeneratorSnapshot{ 559 | SequenceMin: min, 560 | SequenceMax: max, 561 | }, nil) 562 | if err != nil { 563 | t.Fatal(err) 564 | } 565 | 566 | if actual, expected := g.SequenceMin(), min; actual != expected { 567 | t.Errorf("expected [%d], got [%d]", expected, actual) 568 | } 569 | 570 | if actual, expected := g.SequenceMax(), max; actual != expected { 571 | t.Errorf("expected [%d], got [%d]", expected, actual) 572 | } 573 | 574 | if actual, expected := g.Cap(), int(max-min)+1; actual != expected { 575 | t.Errorf("expected [%d], got [%d]", expected, actual) 576 | } 577 | 578 | if actual, expected := g.Len(), 0; actual != expected { 579 | t.Errorf("expected [%d], got [%d]", expected, actual) 580 | } 581 | 582 | for i := 0; i < 5; i++ { 583 | g.New(255) 584 | } 585 | 586 | if actual, expected := g.Len(), 5; actual != expected { 587 | t.Errorf("expected [%d], got [%d]", expected, actual) 588 | } 589 | 590 | g, err = NewGenerator(&GeneratorSnapshot{ 591 | SequenceMin: 8, 592 | SequenceMax: 16, 593 | }, nil) 594 | if err != nil { 595 | t.Fatal(err) 596 | } 597 | 598 | // Simulate an overflow. All IDs over Cap() must be generated in a subsequent timeframe 599 | // meaning Len will reflect the count in the last frame. 600 | // TODO(alcore) This *can* occasionally fail as we are not using a deterministic time source, 601 | // meaning first batch can get split up if time changes during the test and then end up 602 | // spilling into the Len() we test for. 603 | for i := 0; i < g.Cap()+7; i++ { 604 | g.New(255) 605 | } 606 | 607 | if actual, expected := g.Len(), 7; actual != expected { 608 | t.Errorf("expected [%d], got [%d]", expected, actual) 609 | } 610 | 611 | g, err = NewGenerator(&GeneratorSnapshot{ 612 | SequenceMin: 8, 613 | SequenceMax: 16, 614 | }, nil) 615 | if err != nil { 616 | t.Fatal(err) 617 | } 618 | 619 | for i := 0; i < g.Cap(); i++ { 620 | g.New(255) 621 | } 622 | 623 | if actual, expected := g.Len(), g.Cap(); actual != expected { 624 | t.Errorf("expected [%d], got [%d]", expected, actual) 625 | } 626 | } 627 | 628 | func TestGenerator_Sequence_Single(t *testing.T) { 629 | g, err := NewGenerator(nil, nil) 630 | if err != nil { 631 | t.Fatal(err) 632 | } 633 | 634 | expected0 := uint32(0) 635 | expected1 := expected0 636 | expected2 := expected1 + 1 637 | actual0 := g.Sequence() 638 | _ = g.New(255) 639 | actual1 := g.Sequence() 640 | _ = g.New(255) 641 | actual2 := g.Sequence() 642 | 643 | if actual0 != expected0 { 644 | t.Errorf("expected [%d], got [%d]", expected0, actual0) 645 | } 646 | if actual1 != expected1 { 647 | t.Errorf("expected [%d], got [%d]", expected1, actual1) 648 | } 649 | if actual2 != expected2 { 650 | t.Errorf("expected [%d], got [%d]", expected2, actual2) 651 | } 652 | } 653 | 654 | func TestGenerator_Sequence_Batch(t *testing.T) { 655 | g, err := NewGenerator(nil, nil) 656 | if err != nil { 657 | t.Fatal(err) 658 | } 659 | 660 | expected := uint32(9) 661 | for i := 0; i <= int(expected); i++ { 662 | _ = g.New(255) 663 | } 664 | 665 | actual := g.Sequence() 666 | if actual != expected { 667 | t.Errorf("expected [%d], got [%d]", expected, actual) 668 | } 669 | } 670 | 671 | func TestGenerator_FromSnapshot_Sequence(t *testing.T) { 672 | seq := uint32(1024) 673 | g, err := NewGenerator(&GeneratorSnapshot{ 674 | SequenceMin: uint16(seq), 675 | Sequence: seq, 676 | }, nil) 677 | if err != nil { 678 | t.Fatal(err) 679 | } 680 | 681 | expected1 := seq 682 | expected2 := seq + 1 683 | _ = g.New(255) 684 | actual1 := g.Sequence() 685 | _ = g.New(255) 686 | actual2 := g.Sequence() 687 | 688 | if actual1 != expected1 { 689 | t.Errorf("expected [%d], got [%d]", expected1, actual1) 690 | } 691 | if actual2 != expected2 { 692 | t.Errorf("expected [%d], got [%d]", expected2, actual2) 693 | } 694 | } 695 | 696 | func TestGenerator_FromSnapshot_Pool_Defaults(t *testing.T) { 697 | t.Parallel() 698 | 699 | g, err := NewGenerator(&GeneratorSnapshot{ 700 | SequenceMin: 0, 701 | SequenceMax: 0, 702 | }, nil) 703 | if err != nil { 704 | t.Fatal(err) 705 | } 706 | 707 | if g.SequenceMin() != 0 { 708 | t.Errorf("expected [%d], got [%d]", 0, g.SequenceMin()) 709 | } 710 | 711 | if g.SequenceMax() != MaxSequence { 712 | t.Errorf("expected [%d], got [%d]", MaxSequence, g.SequenceMax()) 713 | } 714 | 715 | // Max as default when min is given. 716 | g, err = NewGenerator(&GeneratorSnapshot{ 717 | SequenceMin: 2048, 718 | }, nil) 719 | if err != nil { 720 | t.Fatal(err) 721 | } 722 | 723 | if g.SequenceMin() != 2048 { 724 | t.Errorf("expected [%d], got [%d]", 2048, g.SequenceMin()) 725 | } 726 | 727 | if g.SequenceMax() != MaxSequence { 728 | t.Errorf("expected [%d], got [%d]", MaxSequence, g.SequenceMax()) 729 | } 730 | } 731 | 732 | func TestGenerator_FromSnapshot_Pool_BoundsOrder(t *testing.T) { 733 | t.Parallel() 734 | 735 | g, err := NewGenerator(&GeneratorSnapshot{ 736 | SequenceMin: 2048, 737 | SequenceMax: 1024, 738 | }, nil) 739 | if err != nil { 740 | t.Fatal(err) 741 | } 742 | 743 | if g.SequenceMin() != 1024 { 744 | t.Errorf("expected [%d], got [%d]", 1024, g.SequenceMin()) 745 | } 746 | 747 | if g.SequenceMax() != 2048 { 748 | t.Errorf("expected [%d], got [%d]", 2048, g.SequenceMax()) 749 | } 750 | } 751 | 752 | func TestGenerator_FromSnapshot_Pool_None(t *testing.T) { 753 | t.Parallel() 754 | 755 | bound := uint16(2048) 756 | _, err := NewGenerator(&GeneratorSnapshot{ 757 | SequenceMin: bound, 758 | SequenceMax: bound, 759 | }, nil) 760 | if err == nil { 761 | t.Errorf("expected error, got none") 762 | return 763 | } 764 | 765 | verr, ok := err.(*InvalidSequenceBoundsError) 766 | if !ok { 767 | t.Errorf("expected error type [%T], got [%T]", &InvalidSequenceBoundsError{}, err) 768 | return 769 | } 770 | 771 | if verr.Msg != errSequenceBoundsIdenticalMsg { 772 | t.Errorf("expected error msg [%s], got [%s]", errSequenceBoundsIdenticalMsg, verr.Msg) 773 | } 774 | 775 | if verr.Min != bound { 776 | t.Errorf("expected [%d], got [%d]", bound, verr.Min) 777 | } 778 | 779 | if verr.Max != bound { 780 | t.Errorf("expected [%d], got [%d]", bound, verr.Max) 781 | } 782 | 783 | expectedMsg := fmt.Sprintf(errInvalidSequenceBoundsFmt, errSequenceBoundsIdenticalMsg, bound, 0, bound, 1) 784 | if verr.Error() != expectedMsg { 785 | t.Errorf("expected error msg [%s], got [%s]", expectedMsg, verr.Error()) 786 | } 787 | } 788 | 789 | func TestGenerator_FromSnapshot_Pool_Size(t *testing.T) { 790 | t.Parallel() 791 | 792 | seqMin := uint16(0) 793 | seqMax := seqMin + minSequencePoolSize - 1 794 | _, err := NewGenerator(&GeneratorSnapshot{ 795 | SequenceMin: seqMin, 796 | SequenceMax: seqMax, 797 | }, nil) 798 | if err == nil { 799 | t.Errorf("expected error, got none") 800 | return 801 | } 802 | 803 | verr, ok := err.(*InvalidSequenceBoundsError) 804 | if !ok { 805 | t.Errorf("expected error type [%T], got [%T]", &InvalidSequenceBoundsError{}, err) 806 | return 807 | } 808 | 809 | if verr.Msg != errSequencePoolTooSmallMsg { 810 | t.Errorf("expected error msg [%s], got [%s]", errSequencePoolTooSmallMsg, verr.Msg) 811 | } 812 | 813 | if verr.Min != seqMin { 814 | t.Errorf("expected [%d], got [%d]", seqMin, verr.Min) 815 | } 816 | 817 | if verr.Max != seqMax { 818 | t.Errorf("expected [%d], got [%d]", seqMax, verr.Max) 819 | } 820 | 821 | expectedMsg := fmt.Sprintf(errInvalidSequenceBoundsFmt, errSequencePoolTooSmallMsg, seqMin, 0, seqMax, seqMax-seqMin+1) 822 | if verr.Error() != expectedMsg { 823 | t.Errorf("expected error msg [%s], got [%s]", expectedMsg, verr.Error()) 824 | } 825 | } 826 | 827 | func TestGenerator_FromSnapshot_Underflow(t *testing.T) { 828 | t.Parallel() 829 | 830 | seqMin := uint16(2048) 831 | seq := uint32(seqMin - 1) 832 | _, err := NewGenerator(&GeneratorSnapshot{ 833 | SequenceMin: seqMin, 834 | Sequence: seq, 835 | }, nil) 836 | if err == nil { 837 | t.Errorf("expected error, got none") 838 | return 839 | } 840 | 841 | verr, ok := err.(*InvalidSequenceBoundsError) 842 | if !ok { 843 | t.Errorf("expected error type [%T], got [%T]", &InvalidSequenceBoundsError{}, err) 844 | return 845 | } 846 | 847 | if verr.Msg != errSequenceUnderflowsBound { 848 | t.Errorf("expected error msg [%s], got [%s]", errSequenceUnderflowsBound, verr.Msg) 849 | } 850 | 851 | if verr.Min != seqMin { 852 | t.Errorf("expected [%d], got [%d]", seqMin, verr.Min) 853 | } 854 | 855 | if verr.Cur != seq { 856 | t.Errorf("expected [%d], got [%d]", seq, verr.Cur) 857 | } 858 | 859 | expectedMsg := fmt.Sprintf(errInvalidSequenceBoundsFmt, errSequenceUnderflowsBound, seqMin, seq, MaxSequence, MaxSequence-seqMin+1) 860 | if verr.Error() != expectedMsg { 861 | t.Errorf("expected error msg [%s], got [%s]", expectedMsg, verr.Error()) 862 | } 863 | } 864 | 865 | func TestGenerator_Snapshot(t *testing.T) { 866 | var ( 867 | part = Partition{128, 255} 868 | seqMin = uint16(1024) 869 | seqMax = uint16(2047) 870 | seq = uint32(1024) 871 | ) 872 | 873 | snap := &GeneratorSnapshot{ 874 | Partition: part, 875 | SequenceMin: seqMin, 876 | SequenceMax: seqMax, 877 | Sequence: seq, 878 | } 879 | 880 | g, err := NewGenerator(snap, nil) 881 | if err != nil { 882 | t.Fatal(err) 883 | } 884 | 885 | actual := g.Snapshot() 886 | if actual.Sequence != seq { 887 | t.Errorf("expected [%d], got [%d]", seq, actual.Sequence) 888 | } 889 | 890 | atomic.AddUint32(&g.drifts, 1) 891 | wallNow := snotime() 892 | g.New(255) // First call will catch a zero wallHi and reset the sequence, while we want to measure an incr. 893 | g.New(255) 894 | actual = g.Snapshot() 895 | 896 | if uint64(actual.Now) != wallNow { 897 | t.Errorf("expected [%d], got [%d]", wallNow, actual.Now) 898 | } 899 | 900 | if uint64(actual.WallHi) != wallNow { 901 | t.Errorf("expected [%d], got [%d]", wallNow, actual.WallHi) 902 | } 903 | 904 | if actual.Drifts != 1 { 905 | t.Errorf("expected [%d], got [%d]", 1, actual.Drifts) 906 | } 907 | 908 | if actual.Sequence != seq+1 { 909 | t.Errorf("expected [%d], got [%d]", seq+1, actual.Sequence) 910 | } 911 | 912 | if actual.Partition != part { 913 | t.Errorf("expected [%s], got [%s]", part, actual.Partition) 914 | } 915 | 916 | if actual.SequenceMin != seqMin { 917 | t.Errorf("expected [%d], got [%d]", seqMin, actual.SequenceMin) 918 | } 919 | 920 | if actual.SequenceMax != seqMax { 921 | t.Errorf("expected [%d], got [%d]", seqMax, actual.SequenceMax) 922 | } 923 | } 924 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | sno logo

2 | 3 | A spec for **unique IDs in distributed systems** based on the Snowflake design, i.e. a coordination-based ID variant. 4 | It aims to be friendly to both machines and humans, compact, *versatile* and fast. 5 | 6 | This repository contains a **Go** package for generating such IDs. 7 | 8 | [![GoDoc](https://img.shields.io/badge/doc-reference-00a1fe.svg?style=flat-square)](https://pkg.go.dev/github.com/muyo/sno?tab=doc) 9 | [![Stable version](https://img.shields.io/github/v/release/muyo/sno?color=00a1fe&label=stable&sort=semver&style=flat-square)](https://github.com/muyo/sno/releases) 10 | [![Travis build: master](https://img.shields.io/travis/muyo/sno/master.svg?logo=travis&label=ci&style=flat-square)](https://travis-ci.com/muyo/sno) 11 | [![Coverage](https://img.shields.io/codecov/c/github/muyo/sno.svg?logo=codecov&logoColor=ffffff&style=flat-square)](https://codecov.io/gh/muyo/sno) 12 | [![Go Report Card](https://goreportcard.com/badge/github.com/muyo/sno?style=flat-square)](https://goreportcard.com/report/github.com/muyo/sno) 13 | [![License](http://img.shields.io/badge/license-MIT-00a1fe.svg?style=flat-square)](https://raw.githubusercontent.com/muyo/sno/master/LICENSE) 14 | ```bash 15 | go get -u github.com/muyo/sno 16 | ``` 17 | 18 | ### Features 19 | 20 | - **Compact** - **10 bytes** in its binary representation, canonically [encoded](#encoding) as **16 characters**. 21 |
URL-safe and non-ambiguous encoding which also happens to be at the binary length of UUIDs - 22 | **sno**s can be stored as UUIDs in your database of choice. 23 | - **K-sortable** in either representation. 24 | - **[Embedded timestamp](#time-and-sequence)** with a **4msec resolution**, bounded within the years **2010 - 2079**. 25 |
Handles clock drifts gracefully, without waiting. 26 | - **[Embedded byte](#metabyte)** for arbitrary data. 27 | - **[Simple data layout](#layout)** - straightforward to inspect or encode/decode. 28 | - **[Optional and flexible](#usage)** configuration and coordination. 29 | - **[Fast](./benchmark#results)**, wait-free, safe for concurrent use. 30 |
Clocks in at about 500 LoC, has no external dependencies and minimal dependencies on std. 31 | - ‭A pool of **≥ 16,384,000** IDs per second. 32 |
65,536 guaranteed unique IDs per 4msec per partition (65,536 combinations) per metabyte 33 | (256 combinations) per tick-tock (1 bit adjustment for clock drifts). 34 | **549,755,813,888,000** is the global pool **per second** when all components are taken into account. 35 | 36 | ### Non-features / cons 37 | 38 | - True randomness. **sno**s embed a counter and have **no entropy**. They are not suitable in a context where 39 | unpredictability of IDs is a must. They still, however, meet the common requirement of keeping internal counts 40 | (e.g. total number of entitites) unguessable and appear obfuscated; 41 | - Time precision. While *good enough* for many use cases, not quite there for others. The ➜ [Metabyte](#metabyte) 42 | can be used to get around this limitation, however. 43 | - It's 10 bytes, not 8. This is suboptimal as far as memory alignment is considered (platform dependent). 44 | 45 | 46 |
47 | 48 | ## Usage (➜ [API](https://pkg.go.dev/github.com/muyo/sno?tab=doc)) 49 | 50 | **sno** comes with a package-level generator on top of letting you configure your own generators. 51 | 52 | Generating a new ID using the defaults takes no more than importing the package and: 53 | 54 | ```go 55 | id := sno.New(0) 56 | ``` 57 | 58 | Where `0` is the ➜ [Metabyte](#metabyte).
59 | 60 | The global generator is immutable and private. It's therefore also not possible to restore it using a Snapshot. 61 | Its Partition is based on time and changes across restarts. 62 | 63 | ### Partitions (➜ [doc](https://pkg.go.dev/github.com/muyo/sno?tab=doc#Partition)) 64 | 65 | As soon as you run more than 1 generator, you **should** start coordinating the creation of Generators to 66 | actually *guarantee* a collision-free ride. This applies to all specs of the Snowflake variant. 67 | 68 | Partitions are one of several friends you have to get you those guarantees. A Partition is 2 bytes. 69 | What they mean and how you define them is up to you. 70 | 71 | ```go 72 | generator, err := sno.NewGenerator(&sno.GeneratorSnapshot{ 73 | Partition: sno.Partition{'A', 10} 74 | }, nil) 75 | ``` 76 | 77 | Multiple generators can share a partition by dividing the sequence pool between 78 | them (➜ [Sequence sharding](#sequence-sharding)). 79 | 80 | ### Snapshots (➜ [doc](https://pkg.go.dev/github.com/muyo/sno?tab=doc#GeneratorSnapshot)) 81 | 82 | Snapshots happen to serve both as configuration and a means of saving and restoring generator data. They are 83 | optional - simply pass `nil` to `NewGenerator()`, to get a Generator with sane defaults and a unique (in-process) 84 | Partition. 85 | 86 | Snapshots can be taken at runtime: 87 | 88 | ```go 89 | s := generator.Snapshot() 90 | ``` 91 | 92 | This exposes most of a Generator's internal bookkeeping data. In an ideal world where programmers are not lazy 93 | until their system runs into an edge case - you'd persist that snapshot across restarts and restore generators 94 | instead of just creating them from scratch each time. This will keep you safe both if a large clock drift happens 95 | during the restart -- or before, and you just happen to come back online again "in the past", relative to IDs that 96 | had already been generated. 97 | 98 | A snapshot is a sample in time - it will very quickly get stale. Only take snapshots meant for restoring them 99 | later when generators are already offline - or for metrics purposes when online. 100 | 101 | 102 |
103 | 104 | ## Layout 105 | 106 | A **sno** is simply 80-bits comprised of two 40-bit blocks: the **timestamp** and the **payload**. The bytes are 107 | stored in **big-endian** order in all representations to retain their sortable property. 108 | ![Layout](./.github/layout.png) 109 | Both blocks can be inspected and mutated independently in either representation. Bits of the components in the binary 110 | representation don't spill over into other bytes which means no additional bit twiddling voodoo is necessary* to extract 111 | them. 112 | 113 | \*The tick-tock bit in the timestamp is the only exception (➜ [Time and sequence](#time-and-sequence)). 114 | 115 |
116 | 117 | ## Time and sequence 118 | 119 | ### Time 120 | 121 | **sno**s embed a timestamp comprised of 39 bits with the epoch **milliseconds at a 4msec resolution** (floored, 122 | unsigned) and one bit, the LSB of the entire block - for the tick-tock toggle. 123 | 124 | ### Epoch 125 | 126 | The **epoch is custom** and **constant**. It is bounded within `2010-01-01 00:00:00 UTC` and 127 | `2079-09-07 15:47:35.548 UTC`. The lower bound is `1262304000` seconds relative to Unix. 128 | 129 | If you *really* have to break out of the epoch - or want to store higher precision - the metabyte is your friend. 130 | 131 | ### Precision 132 | 133 | Higher precision *is not necessarily* a good thing. Think in dataset and sorting terms, or in sampling rates. You 134 | want to grab all requests with an error code of `403` in a given second, where the code may be encoded in the metabyte. 135 | At a resolution of 1 second, you binary search for just one index and then proceed straight up linearly. 136 | That's simple enough. 137 | 138 | At a resolution of 1msec however, you now need to find the corresponding 1000 potential starting offsets because 139 | your `403` requests are interleaved with the `200` requests (potentially). At 4msec, this is 250 steps. 140 | 141 | Everything has tradeoffs. This was a compromise between precision, size, simple data layout -- and factors like that above. 142 | 143 | ### Sequence 144 | 145 | **sno**s embed a sequence (2 bytes) that is **relative to time**. It does not overflow and resets on each new time 146 | unit (4msec). A higher sequence within a given timeframe **does not necessarily indicate order of creation**. 147 | It is not advertised as monotonic because its monotonicity is dependent on usage. A single generator writing 148 | to a single partition, *ceteris paribus*, *will* result in monotonic increments and *will* represent order of creation. 149 | 150 | With multiple writers in the same partition, increment order is *undefined*. If the generator moves back in time, 151 | the order will still be monotonic but sorted either 2msec after or before IDs previously already written at that 152 | time (see tick-tock). 153 | 154 | #### Sequence sharding 155 | 156 | The sequence pool has a range of `[0..65535]` (inclusive). **sno** supports partition sharing out of the box 157 | by further sharding the sequence - that is multiple writers (generators) in the same partition. 158 | 159 | This is done by dividing the pool between all writers, via user-specified bounds. 160 | 161 | A generator will reset to its lower bound on each new time unit - and will never overflow its upper bound. 162 | Collisions are therefore guaranteed impossible unless misconfigured and they overlap with another 163 | *currently online* generator. 164 | 165 | 166 |

167 |

Star Trek: Voyager mode, How to shard sequences

168 |

169 | 170 | This can be useful when multiple containers on one physical machine are to write as a cluster to a partition 171 | defined by the machine's ID (or simpler - multiple processes on one host). Or if multiple remote 172 | services across the globe were to do that. 173 | 174 | ```go 175 | var PeoplePartition = sno.Partition{'P', 0} 176 | 177 | // In process/container/remote host #1 178 | generator1, err := sno.NewGenerator(&sno.GeneratorSnapshot{ 179 | Partition: PeoplePartition, 180 | SequenceMin: 0, 181 | SequenceMax: 32767 // 32768 - 1 182 | }, nil) 183 | 184 | // In process/container/remote host #2 185 | generator2, err := sno.NewGenerator(&sno.GeneratorSnapshot{ 186 | Partition: PeoplePartition, 187 | SequenceMin: 32768, 188 | SequenceMax: 65535 // 65536 - 1 189 | }, nil) 190 | ``` 191 | 192 | You will notice that we have simply divided our total pool of 65,536 into 2 even and **non-overlapping** 193 | sectors. In the first snapshot `SequenceMin` could be omitted - and `SequenceMax` in the second, as those are the 194 | defaults used when they are not defined. You will get an error when trying to set limits above the capacity of 195 | generators, but since the library is oblivious to your setup - it cannot warn you about overlaps and cannot 196 | resize on its own either. 197 | 198 | The pools can be defined arbitrarily - as long as you make sure they don't overlap across *currently online* 199 | generators. 200 | 201 | It is safe for a range previously used by another generator to be assigned to a different generator under the 202 | following conditions: 203 | - it happens in a different timeframe *in the future*, i.e. no sooner than after 4msec have passed (no orchestrator 204 | is fast enough to get a new container online to replace a dead one for this to be a worry); 205 | - if you can guarantee the new Generator won't regress into a time the previous Generator was running in. 206 | 207 | If you create the new Generator using a Snapshot of the former as it went offline, you do not need to worry about those 208 | conditions and can resume writing to the same range immediately - the obvious tradeoff being the need to coordinate 209 | the exchange of Snapshots. 210 | 211 | If your clusters are always fixed size - reserving ranges is straightforward. With dynamic sizes, a potential simple 212 | scheme is to reserve the lower byte of the partition for scaling. Divide your sequence pool by, say, 8, keep 213 | assigning higher ranges until you hit your divider. When you do, increment partition by 1, start assigning 214 | ranges from scratch. This gave us 2048 identifiable origins by using just one byte of the partition. 215 | 216 | That said, the partition pool available is large enough that the likelihood you'll ever *need* 217 | this is slim to none. Suffice to know you *can* if you want to. 218 | 219 | Besides for guaranteeing a collision-free ride, this approach can also be used to attach more semantic meaning to 220 | partitions themselves, them being placed higher in the sort order. 221 | In other words - with it, the origin of an ID can be determined by inspecting the sequence 222 | alone, which frees up the partition for another meaning. 223 | 224 | How about... 225 | 226 | ```go 227 | var requestIDGenerator, _ = sno.NewGenerator(&GeneratorSnapshot{ 228 | SequenceMax: 32767, 229 | }, nil) 230 | 231 | type Service byte 232 | type Call byte 233 | 234 | const ( 235 | UsersSvc Service = 1 236 | UserList Call = 1 237 | UserCreate Call = 2 238 | UserDelete Call = 3 239 | ) 240 | 241 | func genRequestID(svc Service, methodID Call) sno.ID { 242 | id := requestIDGenerator.New(byte(svc)) 243 | // Overwrites the upper byte of the fixed partition. 244 | // In our case - we didn't define it but gave a non-nil snapshot, so it is {0, 0}. 245 | id[6] = byte(methodID) 246 | 247 | return id 248 | } 249 | ``` 250 | 251 |

252 |

253 | 254 | #### Sequence overflow 255 | 256 | Remember that limiting the sequence pool also limits max throughput of each generator. For an explanation on what 257 | happens when you're running at or over capacity, see the details below or take a look at ➜ [Benchmarks](#benchmarks) 258 | which explains the numbers involved. 259 | 260 |

261 |

Star Trek: Voyager mode, Behaviour on sequence overflow

262 |

263 | 264 | The sequence never overflows and the generator is designed with a single-return `New()` method that does not return 265 | errors nor invalid IDs. *Realistically* the default generator will never overflow simply because you won't saturate 266 | the capacity. 267 | 268 | But since you can set bounds yourself, the capacity could shrink to `4` per 4msec (smallest allowed). 269 | Now that's more likely. So when you start overflowing, the generator will *stall* and *pray* for a 270 | reduction in throughput sometime in the near future. 271 | 272 | From **sno**'s persective requesting more IDs than it can safely give you **immediately** is not an error - but 273 | it *may* require correcting on *your end*. And you should know about that. Therefore, if 274 | you want to know when it happens - simply give **sno** a channel along with its configuration snapshot. 275 | 276 | When a thread requests an ID and gets stalled, **once** per time unit, you will get a `SequenceOverflowNotification` 277 | on that channel. 278 | 279 | ```go 280 | type SequenceOverflowNotification struct { 281 | Now time.Time // Time of tick. 282 | Count uint32 // Number of currently overflowing generation calls. 283 | Ticks uint32 // For how many ticks in total we've already been dealing with the *current* overflow. 284 | } 285 | ``` 286 | Keep track of the counter. If it keeps increasing, you're no longer bursting - you're simply over capacity 287 | and *eventually* need to slow down or you'll *eventually* starve your system. The `Ticks` count lets you estimate 288 | how long the generator has already been overflowing without keeping track of time yourself. A tick is *roughly* 1ms. 289 | 290 | The order of generation when stalling occurs is `undefined`. It is not a FIFO queue, it's a race. Previously stalled 291 | goroutines get woken up alongside inflight goroutines which have not yet been stalled, where the order of the former is 292 | handled by the runtime. A livelock is therefore possible if demand doesn't decrease. This behaviour *may* change and 293 | inflight goroutines *may* get thrown onto the stalling wait list if one is up and running, but this requires careful 294 | inspection. And since this is considered an unrealistic scenario which can be avoided with simple configuration, 295 | it's not a priority. 296 | 297 |

298 |

299 | 300 | #### Clock drift and the tick-tock toggle 301 | 302 | Just like all other specs that rely on clock times to resolve ambiguity, **sno**s are prone to clock drifts. But 303 | unlike all those others specs, **sno** adjusts itself to the new time - instead of waiting (blocking), it tick-tocks. 304 | 305 | **The tl;dr** applying to any system, really: ensure your deployments use properly synchronized system clocks 306 | (via NTP) to mitigate the *size* of drifts. Ideally, use a NTP server pool that applies 307 | a gradual [smear for leap seconds](https://developers.google.com/time/smear). Despite the original Snowflake spec 308 | suggesting otherwise, using NTP in slew mode (to avoid regressions entirely) 309 | [is not always a good idea](https://www.redhat.com/en/blog/avoiding-clock-drift-vms). 310 | 311 | Also remember that containers tend to get *paused* meaning their clocks are paused with them. 312 | 313 | As far as **sno**, collisions and performance are concerned, in typical scenarios you can enjoy a wait-free ride 314 | without requiring slew mode nor having to worry about even large drifts. 315 | 316 |

317 |

Star Trek: Voyager mode, How tick-tocking works

318 |

319 | 320 | **sno** attempts to eliminate the issue *entirely* - both despite and because of its small pool of bits to work with. 321 | 322 | The approach it takes is simple - each generator keeps track of the highest wall clock time it got from the OS\*, 323 | each time it generates a new timestamp. If we get a time that is lower than the one we recorded, i.e. the clock 324 | drifted backwards and we'd risk generating colliding IDs, we toggle a bit - stored from here on out in 325 | each **sno** generated *until the next regression*. Rinse, repeat - tick, tock. 326 | 327 | (\*IDs created with a user-defined time are exempt from this mechanism as their time is arbitrary. The means 328 | to *bring your own time* are provided to make porting old IDs simpler and is assumed to be done before an ID 329 | scheme goes online) 330 | 331 | In practice this means that we switch back and forth between two alternating timelines. Remember how the pool 332 | we've got is 16,384,000 IDs per second? When we tick or tock, we simply jump between two pools with the same 333 | capacity. 334 | 335 | Why not simply use that bit to store a higher resolution time fraction? True, we'd get twice the pool which 336 | seemingly boils down to the same - except it doesn't. That is due to how the sequence increases. Even if you 337 | had a throughput of merely 1 ID per hour, while the chance would be astronomically low - if the clock drifted 338 | back that whole hour, you *could* get a collision. The higher your throughput, the bigger the probability. 339 | ID's of the Snowflake variant, **sno** being one of them, are about **guarantees - not probabilities**. 340 | So this is a **sno-go**. 341 | 342 | (I will show myself out...) 343 | 344 | The simplistic approach of tick-tocking *entirely eliminates* that collision chance - but with a rigorous assumption: 345 | regressions happen at most once into a specific period, i.e. from the highest recorded time into the past 346 | and never back into that particular timeframe (let alone even further into the past). 347 | 348 | This *generally* is exactly the case but oddities as far as time synchronization, bad clocks and NTP client 349 | behaviour goes *do* happen. And in distributed systems, every edge case that can happen - *will* happen. What do? 350 | 351 | ##### How others do it 352 | 353 | - [Sonyflake] goes to sleep until back at the wall clock time it was already at 354 | previously. All goroutines attempting to generate are blocked. 355 | - [snowflake] hammers the OS with syscalls to get the current time until back 356 | at the time it was already at previously. All goroutines attempting to generate are blocked. 357 | - [xid] goes ¯\\_(ツ)_/¯ and does not tackle drifts at all. 358 | - Entropy-based specs (like UUID or KSUID) don't really need to care as they are generally not prone, even to 359 | extreme drifts - you run with a risk all the time. 360 | 361 | The approach one library took was to keep generating, but timestamp all IDs with the highest time recorded instead. 362 | This worked, because it had a large entropy pool to work with, for one (so a potential large spike in IDs generated 363 | in the same timeframe wasn't much of a consideration). **sno** has none. But more importantly - it disagrees on the 364 | reasoning about time and clocks. If we moved backwards, it means that an *adjustment* happened and we are *now* 365 | closer to the *correct* time from the perspective of a wider system. 366 | 367 | **sno** therefore keeps generating without waiting, using the time as reported by the system - in the "past" so to 368 | speak, but with the tick-tock bit toggled. 369 | 370 | *If* another regression happens, into that timeframe or even further back, *only then* do we tell all contenders 371 | to wait. We get a wait-free fast path *most of the time* - and safety if things go southways. 372 | 373 | ##### Tick-tocking obviously affects the sort order as it changes the timestamp 374 | 375 | Even though the toggle is *not* part of the milliseconds, you can think of it as if it were. Toggling is then like 376 | moving two milliseconds back and forth, but since our milliseconds are floored to increments of 4msec, we never 377 | hit the range of a previous timeframe. Alternating timelines are as such sorted *as if* they were 2msec apart from 378 | each other, but as far as the actual stored time is considered - they are timestamped at exactly the same millisecond. 379 | 380 | They won't sort in an interleaved fashion, but will be *right next* to the other timeline. Technically they *were* 381 | created at a different time, so being able to make that distinction is considered a plus by the author. 382 | 383 |

384 |

385 |

386 | 387 | ## Metabyte 388 | 389 | The **metabyte** is unique to **sno** across the specs the author researched, but the concept of embedding metadata 390 | in IDs is an ancient one. It's effectively just a *byte-of-whatever-you-want-it-to-be* - but perhaps 391 | *8-bits-of-whatever-you-want-them-to-be* does a better job of explaining its versatility. 392 | 393 | ### `0` is a valid metabyte 394 | 395 | **sno** is agnostic as to what that byte represents and it is **optional**. None of the properties of **sno**s 396 | get violated if you simply pass a `0`. 397 | 398 | However, if you can't find use for it, then you may be better served using a different ID spec/library 399 | altogether (➜ [Alternatives](#alternatives)). You'd be wasting a byte that could give you benefits elsewhere. 400 | 401 | ### Why? 402 | 403 | Many databases, especially embedded ones, are extremely efficient when all you need is the keys - not all 404 | the data all those keys represent. None of the Snowflake-like specs would provide a means to do that without 405 | excessive overrides (or too small a pool to work with), essentially a different format altogether, and so - **sno**. 406 | 407 |

408 |

409 | And simple constants tend to do the trick. 410 |

411 |

412 | 413 | Untyped integers can pass as `uint8` (i.e. `byte`) in Go, so the following would work and keep things tidy: 414 | 415 | ```go 416 | const ( 417 | PersonType = iota 418 | OtherType 419 | ) 420 | 421 | type Person struct { 422 | ID sno.ID 423 | Name string 424 | } 425 | 426 | person := Person{ 427 | ID: sno.New(PersonType), 428 | Name: "A Special Snöflinga", 429 | } 430 | ``` 431 |

432 |

433 | 434 |
435 | 436 | *Information that describes something* has the nice property of also helping to *identify* something across a sea 437 | of possibilities. It's a natural fit. 438 | 439 | Do everyone a favor, though, and **don't embed confidential information**. It will stop being confidential and 440 | become public knowledge the moment you do that. Let's stick to *nice* property, avoiding `PEBKAC`. 441 | 442 | ### Sort order and placement 443 | 444 | The metabyte follows the timestamp. This clusters IDs by the timestamp and then by the metabyte (for example - 445 | the type of the entity), *before* the fixed partition. 446 | 447 | If you were to use machine-ID based partitions across a cluster generating, say, `Person` entities, where `Person` 448 | corresponds to a metabyte of `1` - this has the neat property of grouping all `People` generated across the entirety 449 | of your system in the given timeframe in a sortable manner. In database terms, you *could* think of the metabyte as 450 | identifying a table that is sharded across many partitions - or as part of a compound key. But that's just one of 451 | many ways it can be utilized. 452 | 453 | Placement at the beginning of the second block allows the metabyte to potentially both extend the timestamp 454 | block or provide additional semantics to the payload block. Even if you always leave it empty, sort 455 | order nor sort/insert performance won't be hampered. 456 | 457 | ### But it's just a single byte! 458 | 459 | A single byte is plenty. 460 | 461 |

462 |

Here's a few ideas for things you did not know you wanted, yet.

463 |

464 | 465 | - IDs for requests in a HTTP context: 1 byte is enough to contain one of all possible standard HTTP status codes. 466 | *Et voila*, you now got all requests that resulted in an error nicely sorted and clustered. 467 |
Limit yourself to the non-exotic status codes and you can store the HTTP verb along with the status code. 468 | In that single byte. Suddenly even the partition (if it's tied to a machine/cluster) gains relevant semantics, 469 | as you've gained a timeseries of requests that started fail-cascading in the cluster. Constrain yourself even 470 | further to just one bit for `OK` or `ERROR` and you made room to also store information about the operation that 471 | was requested (think resource endpoint). 472 | 473 | - How about storing a (immutable) bitmask along with the ID? Save some 7 bytes of bools by doing so and have the 474 | flags readily available during an efficient sequential key traversal using your storage engine of choice. 475 | 476 | - Want to version-control a `Message`? Limit yourself to at most 256 versions and it becomes trivial. Take the ID 477 | of the last version created, increment its metabyte - and that's it. What you now have is effectively a simplistic 478 | versioning schema, where the IDs of all possible versions can be inferred without lookups, joins, indices and whatnot. 479 | And many databases will just store them *close* to each other. Locality is a thing. 480 |
How? The only part that changed was the metabyte. All other components remained the same, but we ended up with 481 | a new ID pointing to the most recent version. Admittedly the timestamp lost its default semantics of 482 | *moment of creation* and instead is *moment of creation of first version*, but you'd store a `revisedAt` timestamp 483 | anyways, wouldn't you?
And if you *really* wanted to support more versions - the IDs have certain properties 484 | that can be (ab)used for this. Increment this, decrement that... 485 | 486 | - Sometimes a single byte is all the data that you actually need to store, along with the time 487 | *when something happened*. Batch processing succeeded? `sno.New(0)`, done. Failed? `sno.New(1)`, done. You now 488 | have a uniquely identifiable event, know *when* and *where* it happened, what the outcome was - and you still 489 | had 7 spare bits (for higher precision time, maybe?) 490 | 491 | - Polymorphism has already been covered. Consider not just data storage, but also things like (un)marshaling 492 | polymorphic types efficiently. Take a JSON of `{id: "aaaaaaaa55aaaaaa", foo: "bar", baz: "bar"}`. 493 | The 8-th and 9-th (0-indexed) characters of the ID contain the encoded bits of the metabyte. Decode that 494 | (use one of the utilities provided by the library) and you now know what internal type the data should unmarshal 495 | to without first unmarshaling into an intermediary structure (nor rolling out a custom decoder for this type). 496 | There are many approaches to tackle this - an ID just happens to lend itself naturally to solve it and is easily 497 | portable. 498 | 499 | - 2 bytes for partitions not enough for your needs? Use a fixed byte as the metabyte -- you have extended the 500 | fixed partition to 3 bytes. Wrap a generator with a custom one to apply that metabyte for you each time you use it. 501 | The metabyte is, after all, part of the partition. It's just separated out for semantic purposes but its actual 502 | semantics are left to you. 503 |

504 |

505 | 506 |
507 | 508 | ## Encoding 509 | 510 | The encoding is a **custom base32** variant stemming from base32hex. Let's *not* call it *sno32*. 511 | A canonically encoded **sno** is a regexp of `[2-9a-x]{16}`. 512 | 513 | The following alphabet is used: 514 | 515 | ``` 516 | 23456789abcdefghijklmnopqrstuvwx 517 | ``` 518 | 519 | This is 2 contiguous ASCII ranges: `50..57` (digits) and `97..120` (*strictly* lowercase letters). 520 | 521 | On `amd64` encoding/decoding is vectorized and **[extremely fast](./benchmark#encodingdecoding)**. 522 | 523 |
524 | 525 | ## Alternatives 526 | 527 | | Name | Binary (bytes) | Encoded (chars)* | Sortable | Random** | Metadata | nsec/ID 528 | |------------:|:--------------:|:----------------:|:---------:|:---------:|:--------:|--------: 529 | | [UUID] | 16 | 36 | ![no] | ![yes] | ![no] | ≥36.3 530 | | [KSUID] | 20 | 27 | ![yes] | ![yes] | ![no] | 206.0 531 | | [ULID] | 16 | 26 | ![yes] | ![yes] | ![no] | ≥50.3 532 | | [Sandflake] | 16 | 26 | ![yes] | ![meh] | ![no] | 224.0 533 | | [cuid] | ![no] | 25 | ![yes] | ![meh] | ![no] | 342.0 534 | | [xid] | 12 | 20 | ![yes] | ![no] | ![no] | 19.4 535 | | **sno** | 10 | **16** | ![yes] | ![no] | ![yes] | **8.8** 536 | | [Snowflake] | **8** | ≤20 | ![yes] | ![no] | ![no] | 28.9 537 | 538 | 539 | [UUID]: https://github.com/gofrs/uuid 540 | [KSUID]: https://github.com/segmentio/ksuid 541 | [cuid]: https://github.com/lucsky/cuid 542 | [Snowflake]: https://github.com/bwmarrin/snowflake 543 | [Sonyflake]: https://github.com/sony/sonyflake 544 | [Sandflake]: https://github.com/celrenheit/sandflake 545 | [ULID]: https://github.com/oklog/ulid 546 | [xid]: https://github.com/rs/xid 547 | 548 | [yes]: ./.github/ico-yes.svg 549 | [meh]: ./.github/ico-meh.svg 550 | [no]: ./.github/ico-no.svg 551 | 552 | \* Using canonical encoding.
553 | \** When used with a proper CSPRNG. The more important aspect is the distinction between entropy-based and 554 | coordination-based IDs. [Sandflake] and [cuid] do contain entropy, but not sufficient to rely on entropy 555 | alone to avoid collisions (3 bytes and 4 bytes respectively).
556 | 557 | For performance results see ➜ [Benchmark](./benchmark). `≥` values given for libraries which provide more 558 | than one variant, whereas the fastest one is listed. 559 | 560 | 561 |

562 | 563 | ## Attributions 564 | 565 | **sno** is both based on and inspired by [xid] - more so than by the original Snowflake - but the changes it 566 | introduces are unfortunately incompatible with xid's spec. 567 | 568 | ## Further reading 569 | 570 | - [Original Snowflake implementation](https://github.com/twitter-archive/snowflake/tree/snowflake-2010) and 571 | [related post](https://blog.twitter.com/engineering/en_us/a/2010/announcing-snowflake.html) 572 | - [Mongo ObjectIds](https://docs.mongodb.com/manual/reference/method/ObjectId/) 573 | - [Instagram: Sharding & IDs at Instagram](https://instagram-engineering.com/sharding-ids-at-instagram-1cf5a71e5a5c) 574 | - [Flickr: Ticket Servers: Distributed Unique Primary Keys on the Cheap](http://code.flickr.net/2010/02/08/ticket-servers-distributed-unique-primary-keys-on-the-cheap/) 575 | - [Segment: A brief history of the UUID](https://segment.com/blog/a-brief-history-of-the-uuid/) - about KSUID and the shortcomings of UUIDs. 576 | - [Farfetch: Unique integer generation in distributed systems](https://www.farfetchtechblog.com/en/blog/post/unique-integer-generation-in-distributed-systems) - uint32 utilizing Cassandra to coordinate. 577 | 578 | Also potentially of interest: 579 | - [Lamport timestamps](https://en.wikipedia.org/wiki/Lamport_timestamps) (vector/logical clocks) 580 | - [The Bloom Clock](https://arxiv.org/pdf/1905.13064.pdf) by Lum Ramabaja 581 | --------------------------------------------------------------------------------