├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── appveyor.yml ├── bolt_386.go ├── bolt_amd64.go ├── bolt_arm.go ├── bolt_arm64.go ├── bolt_linux.go ├── bolt_openbsd.go ├── bolt_ppc.go ├── bolt_ppc64.go ├── bolt_ppc64le.go ├── bolt_s390x.go ├── bolt_unix.go ├── bolt_unix_solaris.go ├── bolt_windows.go ├── boltsync_unix.go ├── bucket.go ├── bucket_test.go ├── cmd └── bolt │ ├── main.go │ └── main_test.go ├── cursor.go ├── cursor_test.go ├── db.go ├── db_test.go ├── doc.go ├── errors.go ├── freelist.go ├── freelist_test.go ├── node.go ├── node_test.go ├── page.go ├── page_test.go ├── quick_test.go ├── simulation_test.go ├── tx.go └── tx_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.prof 2 | *.test 3 | *.swp 4 | /bin/ 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Ben Johnson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | BRANCH=`git rev-parse --abbrev-ref HEAD` 2 | COMMIT=`git rev-parse --short HEAD` 3 | GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)" 4 | 5 | default: build 6 | 7 | race: 8 | @go test -v -race -test.run="TestSimulate_(100op|1000op)" 9 | 10 | # go get github.com/kisielk/errcheck 11 | errcheck: 12 | @errcheck -ignorepkg=bytes -ignore=os:Remove github.com/boltdb/bolt 13 | 14 | test: 15 | @go test -v -cover . 16 | @go test -v ./cmd/bolt 17 | 18 | .PHONY: fmt test 19 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | version: "{build}" 2 | 3 | os: Windows Server 2012 R2 4 | 5 | clone_folder: c:\gopath\src\github.com\boltdb\bolt 6 | 7 | environment: 8 | GOPATH: c:\gopath 9 | 10 | install: 11 | - echo %PATH% 12 | - echo %GOPATH% 13 | - go version 14 | - go env 15 | - go get -v -t ./... 16 | 17 | build_script: 18 | - go test -v ./... 
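The Go sources below are the library itself. As orientation before the per-platform files, here is a minimal sketch of typical usage of the public API exercised throughout this repository (the my.db path and the widgets bucket/key names are illustrative, not part of the source):

package main

import (
	"fmt"
	"log"

	"github.com/boltdb/bolt"
)

func main() {
	// Open creates the file if it doesn't exist; 0600 and nil options are
	// illustrative defaults.
	db, err := bolt.Open("my.db", 0600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// All writes go through a read-write transaction (Update).
	if err := db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists([]byte("widgets"))
		if err != nil {
			return err
		}
		return b.Put([]byte("foo"), []byte("bar"))
	}); err != nil {
		log.Fatal(err)
	}

	// All reads go through a read-only transaction (View).
	if err := db.View(func(tx *bolt.Tx) error {
		fmt.Printf("foo=%s\n", tx.Bucket([]byte("widgets")).Get([]byte("foo")))
		return nil
	}); err != nil {
		log.Fatal(err)
	}
}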
19 | -------------------------------------------------------------------------------- /bolt_386.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | // maxMapSize represents the largest mmap size supported by Bolt. 4 | const maxMapSize = 0x7FFFFFFF // 2GB 5 | 6 | // maxAllocSize is the size used when creating array pointers. 7 | const maxAllocSize = 0xFFFFFFF 8 | 9 | // Are unaligned load/stores broken on this arch? 10 | var brokenUnaligned = false 11 | -------------------------------------------------------------------------------- /bolt_amd64.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | // maxMapSize represents the largest mmap size supported by Bolt. 4 | const maxMapSize = 0xFFFFFFFFFFFF // 256TB 5 | 6 | // maxAllocSize is the size used when creating array pointers. 7 | const maxAllocSize = 0x7FFFFFFF 8 | 9 | // Are unaligned load/stores broken on this arch? 10 | var brokenUnaligned = false 11 | -------------------------------------------------------------------------------- /bolt_arm.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import "unsafe" 4 | 5 | // maxMapSize represents the largest mmap size supported by Bolt. 6 | const maxMapSize = 0x7FFFFFFF // 2GB 7 | 8 | // maxAllocSize is the size used when creating array pointers. 9 | const maxAllocSize = 0xFFFFFFF 10 | 11 | // Are unaligned load/stores broken on this arch? 12 | var brokenUnaligned bool 13 | 14 | func init() { 15 | // Simple check to see whether this arch handles unaligned load/stores 16 | // correctly. 17 | 18 | // ARM9 and older devices require load/stores to be from/to aligned 19 | // addresses. If not, the lower 2 bits are cleared and that address is 20 | // read in a jumbled up order. 21 | 22 | // See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka15414.html 23 | 24 | raw := [6]byte{0xfe, 0xef, 0x11, 0x22, 0x22, 0x11} 25 | val := *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&raw)) + 2)) 26 | 27 | brokenUnaligned = val != 0x11222211 28 | } 29 | -------------------------------------------------------------------------------- /bolt_arm64.go: -------------------------------------------------------------------------------- 1 | // +build arm64 2 | 3 | package bolt 4 | 5 | // maxMapSize represents the largest mmap size supported by Bolt. 6 | const maxMapSize = 0xFFFFFFFFFFFF // 256TB 7 | 8 | // maxAllocSize is the size used when creating array pointers. 9 | const maxAllocSize = 0x7FFFFFFF 10 | 11 | // Are unaligned load/stores broken on this arch? 12 | var brokenUnaligned = false 13 | -------------------------------------------------------------------------------- /bolt_linux.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "syscall" 5 | ) 6 | 7 | // fdatasync flushes written data to a file descriptor. 
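// On Linux this uses fdatasync(2), which, unlike fsync(2), may skip flushing
// metadata (such as mtime) that isn't needed to read the data back, making it
// slightly cheaper on Bolt's write path.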
8 | func fdatasync(db *DB) error { 9 | return syscall.Fdatasync(int(db.file.Fd())) 10 | } 11 | -------------------------------------------------------------------------------- /bolt_openbsd.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "syscall" 5 | "unsafe" 6 | ) 7 | 8 | const ( 9 | msAsync = 1 << iota // perform asynchronous writes 10 | msSync // perform synchronous writes 11 | msInvalidate // invalidate cached data 12 | ) 13 | 14 | func msync(db *DB) error { 15 | _, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(db.data)), uintptr(db.datasz), msInvalidate) 16 | if errno != 0 { 17 | return errno 18 | } 19 | return nil 20 | } 21 | 22 | func fdatasync(db *DB) error { 23 | if db.data != nil { 24 | return msync(db) 25 | } 26 | return db.file.Sync() 27 | } 28 | -------------------------------------------------------------------------------- /bolt_ppc.go: -------------------------------------------------------------------------------- 1 | // +build ppc 2 | 3 | package bolt 4 | 5 | // maxMapSize represents the largest mmap size supported by Bolt. 6 | const maxMapSize = 0x7FFFFFFF // 2GB 7 | 8 | // maxAllocSize is the size used when creating array pointers. 9 | const maxAllocSize = 0xFFFFFFF 10 | -------------------------------------------------------------------------------- /bolt_ppc64.go: -------------------------------------------------------------------------------- 1 | // +build ppc64 2 | 3 | package bolt 4 | 5 | // maxMapSize represents the largest mmap size supported by Bolt. 6 | const maxMapSize = 0xFFFFFFFFFFFF // 256TB 7 | 8 | // maxAllocSize is the size used when creating array pointers. 9 | const maxAllocSize = 0x7FFFFFFF 10 | 11 | // Are unaligned load/stores broken on this arch? 12 | var brokenUnaligned = false 13 | -------------------------------------------------------------------------------- /bolt_ppc64le.go: -------------------------------------------------------------------------------- 1 | // +build ppc64le 2 | 3 | package bolt 4 | 5 | // maxMapSize represents the largest mmap size supported by Bolt. 6 | const maxMapSize = 0xFFFFFFFFFFFF // 256TB 7 | 8 | // maxAllocSize is the size used when creating array pointers. 9 | const maxAllocSize = 0x7FFFFFFF 10 | 11 | // Are unaligned load/stores broken on this arch? 12 | var brokenUnaligned = false 13 | -------------------------------------------------------------------------------- /bolt_s390x.go: -------------------------------------------------------------------------------- 1 | // +build s390x 2 | 3 | package bolt 4 | 5 | // maxMapSize represents the largest mmap size supported by Bolt. 6 | const maxMapSize = 0xFFFFFFFFFFFF // 256TB 7 | 8 | // maxAllocSize is the size used when creating array pointers. 9 | const maxAllocSize = 0x7FFFFFFF 10 | 11 | // Are unaligned load/stores broken on this arch? 12 | var brokenUnaligned = false 13 | -------------------------------------------------------------------------------- /bolt_unix.go: -------------------------------------------------------------------------------- 1 | // +build !windows,!plan9,!solaris 2 | 3 | package bolt 4 | 5 | import ( 6 | "fmt" 7 | "os" 8 | "syscall" 9 | "time" 10 | "unsafe" 11 | ) 12 | 13 | // flock acquires an advisory lock on a file descriptor. 14 | func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { 15 | var t time.Time 16 | for { 17 | // If we're beyond our timeout then return an error. 
18 | // This can only occur after we've attempted a flock once. 19 | if t.IsZero() { 20 | t = time.Now() 21 | } else if timeout > 0 && time.Since(t) > timeout { 22 | return ErrTimeout 23 | } 24 | flag := syscall.LOCK_SH 25 | if exclusive { 26 | flag = syscall.LOCK_EX 27 | } 28 | 29 | // Otherwise attempt to obtain an exclusive lock. 30 | err := syscall.Flock(int(db.file.Fd()), flag|syscall.LOCK_NB) 31 | if err == nil { 32 | return nil 33 | } else if err != syscall.EWOULDBLOCK { 34 | return err 35 | } 36 | 37 | // Wait for a bit and try again. 38 | time.Sleep(50 * time.Millisecond) 39 | } 40 | } 41 | 42 | // funlock releases an advisory lock on a file descriptor. 43 | func funlock(db *DB) error { 44 | return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN) 45 | } 46 | 47 | // mmap memory maps a DB's data file. 48 | func mmap(db *DB, sz int) error { 49 | // Map the data file to memory. 50 | b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags) 51 | if err != nil { 52 | return err 53 | } 54 | 55 | // Advise the kernel that the mmap is accessed randomly. 56 | if err := madvise(b, syscall.MADV_RANDOM); err != nil { 57 | return fmt.Errorf("madvise: %s", err) 58 | } 59 | 60 | // Save the original byte slice and convert to a byte array pointer. 61 | db.dataref = b 62 | db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0])) 63 | db.datasz = sz 64 | return nil 65 | } 66 | 67 | // munmap unmaps a DB's data file from memory. 68 | func munmap(db *DB) error { 69 | // Ignore the unmap if we have no mapped data. 70 | if db.dataref == nil { 71 | return nil 72 | } 73 | 74 | // Unmap using the original byte slice. 75 | err := syscall.Munmap(db.dataref) 76 | db.dataref = nil 77 | db.data = nil 78 | db.datasz = 0 79 | return err 80 | } 81 | 82 | // NOTE: This function is copied from stdlib because it is not available on darwin. 83 | func madvise(b []byte, advice int) (err error) { 84 | _, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice)) 85 | if e1 != 0 { 86 | err = e1 87 | } 88 | return 89 | } 90 | -------------------------------------------------------------------------------- /bolt_unix_solaris.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "syscall" 7 | "time" 8 | "unsafe" 9 | 10 | "golang.org/x/sys/unix" 11 | ) 12 | 13 | // flock acquires an advisory lock on a file descriptor. 14 | func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { 15 | var t time.Time 16 | for { 17 | // If we're beyond our timeout then return an error. 18 | // This can only occur after we've attempted a flock once. 19 | if t.IsZero() { 20 | t = time.Now() 21 | } else if timeout > 0 && time.Since(t) > timeout { 22 | return ErrTimeout 23 | } 24 | var lock syscall.Flock_t 25 | lock.Start = 0 26 | lock.Len = 0 27 | lock.Pid = 0 28 | lock.Whence = 0 29 | lock.Pid = 0 30 | if exclusive { 31 | lock.Type = syscall.F_WRLCK 32 | } else { 33 | lock.Type = syscall.F_RDLCK 34 | } 35 | err := syscall.FcntlFlock(db.file.Fd(), syscall.F_SETLK, &lock) 36 | if err == nil { 37 | return nil 38 | } else if err != syscall.EAGAIN { 39 | return err 40 | } 41 | 42 | // Wait for a bit and try again. 43 | time.Sleep(50 * time.Millisecond) 44 | } 45 | } 46 | 47 | // funlock releases an advisory lock on a file descriptor. 
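// It does so by installing an F_UNLCK lock over the same whole-file range
// (Start=0, Len=0, Whence=0) that flock locked.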
48 | func funlock(db *DB) error { 49 | var lock syscall.Flock_t 50 | lock.Start = 0 51 | lock.Len = 0 52 | lock.Type = syscall.F_UNLCK 53 | lock.Whence = 0 54 | return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock) 55 | } 56 | 57 | // mmap memory maps a DB's data file. 58 | func mmap(db *DB, sz int) error { 59 | // Map the data file to memory. 60 | b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags) 61 | if err != nil { 62 | return err 63 | } 64 | 65 | // Advise the kernel that the mmap is accessed randomly. 66 | if err := unix.Madvise(b, syscall.MADV_RANDOM); err != nil { 67 | return fmt.Errorf("madvise: %s", err) 68 | } 69 | 70 | // Save the original byte slice and convert to a byte array pointer. 71 | db.dataref = b 72 | db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0])) 73 | db.datasz = sz 74 | return nil 75 | } 76 | 77 | // munmap unmaps a DB's data file from memory. 78 | func munmap(db *DB) error { 79 | // Ignore the unmap if we have no mapped data. 80 | if db.dataref == nil { 81 | return nil 82 | } 83 | 84 | // Unmap using the original byte slice. 85 | err := unix.Munmap(db.dataref) 86 | db.dataref = nil 87 | db.data = nil 88 | db.datasz = 0 89 | return err 90 | } 91 | -------------------------------------------------------------------------------- /bolt_windows.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "syscall" 7 | "time" 8 | "unsafe" 9 | ) 10 | 11 | // LockFileEx code derived from golang build filemutex_windows.go @ v1.5.1 12 | var ( 13 | modkernel32 = syscall.NewLazyDLL("kernel32.dll") 14 | procLockFileEx = modkernel32.NewProc("LockFileEx") 15 | procUnlockFileEx = modkernel32.NewProc("UnlockFileEx") 16 | ) 17 | 18 | const ( 19 | lockExt = ".lock" 20 | 21 | // see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx 22 | flagLockExclusive = 2 23 | flagLockFailImmediately = 1 24 | 25 | // see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx 26 | errLockViolation syscall.Errno = 0x21 27 | ) 28 | 29 | func lockFileEx(h syscall.Handle, flags, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) { 30 | r, _, err := procLockFileEx.Call(uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol))) 31 | if r == 0 { 32 | return err 33 | } 34 | return nil 35 | } 36 | 37 | func unlockFileEx(h syscall.Handle, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) { 38 | r, _, err := procUnlockFileEx.Call(uintptr(h), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)), 0) 39 | if r == 0 { 40 | return err 41 | } 42 | return nil 43 | } 44 | 45 | // fdatasync flushes written data to a file descriptor. 46 | func fdatasync(db *DB) error { 47 | return db.file.Sync() 48 | } 49 | 50 | // flock acquires an advisory lock on a file descriptor. 51 | func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { 52 | // Create a separate lock file on windows because a process 53 | // cannot share an exclusive lock on the same file. This is 54 | // needed during Tx.WriteTo(). 55 | f, err := os.OpenFile(db.path+lockExt, os.O_CREATE, mode) 56 | if err != nil { 57 | return err 58 | } 59 | db.lockfile = f 60 | 61 | var t time.Time 62 | for { 63 | // If we're beyond our timeout then return an error. 
64 | // This can only occur after we've attempted a flock once. 65 | if t.IsZero() { 66 | t = time.Now() 67 | } else if timeout > 0 && time.Since(t) > timeout { 68 | return ErrTimeout 69 | } 70 | 71 | var flag uint32 = flagLockFailImmediately 72 | if exclusive { 73 | flag |= flagLockExclusive 74 | } 75 | 76 | err := lockFileEx(syscall.Handle(db.lockfile.Fd()), flag, 0, 1, 0, &syscall.Overlapped{}) 77 | if err == nil { 78 | return nil 79 | } else if err != errLockViolation { 80 | return err 81 | } 82 | 83 | // Wait for a bit and try again. 84 | time.Sleep(50 * time.Millisecond) 85 | } 86 | } 87 | 88 | // funlock releases an advisory lock on a file descriptor. 89 | func funlock(db *DB) error { 90 | err := unlockFileEx(syscall.Handle(db.lockfile.Fd()), 0, 1, 0, &syscall.Overlapped{}) 91 | db.lockfile.Close() 92 | os.Remove(db.path + lockExt) 93 | return err 94 | } 95 | 96 | // mmap memory maps a DB's data file. 97 | // Based on: https://github.com/edsrzf/mmap-go 98 | func mmap(db *DB, sz int) error { 99 | if !db.readOnly { 100 | // Truncate the database to the size of the mmap. 101 | if err := db.file.Truncate(int64(sz)); err != nil { 102 | return fmt.Errorf("truncate: %s", err) 103 | } 104 | } 105 | 106 | // Open a file mapping handle. 107 | sizehi := uint32(sz >> 32) 108 | sizelo := uint32(sz) & 0xffffffff 109 | h, errno := syscall.CreateFileMapping(syscall.Handle(db.file.Fd()), nil, syscall.PAGE_READONLY, sizehi, sizelo, nil) 110 | if h == 0 { 111 | return os.NewSyscallError("CreateFileMapping", errno) 112 | } 113 | 114 | // Create the memory map. 115 | addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, uintptr(sz)) 116 | if addr == 0 { 117 | return os.NewSyscallError("MapViewOfFile", errno) 118 | } 119 | 120 | // Close mapping handle. 121 | if err := syscall.CloseHandle(syscall.Handle(h)); err != nil { 122 | return os.NewSyscallError("CloseHandle", err) 123 | } 124 | 125 | // Convert to a byte array. 126 | db.data = ((*[maxMapSize]byte)(unsafe.Pointer(addr))) 127 | db.datasz = sz 128 | 129 | return nil 130 | } 131 | 132 | // munmap unmaps a pointer from a file. 133 | // Based on: https://github.com/edsrzf/mmap-go 134 | func munmap(db *DB) error { 135 | if db.data == nil { 136 | return nil 137 | } 138 | 139 | addr := (uintptr)(unsafe.Pointer(&db.data[0])) 140 | if err := syscall.UnmapViewOfFile(addr); err != nil { 141 | return os.NewSyscallError("UnmapViewOfFile", err) 142 | } 143 | return nil 144 | } 145 | -------------------------------------------------------------------------------- /boltsync_unix.go: -------------------------------------------------------------------------------- 1 | // +build !windows,!plan9,!linux,!openbsd 2 | 3 | package bolt 4 | 5 | // fdatasync flushes written data to a file descriptor. 6 | func fdatasync(db *DB) error { 7 | return db.file.Sync() 8 | } 9 | -------------------------------------------------------------------------------- /bucket.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "unsafe" 7 | ) 8 | 9 | const ( 10 | // MaxKeySize is the maximum length of a key, in bytes. 11 | MaxKeySize = 32768 12 | 13 | // MaxValueSize is the maximum length of a value, in bytes.
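// ((1 << 31) - 2 bytes, i.e. just under 2GB. Put compares len(value) against
// this limit as an int64, so the check is safe even on 32-bit platforms.)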
14 | MaxValueSize = (1 << 31) - 2 15 | ) 16 | 17 | const ( 18 | maxUint = ^uint(0) 19 | minUint = 0 20 | maxInt = int(^uint(0) >> 1) 21 | minInt = -maxInt - 1 22 | ) 23 | 24 | const bucketHeaderSize = int(unsafe.Sizeof(bucket{})) 25 | 26 | const ( 27 | minFillPercent = 0.1 28 | maxFillPercent = 1.0 29 | ) 30 | 31 | // DefaultFillPercent is the percentage that split pages are filled. 32 | // This value can be changed by setting Bucket.FillPercent. 33 | const DefaultFillPercent = 0.5 34 | 35 | // Bucket represents a collection of key/value pairs inside the database. 36 | type Bucket struct { 37 | *bucket 38 | tx *Tx // the associated transaction 39 | buckets map[string]*Bucket // subbucket cache 40 | page *page // inline page reference 41 | rootNode *node // materialized node for the root page. 42 | nodes map[pgid]*node // node cache 43 | 44 | // Sets the threshold for filling nodes when they split. By default, 45 | // the bucket will fill to 50% but it can be useful to increase this 46 | // amount if you know that your write workloads are mostly append-only. 47 | // 48 | // This is non-persisted across transactions so it must be set in every Tx. 49 | FillPercent float64 50 | } 51 | 52 | // bucket represents the on-file representation of a bucket. 53 | // This is stored as the "value" of a bucket key. If the bucket is small enough, 54 | // then its root page can be stored inline in the "value", after the bucket 55 | // header. In the case of inline buckets, the "root" will be 0. 56 | type bucket struct { 57 | root pgid // page id of the bucket's root-level page 58 | sequence uint64 // monotonically incrementing, used by NextSequence() 59 | } 60 | 61 | // newBucket returns a new bucket associated with a transaction. 62 | func newBucket(tx *Tx) Bucket { 63 | var b = Bucket{tx: tx, FillPercent: DefaultFillPercent} 64 | if tx.writable { 65 | b.buckets = make(map[string]*Bucket) 66 | b.nodes = make(map[pgid]*node) 67 | } 68 | return b 69 | } 70 | 71 | // Tx returns the tx of the bucket. 72 | func (b *Bucket) Tx() *Tx { 73 | return b.tx 74 | } 75 | 76 | // Root returns the root of the bucket. 77 | func (b *Bucket) Root() pgid { 78 | return b.root 79 | } 80 | 81 | // Writable returns whether the bucket is writable. 82 | func (b *Bucket) Writable() bool { 83 | return b.tx.writable 84 | } 85 | 86 | // Cursor creates a cursor associated with the bucket. 87 | // The cursor is only valid as long as the transaction is open. 88 | // Do not use a cursor after the transaction is closed. 89 | func (b *Bucket) Cursor() *Cursor { 90 | // Update transaction statistics. 91 | b.tx.stats.CursorCount++ 92 | 93 | // Allocate and return a cursor. 94 | return &Cursor{ 95 | bucket: b, 96 | stack: make([]elemRef, 0), 97 | } 98 | } 99 | 100 | // Bucket retrieves a nested bucket by name. 101 | // Returns nil if the bucket does not exist. 102 | // The bucket instance is only valid for the lifetime of the transaction. 103 | func (b *Bucket) Bucket(name []byte) *Bucket { 104 | if b.buckets != nil { 105 | if child := b.buckets[string(name)]; child != nil { 106 | return child 107 | } 108 | } 109 | 110 | // Move cursor to key. 111 | c := b.Cursor() 112 | k, v, flags := c.seek(name) 113 | 114 | // Return nil if the key doesn't exist or it is not a bucket. 115 | if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 { 116 | return nil 117 | } 118 | 119 | // Otherwise create a bucket and cache it. 
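// The cache is only populated for writable transactions; read-only
// transactions have a nil b.buckets map (see newBucket).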
120 | var child = b.openBucket(v) 121 | if b.buckets != nil { 122 | b.buckets[string(name)] = child 123 | } 124 | 125 | return child 126 | } 127 | 128 | // Helper method that re-interprets a sub-bucket value 129 | // from a parent into a Bucket. 130 | func (b *Bucket) openBucket(value []byte) *Bucket { 131 | var child = newBucket(b.tx) 132 | 133 | // If unaligned load/stores are broken on this arch and value is 134 | // unaligned simply clone to an aligned byte array. 135 | unaligned := brokenUnaligned && uintptr(unsafe.Pointer(&value[0]))&3 != 0 136 | 137 | if unaligned { 138 | value = cloneBytes(value) 139 | } 140 | 141 | // If this is a writable transaction then we need to copy the bucket entry. 142 | // Read-only transactions can point directly at the mmap entry. 143 | if b.tx.writable && !unaligned { 144 | child.bucket = &bucket{} 145 | *child.bucket = *(*bucket)(unsafe.Pointer(&value[0])) 146 | } else { 147 | child.bucket = (*bucket)(unsafe.Pointer(&value[0])) 148 | } 149 | 150 | // Save a reference to the inline page if the bucket is inline. 151 | if child.root == 0 { 152 | child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize])) 153 | } 154 | 155 | return &child 156 | } 157 | 158 | // CreateBucket creates a new bucket at the given key and returns the new bucket. 159 | // Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long. 160 | // The bucket instance is only valid for the lifetime of the transaction. 161 | func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) { 162 | if b.tx.db == nil { 163 | return nil, ErrTxClosed 164 | } else if !b.tx.writable { 165 | return nil, ErrTxNotWritable 166 | } else if len(key) == 0 { 167 | return nil, ErrBucketNameRequired 168 | } 169 | 170 | // Move cursor to correct position. 171 | c := b.Cursor() 172 | k, _, flags := c.seek(key) 173 | 174 | // Return an error if there is an existing key. 175 | if bytes.Equal(key, k) { 176 | if (flags & bucketLeafFlag) != 0 { 177 | return nil, ErrBucketExists 178 | } 179 | return nil, ErrIncompatibleValue 180 | } 181 | 182 | // Create empty, inline bucket. 183 | var bucket = Bucket{ 184 | bucket: &bucket{}, 185 | rootNode: &node{isLeaf: true}, 186 | FillPercent: DefaultFillPercent, 187 | } 188 | var value = bucket.write() 189 | 190 | // Insert into node. 191 | key = cloneBytes(key) 192 | c.node().put(key, key, value, 0, bucketLeafFlag) 193 | 194 | // Since subbuckets are not allowed on inline buckets, we need to 195 | // dereference the inline page, if it exists. This will cause the bucket 196 | // to be treated as a regular, non-inline bucket for the rest of the tx. 197 | b.page = nil 198 | 199 | return b.Bucket(key), nil 200 | } 201 | 202 | // CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it. 203 | // Returns an error if the bucket name is blank, or if the bucket name is too long. 204 | // The bucket instance is only valid for the lifetime of the transaction. 205 | func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) { 206 | child, err := b.CreateBucket(key) 207 | if err == ErrBucketExists { 208 | return b.Bucket(key), nil 209 | } else if err != nil { 210 | return nil, err 211 | } 212 | return child, nil 213 | } 214 | 215 | // DeleteBucket deletes a bucket at the given key. 216 | // Returns an error if the bucket does not exist, or if the key represents a non-bucket value.
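// All keys and nested buckets contained in the deleted bucket are removed
// and their pages are released back to the freelist.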
217 | func (b *Bucket) DeleteBucket(key []byte) error { 218 | if b.tx.db == nil { 219 | return ErrTxClosed 220 | } else if !b.Writable() { 221 | return ErrTxNotWritable 222 | } 223 | 224 | // Move cursor to correct position. 225 | c := b.Cursor() 226 | k, _, flags := c.seek(key) 227 | 228 | // Return an error if the bucket doesn't exist or the key is not a bucket. 229 | if !bytes.Equal(key, k) { 230 | return ErrBucketNotFound 231 | } else if (flags & bucketLeafFlag) == 0 { 232 | return ErrIncompatibleValue 233 | } 234 | 235 | // Recursively delete all child buckets. 236 | child := b.Bucket(key) 237 | err := child.ForEach(func(k, v []byte) error { 238 | if v == nil { 239 | if err := child.DeleteBucket(k); err != nil { 240 | return fmt.Errorf("delete bucket: %s", err) 241 | } 242 | } 243 | return nil 244 | }) 245 | if err != nil { 246 | return err 247 | } 248 | 249 | // Remove cached copy. 250 | delete(b.buckets, string(key)) 251 | 252 | // Release all bucket pages to freelist. 253 | child.nodes = nil 254 | child.rootNode = nil 255 | child.free() 256 | 257 | // Delete the node if we have a matching key. 258 | c.node().del(key) 259 | 260 | return nil 261 | } 262 | 263 | // Get retrieves the value for a key in the bucket. 264 | // Returns a nil value if the key does not exist or if the key is a nested bucket. 265 | // The returned value is only valid for the life of the transaction. 266 | func (b *Bucket) Get(key []byte) []byte { 267 | k, v, flags := b.Cursor().seek(key) 268 | 269 | // Return nil if this is a bucket. 270 | if (flags & bucketLeafFlag) != 0 { 271 | return nil 272 | } 273 | 274 | // If our target node isn't the same key as what's passed in then return nil. 275 | if !bytes.Equal(key, k) { 276 | return nil 277 | } 278 | return v 279 | } 280 | 281 | // Put sets the value for a key in the bucket. 282 | // If the key exists then its previous value will be overwritten. 283 | // Supplied value must remain valid for the life of the transaction. 284 | // Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large. 285 | func (b *Bucket) Put(key []byte, value []byte) error { 286 | if b.tx.db == nil { 287 | return ErrTxClosed 288 | } else if !b.Writable() { 289 | return ErrTxNotWritable 290 | } else if len(key) == 0 { 291 | return ErrKeyRequired 292 | } else if len(key) > MaxKeySize { 293 | return ErrKeyTooLarge 294 | } else if int64(len(value)) > MaxValueSize { 295 | return ErrValueTooLarge 296 | } 297 | 298 | // Move cursor to correct position. 299 | c := b.Cursor() 300 | k, _, flags := c.seek(key) 301 | 302 | // Return an error if there is an existing key with a bucket value. 303 | if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 { 304 | return ErrIncompatibleValue 305 | } 306 | 307 | // Insert into node. 308 | key = cloneBytes(key) 309 | c.node().put(key, key, value, 0, 0) 310 | 311 | return nil 312 | } 313 | 314 | // Delete removes a key from the bucket. 315 | // If the key does not exist then nothing is done and a nil error is returned. 316 | // Returns an error if the bucket was created from a read-only transaction. 317 | func (b *Bucket) Delete(key []byte) error { 318 | if b.tx.db == nil { 319 | return ErrTxClosed 320 | } else if !b.Writable() { 321 | return ErrTxNotWritable 322 | } 323 | 324 | // Move cursor to correct position. 325 | c := b.Cursor() 326 | _, _, flags := c.seek(key) 327 | 328 | // Return an error if there is an existing bucket value.
329 | if (flags & bucketLeafFlag) != 0 { 330 | return ErrIncompatibleValue 331 | } 332 | 333 | // Delete the node if we have a matching key. 334 | c.node().del(key) 335 | 336 | return nil 337 | } 338 | 339 | // Sequence returns the current integer for the bucket without incrementing it. 340 | func (b *Bucket) Sequence() uint64 { return b.bucket.sequence } 341 | 342 | // SetSequence updates the sequence number for the bucket. 343 | func (b *Bucket) SetSequence(v uint64) error { 344 | if b.tx.db == nil { 345 | return ErrTxClosed 346 | } else if !b.Writable() { 347 | return ErrTxNotWritable 348 | } 349 | 350 | // Materialize the root node if it hasn't been already so that the 351 | // bucket will be saved during commit. 352 | if b.rootNode == nil { 353 | _ = b.node(b.root, nil) 354 | } 355 | 356 | // Set the sequence. 357 | b.bucket.sequence = v 358 | return nil 359 | } 360 | 361 | // NextSequence returns an autoincrementing integer for the bucket. 362 | func (b *Bucket) NextSequence() (uint64, error) { 363 | if b.tx.db == nil { 364 | return 0, ErrTxClosed 365 | } else if !b.Writable() { 366 | return 0, ErrTxNotWritable 367 | } 368 | 369 | // Materialize the root node if it hasn't been already so that the 370 | // bucket will be saved during commit. 371 | if b.rootNode == nil { 372 | _ = b.node(b.root, nil) 373 | } 374 | 375 | // Increment and return the sequence. 376 | b.bucket.sequence++ 377 | return b.bucket.sequence, nil 378 | } 379 | 380 | // ForEach executes a function for each key/value pair in a bucket. 381 | // If the provided function returns an error then the iteration is stopped and 382 | // the error is returned to the caller. The provided function must not modify 383 | // the bucket; this will result in undefined behavior. 384 | func (b *Bucket) ForEach(fn func(k, v []byte) error) error { 385 | if b.tx.db == nil { 386 | return ErrTxClosed 387 | } 388 | c := b.Cursor() 389 | for k, v := c.First(); k != nil; k, v = c.Next() { 390 | if err := fn(k, v); err != nil { 391 | return err 392 | } 393 | } 394 | return nil 395 | } 396 | 397 | // Stats returns stats on a bucket. 398 | func (b *Bucket) Stats() BucketStats { 399 | var s, subStats BucketStats 400 | pageSize := b.tx.db.pageSize 401 | s.BucketN += 1 402 | if b.root == 0 { 403 | s.InlineBucketN += 1 404 | } 405 | b.forEachPage(func(p *page, depth int) { 406 | if (p.flags & leafPageFlag) != 0 { 407 | s.KeyN += int(p.count) 408 | 409 | // used totals the used bytes for the page 410 | used := pageHeaderSize 411 | 412 | if p.count != 0 { 413 | // If page has any elements, add all element headers. 414 | used += leafPageElementSize * int(p.count-1) 415 | 416 | // Add all element key, value sizes. 417 | // The computation takes advantage of the fact that the position 418 | // of the last element's key/value equals the total of the sizes 419 | // of all previous elements' keys and values. 420 | // It also includes the last element's header. 421 | lastElement := p.leafPageElement(p.count - 1) 422 | used += int(lastElement.pos + lastElement.ksize + lastElement.vsize) 423 | } 424 | 425 | if b.root == 0 { 426 | // For inlined bucket just update the inline stats 427 | s.InlineBucketInuse += used 428 | } else { 429 | // For non-inlined bucket update all the leaf stats 430 | s.LeafPageN++ 431 | s.LeafInuse += used 432 | s.LeafOverflowN += int(p.overflow) 433 | 434 | // Collect stats from sub-buckets. 435 | // Do that by iterating over all element headers 436 | // looking for the ones with the bucketLeafFlag.
437 | for i := uint16(0); i < p.count; i++ { 438 | e := p.leafPageElement(i) 439 | if (e.flags & bucketLeafFlag) != 0 { 440 | // For any bucket element, open the element value 441 | // and recursively call Stats on the contained bucket. 442 | subStats.Add(b.openBucket(e.value()).Stats()) 443 | } 444 | } 445 | } 446 | } else if (p.flags & branchPageFlag) != 0 { 447 | s.BranchPageN++ 448 | lastElement := p.branchPageElement(p.count - 1) 449 | 450 | // used totals the used bytes for the page 451 | // Add header and all element headers. 452 | used := pageHeaderSize + (branchPageElementSize * int(p.count-1)) 453 | 454 | // Add size of all keys and values. 455 | // Again, use the fact that last element's position equals to 456 | // the total of key, value sizes of all previous elements. 457 | used += int(lastElement.pos + lastElement.ksize) 458 | s.BranchInuse += used 459 | s.BranchOverflowN += int(p.overflow) 460 | } 461 | 462 | // Keep track of maximum page depth. 463 | if depth+1 > s.Depth { 464 | s.Depth = (depth + 1) 465 | } 466 | }) 467 | 468 | // Alloc stats can be computed from page counts and pageSize. 469 | s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize 470 | s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize 471 | 472 | // Add the max depth of sub-buckets to get total nested depth. 473 | s.Depth += subStats.Depth 474 | // Add the stats for all sub-buckets 475 | s.Add(subStats) 476 | return s 477 | } 478 | 479 | // forEachPage iterates over every page in a bucket, including inline pages. 480 | func (b *Bucket) forEachPage(fn func(*page, int)) { 481 | // If we have an inline page then just use that. 482 | if b.page != nil { 483 | fn(b.page, 0) 484 | return 485 | } 486 | 487 | // Otherwise traverse the page hierarchy. 488 | b.tx.forEachPage(b.root, 0, fn) 489 | } 490 | 491 | // forEachPageNode iterates over every page (or node) in a bucket. 492 | // This also includes inline pages. 493 | func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) { 494 | // If we have an inline page or root node then just use that. 495 | if b.page != nil { 496 | fn(b.page, nil, 0) 497 | return 498 | } 499 | b._forEachPageNode(b.root, 0, fn) 500 | } 501 | 502 | func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) { 503 | var p, n = b.pageNode(pgid) 504 | 505 | // Execute function. 506 | fn(p, n, depth) 507 | 508 | // Recursively loop over children. 509 | if p != nil { 510 | if (p.flags & branchPageFlag) != 0 { 511 | for i := 0; i < int(p.count); i++ { 512 | elem := p.branchPageElement(uint16(i)) 513 | b._forEachPageNode(elem.pgid, depth+1, fn) 514 | } 515 | } 516 | } else { 517 | if !n.isLeaf { 518 | for _, inode := range n.inodes { 519 | b._forEachPageNode(inode.pgid, depth+1, fn) 520 | } 521 | } 522 | } 523 | } 524 | 525 | // spill writes all the nodes for this bucket to dirty pages. 526 | func (b *Bucket) spill() error { 527 | // Spill all child buckets first. 528 | for name, child := range b.buckets { 529 | // If the child bucket is small enough and it has no child buckets then 530 | // write it inline into the parent bucket's page. Otherwise spill it 531 | // like a normal bucket and make the parent value a pointer to the page. 532 | var value []byte 533 | if child.inlineable() { 534 | child.free() 535 | value = child.write() 536 | } else { 537 | if err := child.spill(); err != nil { 538 | return err 539 | } 540 | 541 | // Update the child bucket header in this bucket. 
542 | value = make([]byte, unsafe.Sizeof(bucket{})) 543 | var bucket = (*bucket)(unsafe.Pointer(&value[0])) 544 | *bucket = *child.bucket 545 | } 546 | 547 | // Skip writing the bucket if there are no materialized nodes. 548 | if child.rootNode == nil { 549 | continue 550 | } 551 | 552 | // Update parent node. 553 | var c = b.Cursor() 554 | k, _, flags := c.seek([]byte(name)) 555 | if !bytes.Equal([]byte(name), k) { 556 | panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k)) 557 | } 558 | if flags&bucketLeafFlag == 0 { 559 | panic(fmt.Sprintf("unexpected bucket header flag: %x", flags)) 560 | } 561 | c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag) 562 | } 563 | 564 | // Ignore if there's not a materialized root node. 565 | if b.rootNode == nil { 566 | return nil 567 | } 568 | 569 | // Spill nodes. 570 | if err := b.rootNode.spill(); err != nil { 571 | return err 572 | } 573 | b.rootNode = b.rootNode.root() 574 | 575 | // Update the root node for this bucket. 576 | if b.rootNode.pgid >= b.tx.meta.pgid { 577 | panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid)) 578 | } 579 | b.root = b.rootNode.pgid 580 | 581 | return nil 582 | } 583 | 584 | // inlineable returns true if a bucket is small enough to be written inline 585 | // and if it contains no subbuckets. Otherwise returns false. 586 | func (b *Bucket) inlineable() bool { 587 | var n = b.rootNode 588 | 589 | // Bucket must only contain a single leaf node. 590 | if n == nil || !n.isLeaf { 591 | return false 592 | } 593 | 594 | // Bucket is not inlineable if it contains subbuckets or if it goes beyond 595 | // our threshold for inline bucket size. 596 | var size = pageHeaderSize 597 | for _, inode := range n.inodes { 598 | size += leafPageElementSize + len(inode.key) + len(inode.value) 599 | 600 | if inode.flags&bucketLeafFlag != 0 { 601 | return false 602 | } else if size > b.maxInlineBucketSize() { 603 | return false 604 | } 605 | } 606 | 607 | return true 608 | } 609 | 610 | // Returns the maximum total size of a bucket to make it a candidate for inlining. 611 | func (b *Bucket) maxInlineBucketSize() int { 612 | return b.tx.db.pageSize / 4 613 | } 614 | 615 | // write allocates and writes a bucket to a byte slice. 616 | func (b *Bucket) write() []byte { 617 | // Allocate the appropriate size. 618 | var n = b.rootNode 619 | var value = make([]byte, bucketHeaderSize+n.size()) 620 | 621 | // Write a bucket header. 622 | var bucket = (*bucket)(unsafe.Pointer(&value[0])) 623 | *bucket = *b.bucket 624 | 625 | // Convert byte slice to a fake page and write the root node. 626 | var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize])) 627 | n.write(p) 628 | 629 | return value 630 | } 631 | 632 | // rebalance attempts to balance all nodes. 633 | func (b *Bucket) rebalance() { 634 | for _, n := range b.nodes { 635 | n.rebalance() 636 | } 637 | for _, child := range b.buckets { 638 | child.rebalance() 639 | } 640 | } 641 | 642 | // node creates a node from a page and associates it with a given parent. 643 | func (b *Bucket) node(pgid pgid, parent *node) *node { 644 | _assert(b.nodes != nil, "nodes map expected") 645 | 646 | // Retrieve node if it's already been created. 647 | if n := b.nodes[pgid]; n != nil { 648 | return n 649 | } 650 | 651 | // Otherwise create a node and cache it. 
652 | n := &node{bucket: b, parent: parent} 653 | if parent == nil { 654 | b.rootNode = n 655 | } else { 656 | parent.children = append(parent.children, n) 657 | } 658 | 659 | // Use the inline page if this is an inline bucket. 660 | var p = b.page 661 | if p == nil { 662 | p = b.tx.page(pgid) 663 | } 664 | 665 | // Read the page into the node and cache it. 666 | n.read(p) 667 | b.nodes[pgid] = n 668 | 669 | // Update statistics. 670 | b.tx.stats.NodeCount++ 671 | 672 | return n 673 | } 674 | 675 | // free recursively frees all pages in the bucket. 676 | func (b *Bucket) free() { 677 | if b.root == 0 { 678 | return 679 | } 680 | 681 | var tx = b.tx 682 | b.forEachPageNode(func(p *page, n *node, _ int) { 683 | if p != nil { 684 | tx.db.freelist.free(tx.meta.txid, p) 685 | } else { 686 | n.free() 687 | } 688 | }) 689 | b.root = 0 690 | } 691 | 692 | // dereference removes all references to the old mmap. 693 | func (b *Bucket) dereference() { 694 | if b.rootNode != nil { 695 | b.rootNode.root().dereference() 696 | } 697 | 698 | for _, child := range b.buckets { 699 | child.dereference() 700 | } 701 | } 702 | 703 | // pageNode returns the in-memory node, if it exists. 704 | // Otherwise returns the underlying page. 705 | func (b *Bucket) pageNode(id pgid) (*page, *node) { 706 | // Inline buckets have a fake page embedded in their value so treat them 707 | // differently. We'll return the rootNode (if available) or the fake page. 708 | if b.root == 0 { 709 | if id != 0 { 710 | panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id)) 711 | } 712 | if b.rootNode != nil { 713 | return nil, b.rootNode 714 | } 715 | return b.page, nil 716 | } 717 | 718 | // Check the node cache for non-inline buckets. 719 | if b.nodes != nil { 720 | if n := b.nodes[id]; n != nil { 721 | return nil, n 722 | } 723 | } 724 | 725 | // Finally lookup the page from the transaction if no node is materialized. 726 | return b.tx.page(id), nil 727 | } 728 | 729 | // BucketStats records statistics about resources used by a bucket. 730 | type BucketStats struct { 731 | // Page count statistics. 732 | BranchPageN int // number of logical branch pages 733 | BranchOverflowN int // number of physical branch overflow pages 734 | LeafPageN int // number of logical leaf pages 735 | LeafOverflowN int // number of physical leaf overflow pages 736 | 737 | // Tree statistics. 738 | KeyN int // number of keys/value pairs 739 | Depth int // number of levels in B+tree 740 | 741 | // Page size utilization. 
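// The Alloc figures count whole pages, including overflow pages, while the
// Inuse figures count only the bytes actually occupied on those pages.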
742 | BranchAlloc int // bytes allocated for physical branch pages 743 | BranchInuse int // bytes actually used for branch data 744 | LeafAlloc int // bytes allocated for physical leaf pages 745 | LeafInuse int // bytes actually used for leaf data 746 | 747 | // Bucket statistics 748 | BucketN int // total number of buckets including the top bucket 749 | InlineBucketN int // total number of inlined buckets 750 | InlineBucketInuse int // bytes used for inlined buckets (also accounted for in LeafInuse) 751 | } 752 | 753 | func (s *BucketStats) Add(other BucketStats) { 754 | s.BranchPageN += other.BranchPageN 755 | s.BranchOverflowN += other.BranchOverflowN 756 | s.LeafPageN += other.LeafPageN 757 | s.LeafOverflowN += other.LeafOverflowN 758 | s.KeyN += other.KeyN 759 | if s.Depth < other.Depth { 760 | s.Depth = other.Depth 761 | } 762 | s.BranchAlloc += other.BranchAlloc 763 | s.BranchInuse += other.BranchInuse 764 | s.LeafAlloc += other.LeafAlloc 765 | s.LeafInuse += other.LeafInuse 766 | 767 | s.BucketN += other.BucketN 768 | s.InlineBucketN += other.InlineBucketN 769 | s.InlineBucketInuse += other.InlineBucketInuse 770 | } 771 | 772 | // cloneBytes returns a copy of a given slice. 773 | func cloneBytes(v []byte) []byte { 774 | var clone = make([]byte, len(v)) 775 | copy(clone, v) 776 | return clone 777 | } 778 | -------------------------------------------------------------------------------- /cmd/bolt/main_test.go: -------------------------------------------------------------------------------- 1 | package main_test 2 | 3 | import ( 4 | "bytes" 5 | crypto "crypto/rand" 6 | "encoding/binary" 7 | "fmt" 8 | "io" 9 | "io/ioutil" 10 | "math/rand" 11 | "os" 12 | "strconv" 13 | "testing" 14 | 15 | "github.com/boltdb/bolt" 16 | "github.com/boltdb/bolt/cmd/bolt" 17 | ) 18 | 19 | // Ensure the "info" command can print information about a database. 20 | func TestInfoCommand_Run(t *testing.T) { 21 | db := MustOpen(0666, nil) 22 | db.DB.Close() 23 | defer db.Close() 24 | 25 | // Run the info command. 26 | m := NewMain() 27 | if err := m.Run("info", db.Path); err != nil { 28 | t.Fatal(err) 29 | } 30 | } 31 | 32 | // Ensure the "stats" command executes correctly with an empty database. 33 | func TestStatsCommand_Run_EmptyDatabase(t *testing.T) { 34 | // Ignore 35 | if os.Getpagesize() != 4096 { 36 | t.Skip("system does not use 4KB page size") 37 | } 38 | 39 | db := MustOpen(0666, nil) 40 | defer db.Close() 41 | db.DB.Close() 42 | 43 | // Generate expected result. 44 | exp := "Aggregate statistics for 0 buckets\n\n" + 45 | "Page count statistics\n" + 46 | "\tNumber of logical branch pages: 0\n" + 47 | "\tNumber of physical branch overflow pages: 0\n" + 48 | "\tNumber of logical leaf pages: 0\n" + 49 | "\tNumber of physical leaf overflow pages: 0\n" + 50 | "Tree statistics\n" + 51 | "\tNumber of keys/value pairs: 0\n" + 52 | "\tNumber of levels in B+tree: 0\n" + 53 | "Page size utilization\n" + 54 | "\tBytes allocated for physical branch pages: 0\n" + 55 | "\tBytes actually used for branch data: 0 (0%)\n" + 56 | "\tBytes allocated for physical leaf pages: 0\n" + 57 | "\tBytes actually used for leaf data: 0 (0%)\n" + 58 | "Bucket statistics\n" + 59 | "\tTotal number of buckets: 0\n" + 60 | "\tTotal number on inlined buckets: 0 (0%)\n" + 61 | "\tBytes used for inlined buckets: 0 (0%)\n" 62 | 63 | // Run the command.
64 | m := NewMain() 65 | if err := m.Run("stats", db.Path); err != nil { 66 | t.Fatal(err) 67 | } else if m.Stdout.String() != exp { 68 | t.Fatalf("unexpected stdout:\n\n%s", m.Stdout.String()) 69 | } 70 | } 71 | 72 | // Ensure the "stats" command can execute correctly. 73 | func TestStatsCommand_Run(t *testing.T) { 74 | // Ignore 75 | if os.Getpagesize() != 4096 { 76 | t.Skip("system does not use 4KB page size") 77 | } 78 | 79 | db := MustOpen(0666, nil) 80 | defer db.Close() 81 | 82 | if err := db.Update(func(tx *bolt.Tx) error { 83 | // Create "foo" bucket. 84 | b, err := tx.CreateBucket([]byte("foo")) 85 | if err != nil { 86 | return err 87 | } 88 | for i := 0; i < 10; i++ { 89 | if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil { 90 | return err 91 | } 92 | } 93 | 94 | // Create "bar" bucket. 95 | b, err = tx.CreateBucket([]byte("bar")) 96 | if err != nil { 97 | return err 98 | } 99 | for i := 0; i < 100; i++ { 100 | if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil { 101 | return err 102 | } 103 | } 104 | 105 | // Create "baz" bucket. 106 | b, err = tx.CreateBucket([]byte("baz")) 107 | if err != nil { 108 | return err 109 | } 110 | if err := b.Put([]byte("key"), []byte("value")); err != nil { 111 | return err 112 | } 113 | 114 | return nil 115 | }); err != nil { 116 | t.Fatal(err) 117 | } 118 | db.DB.Close() 119 | 120 | // Generate expected result. 121 | exp := "Aggregate statistics for 3 buckets\n\n" + 122 | "Page count statistics\n" + 123 | "\tNumber of logical branch pages: 0\n" + 124 | "\tNumber of physical branch overflow pages: 0\n" + 125 | "\tNumber of logical leaf pages: 1\n" + 126 | "\tNumber of physical leaf overflow pages: 0\n" + 127 | "Tree statistics\n" + 128 | "\tNumber of keys/value pairs: 111\n" + 129 | "\tNumber of levels in B+tree: 1\n" + 130 | "Page size utilization\n" + 131 | "\tBytes allocated for physical branch pages: 0\n" + 132 | "\tBytes actually used for branch data: 0 (0%)\n" + 133 | "\tBytes allocated for physical leaf pages: 4096\n" + 134 | "\tBytes actually used for leaf data: 1996 (48%)\n" + 135 | "Bucket statistics\n" + 136 | "\tTotal number of buckets: 3\n" + 137 | "\tTotal number on inlined buckets: 2 (66%)\n" + 138 | "\tBytes used for inlined buckets: 236 (11%)\n" 139 | 140 | // Run the command. 141 | m := NewMain() 142 | if err := m.Run("stats", db.Path); err != nil { 143 | t.Fatal(err) 144 | } else if m.Stdout.String() != exp { 145 | t.Fatalf("unexpected stdout:\n\n%s", m.Stdout.String()) 146 | } 147 | } 148 | 149 | // Main represents a test wrapper for main.Main that records output. 150 | type Main struct { 151 | *main.Main 152 | Stdin bytes.Buffer 153 | Stdout bytes.Buffer 154 | Stderr bytes.Buffer 155 | } 156 | 157 | // NewMain returns a new instance of Main. 158 | func NewMain() *Main { 159 | m := &Main{Main: main.NewMain()} 160 | m.Main.Stdin = &m.Stdin 161 | m.Main.Stdout = &m.Stdout 162 | m.Main.Stderr = &m.Stderr 163 | return m 164 | } 165 | 166 | // MustOpen creates a Bolt database in a temporary location. 167 | func MustOpen(mode os.FileMode, options *bolt.Options) *DB { 168 | // Create temporary path. 169 | f, _ := ioutil.TempFile("", "bolt-") 170 | f.Close() 171 | os.Remove(f.Name()) 172 | 173 | db, err := bolt.Open(f.Name(), mode, options) 174 | if err != nil { 175 | panic(err.Error()) 176 | } 177 | return &DB{DB: db, Path: f.Name()} 178 | } 179 | 180 | // DB is a test wrapper for bolt.DB. 
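// It keeps the database's path so Close can remove the temporary file.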
181 | type DB struct { 182 | *bolt.DB 183 | Path string 184 | } 185 | 186 | // Close closes and removes the database. 187 | func (db *DB) Close() error { 188 | defer os.Remove(db.Path) 189 | return db.DB.Close() 190 | } 191 | 192 | func TestCompactCommand_Run(t *testing.T) { 193 | var s int64 194 | if err := binary.Read(crypto.Reader, binary.BigEndian, &s); err != nil { 195 | t.Fatal(err) 196 | } 197 | rand.Seed(s) 198 | 199 | dstdb := MustOpen(0666, nil) 200 | dstdb.Close() 201 | 202 | // fill the db 203 | db := MustOpen(0666, nil) 204 | if err := db.Update(func(tx *bolt.Tx) error { 205 | n := 2 + rand.Intn(5) 206 | for i := 0; i < n; i++ { 207 | k := []byte(fmt.Sprintf("b%d", i)) 208 | b, err := tx.CreateBucketIfNotExists(k) 209 | if err != nil { 210 | return err 211 | } 212 | if err := b.SetSequence(uint64(i)); err != nil { 213 | return err 214 | } 215 | if err := fillBucket(b, append(k, '.')); err != nil { 216 | return err 217 | } 218 | } 219 | return nil 220 | }); err != nil { 221 | db.Close() 222 | t.Fatal(err) 223 | } 224 | 225 | // make the db grow by adding large values, then delete them. 226 | if err := db.Update(func(tx *bolt.Tx) error { 227 | b, err := tx.CreateBucketIfNotExists([]byte("large_vals")) 228 | if err != nil { 229 | return err 230 | } 231 | n := 5 + rand.Intn(5) 232 | for i := 0; i < n; i++ { 233 | v := make([]byte, 1000*1000*(1+rand.Intn(5))) 234 | _, err := crypto.Read(v) 235 | if err != nil { 236 | return err 237 | } 238 | if err := b.Put([]byte(fmt.Sprintf("l%d", i)), v); err != nil { 239 | return err 240 | } 241 | } 242 | return nil 243 | }); err != nil { 244 | db.Close() 245 | t.Fatal(err) 246 | } 247 | if err := db.Update(func(tx *bolt.Tx) error { 248 | c := tx.Bucket([]byte("large_vals")).Cursor() 249 | for k, _ := c.First(); k != nil; k, _ = c.Next() { 250 | if err := c.Delete(); err != nil { 251 | return err 252 | } 253 | } 254 | return tx.DeleteBucket([]byte("large_vals")) 255 | }); err != nil { 256 | db.Close() 257 | t.Fatal(err) 258 | } 259 | db.DB.Close() 260 | defer db.Close() 261 | defer dstdb.Close() 262 | 263 | dbChk, err := chkdb(db.Path) 264 | if err != nil { 265 | t.Fatal(err) 266 | } 267 | 268 | m := NewMain() 269 | if err := m.Run("compact", "-o", dstdb.Path, db.Path); err != nil { 270 | t.Fatal(err) 271 | } 272 | 273 | dbChkAfterCompact, err := chkdb(db.Path) 274 | if err != nil { 275 | t.Fatal(err) 276 | } 277 | 278 | dstdbChk, err := chkdb(dstdb.Path) 279 | if err != nil { 280 | t.Fatal(err) 281 | } 282 | 283 | if !bytes.Equal(dbChk, dbChkAfterCompact) { 284 | t.Error("the original db has been touched") 285 | } 286 | if !bytes.Equal(dbChk, dstdbChk) { 287 | t.Error("the compacted db data isn't the same as the original db") 288 | } 289 | } 290 | 291 | func fillBucket(b *bolt.Bucket, prefix []byte) error { 292 | n := 10 + rand.Intn(50) 293 | for i := 0; i < n; i++ { 294 | v := make([]byte, 10*(1+rand.Intn(4))) 295 | _, err := crypto.Read(v) 296 | if err != nil { 297 | return err 298 | } 299 | k := append(prefix, []byte(fmt.Sprintf("k%d", i))...) 300 | if err := b.Put(k, v); err != nil { 301 | return err 302 | } 303 | } 304 | // limit depth of subbuckets 305 | s := 2 + rand.Intn(4) 306 | if len(prefix) > (2*s + 1) { 307 | return nil 308 | } 309 | n = 1 + rand.Intn(3) 310 | for i := 0; i < n; i++ { 311 | k := append(prefix, []byte(fmt.Sprintf("b%d", i))...)
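// Create a subbucket under the derived key and fill it recursively.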
312 | sb, err := b.CreateBucket(k) 313 | if err != nil { 314 | return err 315 | } 316 | if err := fillBucket(sb, append(k, '.')); err != nil { 317 | return err 318 | } 319 | } 320 | return nil 321 | } 322 | 323 | func chkdb(path string) ([]byte, error) { 324 | db, err := bolt.Open(path, 0666, nil) 325 | if err != nil { 326 | return nil, err 327 | } 328 | defer db.Close() 329 | var buf bytes.Buffer 330 | err = db.View(func(tx *bolt.Tx) error { 331 | return tx.ForEach(func(name []byte, b *bolt.Bucket) error { 332 | return walkBucket(b, name, nil, &buf) 333 | }) 334 | }) 335 | if err != nil { 336 | return nil, err 337 | } 338 | return buf.Bytes(), nil 339 | } 340 | 341 | func walkBucket(parent *bolt.Bucket, k []byte, v []byte, w io.Writer) error { 342 | if _, err := fmt.Fprintf(w, "%d:%x=%x\n", parent.Sequence(), k, v); err != nil { 343 | return err 344 | } 345 | 346 | // not a bucket, exit. 347 | if v != nil { 348 | return nil 349 | } 350 | return parent.ForEach(func(k, v []byte) error { 351 | if v == nil { 352 | return walkBucket(parent.Bucket(k), k, nil, w) 353 | } 354 | return walkBucket(parent, k, v, w) 355 | }) 356 | } 357 | -------------------------------------------------------------------------------- /cursor.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "sort" 7 | ) 8 | 9 | // Cursor represents an iterator that can traverse over all key/value pairs in a bucket in sorted order. 10 | // Cursors see nested buckets with value == nil. 11 | // Cursors can be obtained from a transaction and are valid as long as the transaction is open. 12 | // 13 | // Keys and values returned from the cursor are only valid for the life of the transaction. 14 | // 15 | // Changing data while traversing with a cursor may cause it to be invalidated 16 | // and return unexpected keys and/or values. You must reposition your cursor 17 | // after mutating data. 18 | type Cursor struct { 19 | bucket *Bucket 20 | stack []elemRef 21 | } 22 | 23 | // Bucket returns the bucket that this cursor was created from. 24 | func (c *Cursor) Bucket() *Bucket { 25 | return c.bucket 26 | } 27 | 28 | // First moves the cursor to the first item in the bucket and returns its key and value. 29 | // If the bucket is empty then a nil key and value are returned. 30 | // The returned key and value are only valid for the life of the transaction. 31 | func (c *Cursor) First() (key []byte, value []byte) { 32 | _assert(c.bucket.tx.db != nil, "tx closed") 33 | c.stack = c.stack[:0] 34 | p, n := c.bucket.pageNode(c.bucket.root) 35 | c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) 36 | c.first() 37 | 38 | // If we land on an empty page then move to the next value. 39 | // https://github.com/boltdb/bolt/issues/450 40 | if c.stack[len(c.stack)-1].count() == 0 { 41 | c.next() 42 | } 43 | 44 | k, v, flags := c.keyValue() 45 | if (flags & uint32(bucketLeafFlag)) != 0 { 46 | return k, nil 47 | } 48 | return k, v 49 | 50 | } 51 | 52 | // Last moves the cursor to the last item in the bucket and returns its key and value. 53 | // If the bucket is empty then a nil key and value are returned. 54 | // The returned key and value are only valid for the life of the transaction. 
55 | func (c *Cursor) Last() (key []byte, value []byte) { 56 | _assert(c.bucket.tx.db != nil, "tx closed") 57 | c.stack = c.stack[:0] 58 | p, n := c.bucket.pageNode(c.bucket.root) 59 | ref := elemRef{page: p, node: n} 60 | ref.index = ref.count() - 1 61 | c.stack = append(c.stack, ref) 62 | c.last() 63 | k, v, flags := c.keyValue() 64 | if (flags & uint32(bucketLeafFlag)) != 0 { 65 | return k, nil 66 | } 67 | return k, v 68 | } 69 | 70 | // Next moves the cursor to the next item in the bucket and returns its key and value. 71 | // If the cursor is at the end of the bucket then a nil key and value are returned. 72 | // The returned key and value are only valid for the life of the transaction. 73 | func (c *Cursor) Next() (key []byte, value []byte) { 74 | _assert(c.bucket.tx.db != nil, "tx closed") 75 | k, v, flags := c.next() 76 | if (flags & uint32(bucketLeafFlag)) != 0 { 77 | return k, nil 78 | } 79 | return k, v 80 | } 81 | 82 | // Prev moves the cursor to the previous item in the bucket and returns its key and value. 83 | // If the cursor is at the beginning of the bucket then a nil key and value are returned. 84 | // The returned key and value are only valid for the life of the transaction. 85 | func (c *Cursor) Prev() (key []byte, value []byte) { 86 | _assert(c.bucket.tx.db != nil, "tx closed") 87 | 88 | // Attempt to move back one element until we're successful. 89 | // Move up the stack as we hit the beginning of each page in our stack. 90 | for i := len(c.stack) - 1; i >= 0; i-- { 91 | elem := &c.stack[i] 92 | if elem.index > 0 { 93 | elem.index-- 94 | break 95 | } 96 | c.stack = c.stack[:i] 97 | } 98 | 99 | // If we've hit the end then return nil. 100 | if len(c.stack) == 0 { 101 | return nil, nil 102 | } 103 | 104 | // Move down the stack to find the last element of the last leaf under this branch. 105 | c.last() 106 | k, v, flags := c.keyValue() 107 | if (flags & uint32(bucketLeafFlag)) != 0 { 108 | return k, nil 109 | } 110 | return k, v 111 | } 112 | 113 | // Seek moves the cursor to a given key and returns it. 114 | // If the key does not exist then the next key is used. If no keys 115 | // follow, a nil key is returned. 116 | // The returned key and value are only valid for the life of the transaction. 117 | func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) { 118 | k, v, flags := c.seek(seek) 119 | 120 | // If we ended up after the last element of a page then move to the next one. 121 | if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() { 122 | k, v, flags = c.next() 123 | } 124 | 125 | if k == nil { 126 | return nil, nil 127 | } else if (flags & uint32(bucketLeafFlag)) != 0 { 128 | return k, nil 129 | } 130 | return k, v 131 | } 132 | 133 | // Delete removes the current key/value under the cursor from the bucket. 134 | // Delete fails if current key/value is a bucket or if the transaction is not writable. 135 | func (c *Cursor) Delete() error { 136 | if c.bucket.tx.db == nil { 137 | return ErrTxClosed 138 | } else if !c.bucket.Writable() { 139 | return ErrTxNotWritable 140 | } 141 | 142 | key, _, flags := c.keyValue() 143 | // Return an error if current value is a bucket. 144 | if (flags & bucketLeafFlag) != 0 { 145 | return ErrIncompatibleValue 146 | } 147 | c.node().del(key) 148 | 149 | return nil 150 | } 151 | 152 | // seek moves the cursor to a given key and returns it. 153 | // If the key does not exist then the next key is used. 
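// Callers must handle the case where the cursor lands past the last element
// of a page; the exported Seek does this by advancing with next().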
154 | func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) { 155 | _assert(c.bucket.tx.db != nil, "tx closed") 156 | 157 | // Start from root page/node and traverse to correct page. 158 | c.stack = c.stack[:0] 159 | c.search(seek, c.bucket.root) 160 | ref := &c.stack[len(c.stack)-1] 161 | 162 | // If the cursor is pointing to the end of page/node then return nil. 163 | if ref.index >= ref.count() { 164 | return nil, nil, 0 165 | } 166 | 167 | // If this is a bucket then return a nil value. 168 | return c.keyValue() 169 | } 170 | 171 | // first moves the cursor to the first leaf element under the last page in the stack. 172 | func (c *Cursor) first() { 173 | for { 174 | // Exit when we hit a leaf page. 175 | var ref = &c.stack[len(c.stack)-1] 176 | if ref.isLeaf() { 177 | break 178 | } 179 | 180 | // Keep adding pages pointing to the first element to the stack. 181 | var pgid pgid 182 | if ref.node != nil { 183 | pgid = ref.node.inodes[ref.index].pgid 184 | } else { 185 | pgid = ref.page.branchPageElement(uint16(ref.index)).pgid 186 | } 187 | p, n := c.bucket.pageNode(pgid) 188 | c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) 189 | } 190 | } 191 | 192 | // last moves the cursor to the last leaf element under the last page in the stack. 193 | func (c *Cursor) last() { 194 | for { 195 | // Exit when we hit a leaf page. 196 | ref := &c.stack[len(c.stack)-1] 197 | if ref.isLeaf() { 198 | break 199 | } 200 | 201 | // Keep adding pages pointing to the last element in the stack. 202 | var pgid pgid 203 | if ref.node != nil { 204 | pgid = ref.node.inodes[ref.index].pgid 205 | } else { 206 | pgid = ref.page.branchPageElement(uint16(ref.index)).pgid 207 | } 208 | p, n := c.bucket.pageNode(pgid) 209 | 210 | var nextRef = elemRef{page: p, node: n} 211 | nextRef.index = nextRef.count() - 1 212 | c.stack = append(c.stack, nextRef) 213 | } 214 | } 215 | 216 | // next moves to the next leaf element and returns the key and value. 217 | // If the cursor is at the last leaf element then it stays there and returns nil. 218 | func (c *Cursor) next() (key []byte, value []byte, flags uint32) { 219 | for { 220 | // Attempt to move over one element until we're successful. 221 | // Move up the stack as we hit the end of each page in our stack. 222 | var i int 223 | for i = len(c.stack) - 1; i >= 0; i-- { 224 | elem := &c.stack[i] 225 | if elem.index < elem.count()-1 { 226 | elem.index++ 227 | break 228 | } 229 | } 230 | 231 | // If we've hit the root page then stop and return. This will leave the 232 | // cursor on the last element of the last page. 233 | if i == -1 { 234 | return nil, nil, 0 235 | } 236 | 237 | // Otherwise start from where we left off in the stack and find the 238 | // first element of the first leaf page. 239 | c.stack = c.stack[:i+1] 240 | c.first() 241 | 242 | // If this is an empty page then restart and move back up the stack. 243 | // https://github.com/boltdb/bolt/issues/450 244 | if c.stack[len(c.stack)-1].count() == 0 { 245 | continue 246 | } 247 | 248 | return c.keyValue() 249 | } 250 | } 251 | 252 | // search recursively performs a binary search against a given page/node until it finds a given key. 
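// When the search finishes, c.stack holds one elemRef per level of the tree.
// For a two-level tree the invariant looks roughly like this (an illustrative
// sketch, not code from this package):
//
//	c.stack[0] // root branch page/node, index = child subtree covering key
//	c.stack[1] // leaf page/node, index = first element >= key (set by nsearch)
//
// First, Last, Next and Prev all navigate by adjusting these indexes.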
253 | func (c *Cursor) search(key []byte, pgid pgid) { 254 | p, n := c.bucket.pageNode(pgid) 255 | if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 { 256 | panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags)) 257 | } 258 | e := elemRef{page: p, node: n} 259 | c.stack = append(c.stack, e) 260 | 261 | // If we're on a leaf page/node then find the specific node. 262 | if e.isLeaf() { 263 | c.nsearch(key) 264 | return 265 | } 266 | 267 | if n != nil { 268 | c.searchNode(key, n) 269 | return 270 | } 271 | c.searchPage(key, p) 272 | } 273 | 274 | func (c *Cursor) searchNode(key []byte, n *node) { 275 | var exact bool 276 | index := sort.Search(len(n.inodes), func(i int) bool { 277 | // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now. 278 | // sort.Search() finds the lowest index where f() != -1 but we need the highest index. 279 | ret := bytes.Compare(n.inodes[i].key, key) 280 | if ret == 0 { 281 | exact = true 282 | } 283 | return ret != -1 284 | }) 285 | if !exact && index > 0 { 286 | index-- 287 | } 288 | c.stack[len(c.stack)-1].index = index 289 | 290 | // Recursively search to the next page. 291 | c.search(key, n.inodes[index].pgid) 292 | } 293 | 294 | func (c *Cursor) searchPage(key []byte, p *page) { 295 | // Binary search for the correct range. 296 | inodes := p.branchPageElements() 297 | 298 | var exact bool 299 | index := sort.Search(int(p.count), func(i int) bool { 300 | // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now. 301 | // sort.Search() finds the lowest index where f() != -1 but we need the highest index. 302 | ret := bytes.Compare(inodes[i].key(), key) 303 | if ret == 0 { 304 | exact = true 305 | } 306 | return ret != -1 307 | }) 308 | if !exact && index > 0 { 309 | index-- 310 | } 311 | c.stack[len(c.stack)-1].index = index 312 | 313 | // Recursively search to the next page. 314 | c.search(key, inodes[index].pgid) 315 | } 316 | 317 | // nsearch searches the leaf node on the top of the stack for a key. 318 | func (c *Cursor) nsearch(key []byte) { 319 | e := &c.stack[len(c.stack)-1] 320 | p, n := e.page, e.node 321 | 322 | // If we have a node then search its inodes. 323 | if n != nil { 324 | index := sort.Search(len(n.inodes), func(i int) bool { 325 | return bytes.Compare(n.inodes[i].key, key) != -1 326 | }) 327 | e.index = index 328 | return 329 | } 330 | 331 | // If we have a page then search its leaf elements. 332 | inodes := p.leafPageElements() 333 | index := sort.Search(int(p.count), func(i int) bool { 334 | return bytes.Compare(inodes[i].key(), key) != -1 335 | }) 336 | e.index = index 337 | } 338 | 339 | // keyValue returns the key and value of the current leaf element. 340 | func (c *Cursor) keyValue() ([]byte, []byte, uint32) { 341 | ref := &c.stack[len(c.stack)-1] 342 | if ref.count() == 0 || ref.index >= ref.count() { 343 | return nil, nil, 0 344 | } 345 | 346 | // Retrieve value from node. 347 | if ref.node != nil { 348 | inode := &ref.node.inodes[ref.index] 349 | return inode.key, inode.value, inode.flags 350 | } 351 | 352 | // Or retrieve value from page. 353 | elem := ref.page.leafPageElement(uint16(ref.index)) 354 | return elem.key(), elem.value(), elem.flags 355 | } 356 | 357 | // node returns the node that the cursor is currently positioned on. 358 | func (c *Cursor) node() *node { 359 | _assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack") 360 | 361 | // If the top of the stack is a leaf node then just return it. 
362 | if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() { 363 | return ref.node 364 | } 365 | 366 | // Start from root and traverse down the hierarchy. 367 | var n = c.stack[0].node 368 | if n == nil { 369 | n = c.bucket.node(c.stack[0].page.id, nil) 370 | } 371 | for _, ref := range c.stack[:len(c.stack)-1] { 372 | _assert(!n.isLeaf, "expected branch node") 373 | n = n.childAt(int(ref.index)) 374 | } 375 | _assert(n.isLeaf, "expected leaf node") 376 | return n 377 | } 378 | 379 | // elemRef represents a reference to an element on a given page/node. 380 | type elemRef struct { 381 | page *page 382 | node *node 383 | index int 384 | } 385 | 386 | // isLeaf returns whether the ref is pointing at a leaf page/node. 387 | func (r *elemRef) isLeaf() bool { 388 | if r.node != nil { 389 | return r.node.isLeaf 390 | } 391 | return (r.page.flags & leafPageFlag) != 0 392 | } 393 | 394 | // count returns the number of inodes or page elements. 395 | func (r *elemRef) count() int { 396 | if r.node != nil { 397 | return len(r.node.inodes) 398 | } 399 | return int(r.page.count) 400 | } 401 | -------------------------------------------------------------------------------- /cursor_test.go: -------------------------------------------------------------------------------- 1 | package bolt_test 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "fmt" 7 | "log" 8 | "os" 9 | "reflect" 10 | "sort" 11 | "testing" 12 | "testing/quick" 13 | 14 | "github.com/boltdb/bolt" 15 | ) 16 | 17 | // Ensure that a cursor can return a reference to the bucket that created it. 18 | func TestCursor_Bucket(t *testing.T) { 19 | db := MustOpenDB() 20 | defer db.MustClose() 21 | if err := db.Update(func(tx *bolt.Tx) error { 22 | b, err := tx.CreateBucket([]byte("widgets")) 23 | if err != nil { 24 | t.Fatal(err) 25 | } 26 | if cb := b.Cursor().Bucket(); !reflect.DeepEqual(cb, b) { 27 | t.Fatal("cursor bucket mismatch") 28 | } 29 | return nil 30 | }); err != nil { 31 | t.Fatal(err) 32 | } 33 | } 34 | 35 | // Ensure that a Tx cursor can seek to the appropriate keys. 36 | func TestCursor_Seek(t *testing.T) { 37 | db := MustOpenDB() 38 | defer db.MustClose() 39 | if err := db.Update(func(tx *bolt.Tx) error { 40 | b, err := tx.CreateBucket([]byte("widgets")) 41 | if err != nil { 42 | t.Fatal(err) 43 | } 44 | if err := b.Put([]byte("foo"), []byte("0001")); err != nil { 45 | t.Fatal(err) 46 | } 47 | if err := b.Put([]byte("bar"), []byte("0002")); err != nil { 48 | t.Fatal(err) 49 | } 50 | if err := b.Put([]byte("baz"), []byte("0003")); err != nil { 51 | t.Fatal(err) 52 | } 53 | 54 | if _, err := b.CreateBucket([]byte("bkt")); err != nil { 55 | t.Fatal(err) 56 | } 57 | return nil 58 | }); err != nil { 59 | t.Fatal(err) 60 | } 61 | 62 | if err := db.View(func(tx *bolt.Tx) error { 63 | c := tx.Bucket([]byte("widgets")).Cursor() 64 | 65 | // Exact match should go to the key. 66 | if k, v := c.Seek([]byte("bar")); !bytes.Equal(k, []byte("bar")) { 67 | t.Fatalf("unexpected key: %v", k) 68 | } else if !bytes.Equal(v, []byte("0002")) { 69 | t.Fatalf("unexpected value: %v", v) 70 | } 71 | 72 | // Inexact match should go to the next key. 73 | if k, v := c.Seek([]byte("bas")); !bytes.Equal(k, []byte("baz")) { 74 | t.Fatalf("unexpected key: %v", k) 75 | } else if !bytes.Equal(v, []byte("0003")) { 76 | t.Fatalf("unexpected value: %v", v) 77 | } 78 | 79 | // Low key should go to the first key. 
80 | if k, v := c.Seek([]byte("")); !bytes.Equal(k, []byte("bar")) {
81 | t.Fatalf("unexpected key: %v", k)
82 | } else if !bytes.Equal(v, []byte("0002")) {
83 | t.Fatalf("unexpected value: %v", v)
84 | }
85 |
86 | // High key should return no key.
87 | if k, v := c.Seek([]byte("zzz")); k != nil {
88 | t.Fatalf("expected nil key: %v", k)
89 | } else if v != nil {
90 | t.Fatalf("expected nil value: %v", v)
91 | }
92 |
93 | // Buckets should return their key but no value.
94 | if k, v := c.Seek([]byte("bkt")); !bytes.Equal(k, []byte("bkt")) {
95 | t.Fatalf("unexpected key: %v", k)
96 | } else if v != nil {
97 | t.Fatalf("expected nil value: %v", v)
98 | }
99 |
100 | return nil
101 | }); err != nil {
102 | t.Fatal(err)
103 | }
104 | }
105 |
106 | func TestCursor_Delete(t *testing.T) {
107 | db := MustOpenDB()
108 | defer db.MustClose()
109 |
110 | const count = 1000
111 |
112 | // Insert keys 0 through $count-1, plus one subbucket.
113 | if err := db.Update(func(tx *bolt.Tx) error {
114 | b, err := tx.CreateBucket([]byte("widgets"))
115 | if err != nil {
116 | t.Fatal(err)
117 | }
118 | for i := 0; i < count; i++ {
119 | k := make([]byte, 8)
120 | binary.BigEndian.PutUint64(k, uint64(i))
121 | if err := b.Put(k, make([]byte, 100)); err != nil {
122 | t.Fatal(err)
123 | }
124 | }
125 | if _, err := b.CreateBucket([]byte("sub")); err != nil {
126 | t.Fatal(err)
127 | }
128 | return nil
129 | }); err != nil {
130 | t.Fatal(err)
131 | }
132 |
133 | if err := db.Update(func(tx *bolt.Tx) error {
134 | c := tx.Bucket([]byte("widgets")).Cursor()
135 | bound := make([]byte, 8)
136 | binary.BigEndian.PutUint64(bound, uint64(count/2))
137 | for key, _ := c.First(); bytes.Compare(key, bound) < 0; key, _ = c.Next() {
138 | if err := c.Delete(); err != nil {
139 | t.Fatal(err)
140 | }
141 | }
142 |
143 | c.Seek([]byte("sub"))
144 | if err := c.Delete(); err != bolt.ErrIncompatibleValue {
145 | t.Fatalf("unexpected error: %s", err)
146 | }
147 |
148 | return nil
149 | }); err != nil {
150 | t.Fatal(err)
151 | }
152 |
153 | if err := db.View(func(tx *bolt.Tx) error {
154 | stats := tx.Bucket([]byte("widgets")).Stats()
155 | if stats.KeyN != count/2+1 {
156 | t.Fatalf("unexpected KeyN: %d", stats.KeyN)
157 | }
158 | return nil
159 | }); err != nil {
160 | t.Fatal(err)
161 | }
162 | }
163 |
164 | // Ensure that a Tx cursor can seek to the appropriate keys when there are a
165 | // large number of keys. This test also checks that seek will always move
166 | // forward to the next key.
167 | //
168 | // Related: https://github.com/boltdb/bolt/pull/187
169 | func TestCursor_Seek_Large(t *testing.T) {
170 | db := MustOpenDB()
171 | defer db.MustClose()
172 |
173 | var count = 10000
174 |
175 | // Insert every other key between 0 and $count.
176 | if err := db.Update(func(tx *bolt.Tx) error {
177 | b, err := tx.CreateBucket([]byte("widgets"))
178 | if err != nil {
179 | t.Fatal(err)
180 | }
181 |
182 | for i := 0; i < count; i += 100 {
183 | for j := i; j < i+100; j += 2 {
184 | k := make([]byte, 8)
185 | binary.BigEndian.PutUint64(k, uint64(j))
186 | if err := b.Put(k, make([]byte, 100)); err != nil {
187 | t.Fatal(err)
188 | }
189 | }
190 | }
191 | return nil
192 | }); err != nil {
193 | t.Fatal(err)
194 | }
195 |
196 | if err := db.View(func(tx *bolt.Tx) error {
197 | c := tx.Bucket([]byte("widgets")).Cursor()
198 | for i := 0; i < count; i++ {
199 | seek := make([]byte, 8)
200 | binary.BigEndian.PutUint64(seek, uint64(i))
201 |
202 | k, _ := c.Seek(seek)
203 |
204 | // The last seek is beyond the end of the range so
205 | // it should return nil.
206 | if i == count-1 {
207 | if k != nil {
208 | t.Fatal("expected nil key")
209 | }
210 | continue
211 | }
212 |
213 | // Otherwise we should seek to the exact key or the next key.
214 | num := binary.BigEndian.Uint64(k)
215 | if i%2 == 0 {
216 | if num != uint64(i) {
217 | t.Fatalf("unexpected num: %d", num)
218 | }
219 | } else {
220 | if num != uint64(i+1) {
221 | t.Fatalf("unexpected num: %d", num)
222 | }
223 | }
224 | }
225 |
226 | return nil
227 | }); err != nil {
228 | t.Fatal(err)
229 | }
230 | }
231 |
232 | // Ensure that a cursor can iterate over an empty bucket without error.
233 | func TestCursor_EmptyBucket(t *testing.T) {
234 | db := MustOpenDB()
235 | defer db.MustClose()
236 | if err := db.Update(func(tx *bolt.Tx) error {
237 | _, err := tx.CreateBucket([]byte("widgets"))
238 | return err
239 | }); err != nil {
240 | t.Fatal(err)
241 | }
242 |
243 | if err := db.View(func(tx *bolt.Tx) error {
244 | c := tx.Bucket([]byte("widgets")).Cursor()
245 | k, v := c.First()
246 | if k != nil {
247 | t.Fatalf("unexpected key: %v", k)
248 | } else if v != nil {
249 | t.Fatalf("unexpected value: %v", v)
250 | }
251 | return nil
252 | }); err != nil {
253 | t.Fatal(err)
254 | }
255 | }
256 |
257 | // Ensure that a Tx cursor can reverse iterate over an empty bucket without error.
258 | func TestCursor_EmptyBucketReverse(t *testing.T) {
259 | db := MustOpenDB()
260 | defer db.MustClose()
261 |
262 | if err := db.Update(func(tx *bolt.Tx) error {
263 | _, err := tx.CreateBucket([]byte("widgets"))
264 | return err
265 | }); err != nil {
266 | t.Fatal(err)
267 | }
268 | if err := db.View(func(tx *bolt.Tx) error {
269 | c := tx.Bucket([]byte("widgets")).Cursor()
270 | k, v := c.Last()
271 | if k != nil {
272 | t.Fatalf("unexpected key: %v", k)
273 | } else if v != nil {
274 | t.Fatalf("unexpected value: %v", v)
275 | }
276 | return nil
277 | }); err != nil {
278 | t.Fatal(err)
279 | }
280 | }
281 |
282 | // Ensure that a Tx cursor can iterate over a single root with a couple elements.
283 | func TestCursor_Iterate_Leaf(t *testing.T) { 284 | db := MustOpenDB() 285 | defer db.MustClose() 286 | 287 | if err := db.Update(func(tx *bolt.Tx) error { 288 | b, err := tx.CreateBucket([]byte("widgets")) 289 | if err != nil { 290 | t.Fatal(err) 291 | } 292 | if err := b.Put([]byte("baz"), []byte{}); err != nil { 293 | t.Fatal(err) 294 | } 295 | if err := b.Put([]byte("foo"), []byte{0}); err != nil { 296 | t.Fatal(err) 297 | } 298 | if err := b.Put([]byte("bar"), []byte{1}); err != nil { 299 | t.Fatal(err) 300 | } 301 | return nil 302 | }); err != nil { 303 | t.Fatal(err) 304 | } 305 | tx, err := db.Begin(false) 306 | if err != nil { 307 | t.Fatal(err) 308 | } 309 | defer func() { _ = tx.Rollback() }() 310 | 311 | c := tx.Bucket([]byte("widgets")).Cursor() 312 | 313 | k, v := c.First() 314 | if !bytes.Equal(k, []byte("bar")) { 315 | t.Fatalf("unexpected key: %v", k) 316 | } else if !bytes.Equal(v, []byte{1}) { 317 | t.Fatalf("unexpected value: %v", v) 318 | } 319 | 320 | k, v = c.Next() 321 | if !bytes.Equal(k, []byte("baz")) { 322 | t.Fatalf("unexpected key: %v", k) 323 | } else if !bytes.Equal(v, []byte{}) { 324 | t.Fatalf("unexpected value: %v", v) 325 | } 326 | 327 | k, v = c.Next() 328 | if !bytes.Equal(k, []byte("foo")) { 329 | t.Fatalf("unexpected key: %v", k) 330 | } else if !bytes.Equal(v, []byte{0}) { 331 | t.Fatalf("unexpected value: %v", v) 332 | } 333 | 334 | k, v = c.Next() 335 | if k != nil { 336 | t.Fatalf("expected nil key: %v", k) 337 | } else if v != nil { 338 | t.Fatalf("expected nil value: %v", v) 339 | } 340 | 341 | k, v = c.Next() 342 | if k != nil { 343 | t.Fatalf("expected nil key: %v", k) 344 | } else if v != nil { 345 | t.Fatalf("expected nil value: %v", v) 346 | } 347 | 348 | if err := tx.Rollback(); err != nil { 349 | t.Fatal(err) 350 | } 351 | } 352 | 353 | // Ensure that a Tx cursor can iterate in reverse over a single root with a couple elements. 
354 | func TestCursor_LeafRootReverse(t *testing.T) { 355 | db := MustOpenDB() 356 | defer db.MustClose() 357 | 358 | if err := db.Update(func(tx *bolt.Tx) error { 359 | b, err := tx.CreateBucket([]byte("widgets")) 360 | if err != nil { 361 | t.Fatal(err) 362 | } 363 | if err := b.Put([]byte("baz"), []byte{}); err != nil { 364 | t.Fatal(err) 365 | } 366 | if err := b.Put([]byte("foo"), []byte{0}); err != nil { 367 | t.Fatal(err) 368 | } 369 | if err := b.Put([]byte("bar"), []byte{1}); err != nil { 370 | t.Fatal(err) 371 | } 372 | return nil 373 | }); err != nil { 374 | t.Fatal(err) 375 | } 376 | tx, err := db.Begin(false) 377 | if err != nil { 378 | t.Fatal(err) 379 | } 380 | c := tx.Bucket([]byte("widgets")).Cursor() 381 | 382 | if k, v := c.Last(); !bytes.Equal(k, []byte("foo")) { 383 | t.Fatalf("unexpected key: %v", k) 384 | } else if !bytes.Equal(v, []byte{0}) { 385 | t.Fatalf("unexpected value: %v", v) 386 | } 387 | 388 | if k, v := c.Prev(); !bytes.Equal(k, []byte("baz")) { 389 | t.Fatalf("unexpected key: %v", k) 390 | } else if !bytes.Equal(v, []byte{}) { 391 | t.Fatalf("unexpected value: %v", v) 392 | } 393 | 394 | if k, v := c.Prev(); !bytes.Equal(k, []byte("bar")) { 395 | t.Fatalf("unexpected key: %v", k) 396 | } else if !bytes.Equal(v, []byte{1}) { 397 | t.Fatalf("unexpected value: %v", v) 398 | } 399 | 400 | if k, v := c.Prev(); k != nil { 401 | t.Fatalf("expected nil key: %v", k) 402 | } else if v != nil { 403 | t.Fatalf("expected nil value: %v", v) 404 | } 405 | 406 | if k, v := c.Prev(); k != nil { 407 | t.Fatalf("expected nil key: %v", k) 408 | } else if v != nil { 409 | t.Fatalf("expected nil value: %v", v) 410 | } 411 | 412 | if err := tx.Rollback(); err != nil { 413 | t.Fatal(err) 414 | } 415 | } 416 | 417 | // Ensure that a Tx cursor can restart from the beginning. 418 | func TestCursor_Restart(t *testing.T) { 419 | db := MustOpenDB() 420 | defer db.MustClose() 421 | 422 | if err := db.Update(func(tx *bolt.Tx) error { 423 | b, err := tx.CreateBucket([]byte("widgets")) 424 | if err != nil { 425 | t.Fatal(err) 426 | } 427 | if err := b.Put([]byte("bar"), []byte{}); err != nil { 428 | t.Fatal(err) 429 | } 430 | if err := b.Put([]byte("foo"), []byte{}); err != nil { 431 | t.Fatal(err) 432 | } 433 | return nil 434 | }); err != nil { 435 | t.Fatal(err) 436 | } 437 | 438 | tx, err := db.Begin(false) 439 | if err != nil { 440 | t.Fatal(err) 441 | } 442 | c := tx.Bucket([]byte("widgets")).Cursor() 443 | 444 | if k, _ := c.First(); !bytes.Equal(k, []byte("bar")) { 445 | t.Fatalf("unexpected key: %v", k) 446 | } 447 | if k, _ := c.Next(); !bytes.Equal(k, []byte("foo")) { 448 | t.Fatalf("unexpected key: %v", k) 449 | } 450 | 451 | if k, _ := c.First(); !bytes.Equal(k, []byte("bar")) { 452 | t.Fatalf("unexpected key: %v", k) 453 | } 454 | if k, _ := c.Next(); !bytes.Equal(k, []byte("foo")) { 455 | t.Fatalf("unexpected key: %v", k) 456 | } 457 | 458 | if err := tx.Rollback(); err != nil { 459 | t.Fatal(err) 460 | } 461 | } 462 | 463 | // Ensure that a cursor can skip over empty pages that have been deleted. 464 | func TestCursor_First_EmptyPages(t *testing.T) { 465 | db := MustOpenDB() 466 | defer db.MustClose() 467 | 468 | // Create 1000 keys in the "widgets" bucket. 
469 | if err := db.Update(func(tx *bolt.Tx) error { 470 | b, err := tx.CreateBucket([]byte("widgets")) 471 | if err != nil { 472 | t.Fatal(err) 473 | } 474 | 475 | for i := 0; i < 1000; i++ { 476 | if err := b.Put(u64tob(uint64(i)), []byte{}); err != nil { 477 | t.Fatal(err) 478 | } 479 | } 480 | 481 | return nil 482 | }); err != nil { 483 | t.Fatal(err) 484 | } 485 | 486 | // Delete half the keys and then try to iterate. 487 | if err := db.Update(func(tx *bolt.Tx) error { 488 | b := tx.Bucket([]byte("widgets")) 489 | for i := 0; i < 600; i++ { 490 | if err := b.Delete(u64tob(uint64(i))); err != nil { 491 | t.Fatal(err) 492 | } 493 | } 494 | 495 | c := b.Cursor() 496 | var n int 497 | for k, _ := c.First(); k != nil; k, _ = c.Next() { 498 | n++ 499 | } 500 | if n != 400 { 501 | t.Fatalf("unexpected key count: %d", n) 502 | } 503 | 504 | return nil 505 | }); err != nil { 506 | t.Fatal(err) 507 | } 508 | } 509 | 510 | // Ensure that a Tx can iterate over all elements in a bucket. 511 | func TestCursor_QuickCheck(t *testing.T) { 512 | f := func(items testdata) bool { 513 | db := MustOpenDB() 514 | defer db.MustClose() 515 | 516 | // Bulk insert all values. 517 | tx, err := db.Begin(true) 518 | if err != nil { 519 | t.Fatal(err) 520 | } 521 | b, err := tx.CreateBucket([]byte("widgets")) 522 | if err != nil { 523 | t.Fatal(err) 524 | } 525 | for _, item := range items { 526 | if err := b.Put(item.Key, item.Value); err != nil { 527 | t.Fatal(err) 528 | } 529 | } 530 | if err := tx.Commit(); err != nil { 531 | t.Fatal(err) 532 | } 533 | 534 | // Sort test data. 535 | sort.Sort(items) 536 | 537 | // Iterate over all items and check consistency. 538 | var index = 0 539 | tx, err = db.Begin(false) 540 | if err != nil { 541 | t.Fatal(err) 542 | } 543 | 544 | c := tx.Bucket([]byte("widgets")).Cursor() 545 | for k, v := c.First(); k != nil && index < len(items); k, v = c.Next() { 546 | if !bytes.Equal(k, items[index].Key) { 547 | t.Fatalf("unexpected key: %v", k) 548 | } else if !bytes.Equal(v, items[index].Value) { 549 | t.Fatalf("unexpected value: %v", v) 550 | } 551 | index++ 552 | } 553 | if len(items) != index { 554 | t.Fatalf("unexpected item count: %v, expected %v", len(items), index) 555 | } 556 | 557 | if err := tx.Rollback(); err != nil { 558 | t.Fatal(err) 559 | } 560 | 561 | return true 562 | } 563 | if err := quick.Check(f, qconfig()); err != nil { 564 | t.Error(err) 565 | } 566 | } 567 | 568 | // Ensure that a transaction can iterate over all elements in a bucket in reverse. 569 | func TestCursor_QuickCheck_Reverse(t *testing.T) { 570 | f := func(items testdata) bool { 571 | db := MustOpenDB() 572 | defer db.MustClose() 573 | 574 | // Bulk insert all values. 575 | tx, err := db.Begin(true) 576 | if err != nil { 577 | t.Fatal(err) 578 | } 579 | b, err := tx.CreateBucket([]byte("widgets")) 580 | if err != nil { 581 | t.Fatal(err) 582 | } 583 | for _, item := range items { 584 | if err := b.Put(item.Key, item.Value); err != nil { 585 | t.Fatal(err) 586 | } 587 | } 588 | if err := tx.Commit(); err != nil { 589 | t.Fatal(err) 590 | } 591 | 592 | // Sort test data. 593 | sort.Sort(revtestdata(items)) 594 | 595 | // Iterate over all items and check consistency. 
596 | var index = 0 597 | tx, err = db.Begin(false) 598 | if err != nil { 599 | t.Fatal(err) 600 | } 601 | c := tx.Bucket([]byte("widgets")).Cursor() 602 | for k, v := c.Last(); k != nil && index < len(items); k, v = c.Prev() { 603 | if !bytes.Equal(k, items[index].Key) { 604 | t.Fatalf("unexpected key: %v", k) 605 | } else if !bytes.Equal(v, items[index].Value) { 606 | t.Fatalf("unexpected value: %v", v) 607 | } 608 | index++ 609 | } 610 | if len(items) != index { 611 | t.Fatalf("unexpected item count: %v, expected %v", len(items), index) 612 | } 613 | 614 | if err := tx.Rollback(); err != nil { 615 | t.Fatal(err) 616 | } 617 | 618 | return true 619 | } 620 | if err := quick.Check(f, qconfig()); err != nil { 621 | t.Error(err) 622 | } 623 | } 624 | 625 | // Ensure that a Tx cursor can iterate over subbuckets. 626 | func TestCursor_QuickCheck_BucketsOnly(t *testing.T) { 627 | db := MustOpenDB() 628 | defer db.MustClose() 629 | 630 | if err := db.Update(func(tx *bolt.Tx) error { 631 | b, err := tx.CreateBucket([]byte("widgets")) 632 | if err != nil { 633 | t.Fatal(err) 634 | } 635 | if _, err := b.CreateBucket([]byte("foo")); err != nil { 636 | t.Fatal(err) 637 | } 638 | if _, err := b.CreateBucket([]byte("bar")); err != nil { 639 | t.Fatal(err) 640 | } 641 | if _, err := b.CreateBucket([]byte("baz")); err != nil { 642 | t.Fatal(err) 643 | } 644 | return nil 645 | }); err != nil { 646 | t.Fatal(err) 647 | } 648 | 649 | if err := db.View(func(tx *bolt.Tx) error { 650 | var names []string 651 | c := tx.Bucket([]byte("widgets")).Cursor() 652 | for k, v := c.First(); k != nil; k, v = c.Next() { 653 | names = append(names, string(k)) 654 | if v != nil { 655 | t.Fatalf("unexpected value: %v", v) 656 | } 657 | } 658 | if !reflect.DeepEqual(names, []string{"bar", "baz", "foo"}) { 659 | t.Fatalf("unexpected names: %+v", names) 660 | } 661 | return nil 662 | }); err != nil { 663 | t.Fatal(err) 664 | } 665 | } 666 | 667 | // Ensure that a Tx cursor can reverse iterate over subbuckets. 668 | func TestCursor_QuickCheck_BucketsOnly_Reverse(t *testing.T) { 669 | db := MustOpenDB() 670 | defer db.MustClose() 671 | 672 | if err := db.Update(func(tx *bolt.Tx) error { 673 | b, err := tx.CreateBucket([]byte("widgets")) 674 | if err != nil { 675 | t.Fatal(err) 676 | } 677 | if _, err := b.CreateBucket([]byte("foo")); err != nil { 678 | t.Fatal(err) 679 | } 680 | if _, err := b.CreateBucket([]byte("bar")); err != nil { 681 | t.Fatal(err) 682 | } 683 | if _, err := b.CreateBucket([]byte("baz")); err != nil { 684 | t.Fatal(err) 685 | } 686 | return nil 687 | }); err != nil { 688 | t.Fatal(err) 689 | } 690 | 691 | if err := db.View(func(tx *bolt.Tx) error { 692 | var names []string 693 | c := tx.Bucket([]byte("widgets")).Cursor() 694 | for k, v := c.Last(); k != nil; k, v = c.Prev() { 695 | names = append(names, string(k)) 696 | if v != nil { 697 | t.Fatalf("unexpected value: %v", v) 698 | } 699 | } 700 | if !reflect.DeepEqual(names, []string{"foo", "baz", "bar"}) { 701 | t.Fatalf("unexpected names: %+v", names) 702 | } 703 | return nil 704 | }); err != nil { 705 | t.Fatal(err) 706 | } 707 | } 708 | 709 | func ExampleCursor() { 710 | // Open the database. 711 | db, err := bolt.Open(tempfile(), 0666, nil) 712 | if err != nil { 713 | log.Fatal(err) 714 | } 715 | defer os.Remove(db.Path()) 716 | 717 | // Start a read-write transaction. 718 | if err := db.Update(func(tx *bolt.Tx) error { 719 | // Create a new bucket. 
720 | b, err := tx.CreateBucket([]byte("animals")) 721 | if err != nil { 722 | return err 723 | } 724 | 725 | // Insert data into a bucket. 726 | if err := b.Put([]byte("dog"), []byte("fun")); err != nil { 727 | log.Fatal(err) 728 | } 729 | if err := b.Put([]byte("cat"), []byte("lame")); err != nil { 730 | log.Fatal(err) 731 | } 732 | if err := b.Put([]byte("liger"), []byte("awesome")); err != nil { 733 | log.Fatal(err) 734 | } 735 | 736 | // Create a cursor for iteration. 737 | c := b.Cursor() 738 | 739 | // Iterate over items in sorted key order. This starts from the 740 | // first key/value pair and updates the k/v variables to the 741 | // next key/value on each iteration. 742 | // 743 | // The loop finishes at the end of the cursor when a nil key is returned. 744 | for k, v := c.First(); k != nil; k, v = c.Next() { 745 | fmt.Printf("A %s is %s.\n", k, v) 746 | } 747 | 748 | return nil 749 | }); err != nil { 750 | log.Fatal(err) 751 | } 752 | 753 | if err := db.Close(); err != nil { 754 | log.Fatal(err) 755 | } 756 | 757 | // Output: 758 | // A cat is lame. 759 | // A dog is fun. 760 | // A liger is awesome. 761 | } 762 | 763 | func ExampleCursor_reverse() { 764 | // Open the database. 765 | db, err := bolt.Open(tempfile(), 0666, nil) 766 | if err != nil { 767 | log.Fatal(err) 768 | } 769 | defer os.Remove(db.Path()) 770 | 771 | // Start a read-write transaction. 772 | if err := db.Update(func(tx *bolt.Tx) error { 773 | // Create a new bucket. 774 | b, err := tx.CreateBucket([]byte("animals")) 775 | if err != nil { 776 | return err 777 | } 778 | 779 | // Insert data into a bucket. 780 | if err := b.Put([]byte("dog"), []byte("fun")); err != nil { 781 | log.Fatal(err) 782 | } 783 | if err := b.Put([]byte("cat"), []byte("lame")); err != nil { 784 | log.Fatal(err) 785 | } 786 | if err := b.Put([]byte("liger"), []byte("awesome")); err != nil { 787 | log.Fatal(err) 788 | } 789 | 790 | // Create a cursor for iteration. 791 | c := b.Cursor() 792 | 793 | // Iterate over items in reverse sorted key order. This starts 794 | // from the last key/value pair and updates the k/v variables to 795 | // the previous key/value on each iteration. 796 | // 797 | // The loop finishes at the beginning of the cursor when a nil key 798 | // is returned. 799 | for k, v := c.Last(); k != nil; k, v = c.Prev() { 800 | fmt.Printf("A %s is %s.\n", k, v) 801 | } 802 | 803 | return nil 804 | }); err != nil { 805 | log.Fatal(err) 806 | } 807 | 808 | // Close the database to release the file lock. 809 | if err := db.Close(); err != nil { 810 | log.Fatal(err) 811 | } 812 | 813 | // Output: 814 | // A liger is awesome. 815 | // A dog is fun. 816 | // A cat is lame. 817 | } 818 | -------------------------------------------------------------------------------- /db.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "hash/fnv" 7 | "log" 8 | "os" 9 | "runtime" 10 | "runtime/debug" 11 | "strings" 12 | "sync" 13 | "time" 14 | "unsafe" 15 | ) 16 | 17 | // The largest step that can be taken when remapping the mmap. 18 | const maxMmapStep = 1 << 30 // 1GB 19 | 20 | // The data file format version. 21 | const version = 2 22 | 23 | // Represents a marker value to indicate that a file is a Bolt DB. 24 | const magic uint32 = 0xED0CDAED 25 | 26 | // IgnoreNoSync specifies whether the NoSync field of a DB is ignored when 27 | // syncing changes to a file. 
This is required as some operating systems,
28 | // such as OpenBSD, do not have a unified buffer cache (UBC) and writes
29 | // must be synchronized using the msync(2) syscall.
30 | const IgnoreNoSync = runtime.GOOS == "openbsd"
31 |
32 | // Default values if not set in a DB instance.
33 | const (
34 | DefaultMaxBatchSize int = 1000
35 | DefaultMaxBatchDelay = 10 * time.Millisecond
36 | DefaultAllocSize = 16 * 1024 * 1024
37 | )
38 |
39 | // default page size for db is set to the OS page size.
40 | var defaultPageSize = os.Getpagesize()
41 |
42 | // DB represents a collection of buckets persisted to a file on disk.
43 | // All data access is performed through transactions which can be obtained through the DB.
44 | // All the functions on DB will return an ErrDatabaseNotOpen if accessed before Open() is called.
45 | type DB struct {
46 | // When enabled, the database will perform a Check() after every commit.
47 | // A panic is issued if the database is in an inconsistent state. This
48 | // flag has a large performance impact so it should only be used for
49 | // debugging purposes.
50 | StrictMode bool
51 |
52 | // Setting the NoSync flag will cause the database to skip fsync()
53 | // calls after each commit. This can be useful when bulk loading data
54 | // into a database and you can restart the bulk load in the event of
55 | // a system failure or database corruption. Do not set this flag for
56 | // normal use.
57 | //
58 | // If the package global IgnoreNoSync constant is true, this value is
59 | // ignored. See the comment on that constant for more details.
60 | //
61 | // THIS IS UNSAFE. PLEASE USE WITH CAUTION.
62 | NoSync bool
63 |
64 | // When true, skips the truncate call when growing the database.
65 | // Setting this to true is only safe on non-ext3/ext4 systems.
66 | // Skipping truncation avoids preallocation of hard drive space and
67 | // bypasses a truncate() and fsync() syscall on remapping.
68 | //
69 | // https://github.com/boltdb/bolt/issues/284
70 | NoGrowSync bool
71 |
72 | // If you want to read the entire database fast, you can set MmapFlags to
73 | // syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
74 | MmapFlags int
75 |
76 | // MaxBatchSize is the maximum size of a batch. Default value is
77 | // copied from DefaultMaxBatchSize in Open.
78 | //
79 | // If <=0, disables batching.
80 | //
81 | // Do not change concurrently with calls to Batch.
82 | MaxBatchSize int
83 |
84 | // MaxBatchDelay is the maximum delay before a batch starts.
85 | // Default value is copied from DefaultMaxBatchDelay in Open.
86 | //
87 | // If <=0, effectively disables batching.
88 | //
89 | // Do not change concurrently with calls to Batch.
90 | MaxBatchDelay time.Duration
91 |
92 | // AllocSize is the amount of space allocated when the database
93 | // needs to create new pages. This is done to amortize the cost
94 | // of truncate() and fsync() when growing the data file.
95 | AllocSize int
96 |
97 | path string
98 | file *os.File
99 | lockfile *os.File // windows only
100 | dataref []byte // mmap'ed readonly, write throws SEGV
101 | data *[maxMapSize]byte
102 | datasz int
103 | filesz int // current on disk file size
104 | meta0 *meta
105 | meta1 *meta
106 | pageSize int
107 | opened bool
108 | rwtx *Tx
109 | txs []*Tx
110 | freelist *freelist
111 | stats Stats
112 |
113 | pagePool sync.Pool
114 |
115 | batchMu sync.Mutex
116 | batch *batch
117 |
118 | rwlock sync.Mutex // Allows only one writer at a time.
119 | metalock sync.Mutex // Protects meta page access.
120 | mmaplock sync.RWMutex // Protects mmap access during remapping. 121 | statlock sync.RWMutex // Protects stats access. 122 | 123 | ops struct { 124 | writeAt func(b []byte, off int64) (n int, err error) 125 | } 126 | 127 | // Read only mode. 128 | // When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately. 129 | readOnly bool 130 | } 131 | 132 | // Path returns the path to currently open database file. 133 | func (db *DB) Path() string { 134 | return db.path 135 | } 136 | 137 | // GoString returns the Go string representation of the database. 138 | func (db *DB) GoString() string { 139 | return fmt.Sprintf("bolt.DB{path:%q}", db.path) 140 | } 141 | 142 | // String returns the string representation of the database. 143 | func (db *DB) String() string { 144 | return fmt.Sprintf("DB<%q>", db.path) 145 | } 146 | 147 | // Open creates and opens a database at the given path. 148 | // If the file does not exist then it will be created automatically. 149 | // Passing in nil options will cause Bolt to open the database with the default options. 150 | func Open(path string, mode os.FileMode, options *Options) (*DB, error) { 151 | var db = &DB{opened: true} 152 | 153 | // Set default options if no options are provided. 154 | if options == nil { 155 | options = DefaultOptions 156 | } 157 | db.NoGrowSync = options.NoGrowSync 158 | db.MmapFlags = options.MmapFlags 159 | 160 | // Set default values for later DB operations. 161 | db.MaxBatchSize = DefaultMaxBatchSize 162 | db.MaxBatchDelay = DefaultMaxBatchDelay 163 | db.AllocSize = DefaultAllocSize 164 | 165 | flag := os.O_RDWR 166 | if options.ReadOnly { 167 | flag = os.O_RDONLY 168 | db.readOnly = true 169 | } 170 | 171 | // Open data file and separate sync handler for metadata writes. 172 | db.path = path 173 | var err error 174 | if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil { 175 | _ = db.close() 176 | return nil, err 177 | } 178 | 179 | // Lock file so that other processes using Bolt in read-write mode cannot 180 | // use the database at the same time. This would cause corruption since 181 | // the two processes would write meta pages and free pages separately. 182 | // The database file is locked exclusively (only one process can grab the lock) 183 | // if !options.ReadOnly. 184 | // The database file is locked using the shared lock (more than one process may 185 | // hold a lock at the same time) otherwise (options.ReadOnly is set). 186 | if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil { 187 | _ = db.close() 188 | return nil, err 189 | } 190 | 191 | // Default values for test hooks 192 | db.ops.writeAt = db.file.WriteAt 193 | 194 | // Initialize the database if it doesn't exist. 195 | if info, err := db.file.Stat(); err != nil { 196 | return nil, err 197 | } else if info.Size() == 0 { 198 | // Initialize new files with meta pages. 199 | if err := db.init(); err != nil { 200 | return nil, err 201 | } 202 | } else { 203 | // Read the first meta page to determine the page size. 204 | var buf [0x1000]byte 205 | if _, err := db.file.ReadAt(buf[:], 0); err == nil { 206 | m := db.pageInBuffer(buf[:], 0).meta() 207 | if err := m.validate(); err != nil { 208 | // If we can't read the page size, we can assume it's the same 209 | // as the OS -- since that's how the page size was chosen in the 210 | // first place. 
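// (Concretely: a database created on a typical 4KB-page system and
// reopened on the same machine falls back harmlessly here, because
// os.Getpagesize() returns the original 4096 again.)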
211 | //
212 | // If the first page is invalid and this OS uses a different
213 | // page size than what the database was created with then we
214 | // are out of luck and cannot access the database.
215 | db.pageSize = os.Getpagesize()
216 | } else {
217 | db.pageSize = int(m.pageSize)
218 | }
219 | }
220 | }
221 |
222 | // Initialize page pool.
223 | db.pagePool = sync.Pool{
224 | New: func() interface{} {
225 | return make([]byte, db.pageSize)
226 | },
227 | }
228 |
229 | // Memory map the data file.
230 | if err := db.mmap(options.InitialMmapSize); err != nil {
231 | _ = db.close()
232 | return nil, err
233 | }
234 |
235 | // Read in the freelist.
236 | db.freelist = newFreelist()
237 | db.freelist.read(db.page(db.meta().freelist))
238 |
239 | // Mark the database as opened and return.
240 | return db, nil
241 | }
242 |
243 | // mmap opens the underlying memory-mapped file and initializes the meta references.
244 | // minsz is the minimum size that the new mmap can be.
245 | func (db *DB) mmap(minsz int) error {
246 | db.mmaplock.Lock()
247 | defer db.mmaplock.Unlock()
248 |
249 | info, err := db.file.Stat()
250 | if err != nil {
251 | return fmt.Errorf("mmap stat error: %s", err)
252 | } else if int(info.Size()) < db.pageSize*2 {
253 | return fmt.Errorf("file size too small")
254 | }
255 |
256 | // Ensure the size is at least the minimum size.
257 | var size = int(info.Size())
258 | if size < minsz {
259 | size = minsz
260 | }
261 | size, err = db.mmapSize(size)
262 | if err != nil {
263 | return err
264 | }
265 |
266 | // Dereference all mmap references before unmapping.
267 | if db.rwtx != nil {
268 | db.rwtx.root.dereference()
269 | }
270 |
271 | // Unmap existing data before continuing.
272 | if err := db.munmap(); err != nil {
273 | return err
274 | }
275 |
276 | // Memory-map the data file as a byte slice.
277 | if err := mmap(db, size); err != nil {
278 | return err
279 | }
280 |
281 | // Save references to the meta pages.
282 | db.meta0 = db.page(0).meta()
283 | db.meta1 = db.page(1).meta()
284 |
285 | // Validate the meta pages. We only return an error if both meta pages fail
286 | // validation, since meta0 failing validation means that it wasn't saved
287 | // properly -- but we can recover using meta1. And vice-versa.
288 | err0 := db.meta0.validate()
289 | err1 := db.meta1.validate()
290 | if err0 != nil && err1 != nil {
291 | return err0
292 | }
293 |
294 | return nil
295 | }
296 |
297 | // munmap unmaps the data file from memory.
298 | func (db *DB) munmap() error {
299 | if err := munmap(db); err != nil {
300 | return fmt.Errorf("unmap error: " + err.Error())
301 | }
302 | return nil
303 | }
304 |
305 | // mmapSize determines the appropriate size for the mmap given the current size
306 | // of the database. The minimum size is 32KB and doubles until it reaches 1GB.
307 | // Returns an error if the new mmap size is greater than the max allowed.
308 | func (db *DB) mmapSize(size int) (int, error) {
309 | // Double the size from 32KB until 1GB.
310 | for i := uint(15); i <= 30; i++ {
311 | if size <= 1<<i {
312 | return 1 << i, nil
313 | }
314 | }
315 |
316 | // Verify the requested size is not above the maximum allowed.
317 | if size > maxMapSize {
318 | return 0, fmt.Errorf("mmap too large")
319 | }
320 |
321 | // If larger than 1GB then grow by 1GB at a time.
322 | sz := int64(size)
323 | if remainder := sz % int64(maxMmapStep); remainder > 0 {
324 | sz += int64(maxMmapStep) - remainder
325 | }
326 |
327 | // Ensure that the mmap size is a multiple of the page size.
328 | // This should always be true since we're incrementing in MBs.
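// A worked example of the sizing rules above (assuming a 4096-byte page):
// a request of 80KB falls through the doubling loop and is rounded up to
// 128KB (1<<17), while a request of 1GB+1 is stepped up to 2GB, which is
// already page-aligned, so the fix-up below leaves it unchanged.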
329 | pageSize := int64(db.pageSize)
330 | if (sz % pageSize) != 0 {
331 | sz = ((sz / pageSize) + 1) * pageSize
332 | }
333 |
334 | // If we've exceeded the max size then only grow up to the max size.
335 | if sz > maxMapSize {
336 | sz = maxMapSize
337 | }
338 |
339 | return int(sz), nil
340 | }
341 |
342 | // init creates a new database file and initializes its meta pages.
343 | func (db *DB) init() error {
344 | // Set the page size to the OS page size.
345 | db.pageSize = os.Getpagesize()
346 |
347 | // Create two meta pages on a buffer.
348 | buf := make([]byte, db.pageSize*4)
349 | for i := 0; i < 2; i++ {
350 | p := db.pageInBuffer(buf[:], pgid(i))
351 | p.id = pgid(i)
352 | p.flags = metaPageFlag
353 |
354 | // Initialize the meta page.
355 | m := p.meta()
356 | m.magic = magic
357 | m.version = version
358 | m.pageSize = uint32(db.pageSize)
359 | m.freelist = 2
360 | m.root = bucket{root: 3}
361 | m.pgid = 4
362 | m.txid = txid(i)
363 | m.checksum = m.sum64()
364 | }
365 |
366 | // Write an empty freelist at page 2.
367 | p := db.pageInBuffer(buf[:], pgid(2))
368 | p.id = pgid(2)
369 | p.flags = freelistPageFlag
370 | p.count = 0
371 |
372 | // Write an empty leaf page at page 3.
373 | p = db.pageInBuffer(buf[:], pgid(3))
374 | p.id = pgid(3)
375 | p.flags = leafPageFlag
376 | p.count = 0
377 |
378 | // Write the buffer to our data file.
379 | if _, err := db.ops.writeAt(buf, 0); err != nil {
380 | return err
381 | }
382 | if err := fdatasync(db); err != nil {
383 | return err
384 | }
385 |
386 | return nil
387 | }
388 |
389 | // Close releases all database resources.
390 | // All transactions must be closed before closing the database.
391 | func (db *DB) Close() error {
392 | db.rwlock.Lock()
393 | defer db.rwlock.Unlock()
394 |
395 | db.metalock.Lock()
396 | defer db.metalock.Unlock()
397 |
398 | db.mmaplock.RLock()
399 | defer db.mmaplock.RUnlock()
400 |
401 | return db.close()
402 | }
403 |
404 | func (db *DB) close() error {
405 | if !db.opened {
406 | return nil
407 | }
408 |
409 | db.opened = false
410 |
411 | db.freelist = nil
412 |
413 | // Clear ops.
414 | db.ops.writeAt = nil
415 |
416 | // Close the mmap.
417 | if err := db.munmap(); err != nil {
418 | return err
419 | }
420 |
421 | // Close file handles.
422 | if db.file != nil {
423 | // No need to unlock read-only file.
424 | if !db.readOnly {
425 | // Unlock the file.
426 | if err := funlock(db); err != nil {
427 | log.Printf("bolt.Close(): funlock error: %s", err)
428 | }
429 | }
430 |
431 | // Close the file descriptor.
432 | if err := db.file.Close(); err != nil {
433 | return fmt.Errorf("db file close: %s", err)
434 | }
435 | db.file = nil
436 | }
437 |
438 | db.path = ""
439 | return nil
440 | }
441 |
442 | // Begin starts a new transaction.
443 | // Multiple read-only transactions can be used concurrently but only one
444 | // write transaction can be used at a time. Starting multiple write transactions
445 | // will cause the calls to block and be serialized until the current write
446 | // transaction finishes.
447 | //
448 | // Transactions should not be dependent on one another. Opening a read
449 | // transaction and a write transaction in the same goroutine can cause the
450 | // writer to deadlock because the database periodically needs to re-mmap itself
451 | // as it grows and it cannot do that while a read transaction is open.
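// The deadlock-prone shape looks like this (an illustrative sketch of what
// NOT to do, all in one goroutine; bucket and key names are hypothetical):
//
//	tx, _ := db.Begin(false) // long-lived read transaction...
//	err := db.Update(func(tx2 *bolt.Tx) error { // ...this write may block forever on a remap
//		return tx2.Bucket([]byte("b")).Put([]byte("k"), []byte("v"))
//	})
//	_ = tx.Rollback() // never reached if the writer blocked above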
452 | // 453 | // If a long running read transaction (for example, a snapshot transaction) is 454 | // needed, you might want to set DB.InitialMmapSize to a large enough value 455 | // to avoid potential blocking of write transaction. 456 | // 457 | // IMPORTANT: You must close read-only transactions after you are finished or 458 | // else the database will not reclaim old pages. 459 | func (db *DB) Begin(writable bool) (*Tx, error) { 460 | if writable { 461 | return db.beginRWTx() 462 | } 463 | return db.beginTx() 464 | } 465 | 466 | func (db *DB) beginTx() (*Tx, error) { 467 | // Lock the meta pages while we initialize the transaction. We obtain 468 | // the meta lock before the mmap lock because that's the order that the 469 | // write transaction will obtain them. 470 | db.metalock.Lock() 471 | 472 | // Obtain a read-only lock on the mmap. When the mmap is remapped it will 473 | // obtain a write lock so all transactions must finish before it can be 474 | // remapped. 475 | db.mmaplock.RLock() 476 | 477 | // Exit if the database is not open yet. 478 | if !db.opened { 479 | db.mmaplock.RUnlock() 480 | db.metalock.Unlock() 481 | return nil, ErrDatabaseNotOpen 482 | } 483 | 484 | // Create a transaction associated with the database. 485 | t := &Tx{} 486 | t.init(db) 487 | 488 | // Keep track of transaction until it closes. 489 | db.txs = append(db.txs, t) 490 | n := len(db.txs) 491 | 492 | // Unlock the meta pages. 493 | db.metalock.Unlock() 494 | 495 | // Update the transaction stats. 496 | db.statlock.Lock() 497 | db.stats.TxN++ 498 | db.stats.OpenTxN = n 499 | db.statlock.Unlock() 500 | 501 | return t, nil 502 | } 503 | 504 | func (db *DB) beginRWTx() (*Tx, error) { 505 | // If the database was opened with Options.ReadOnly, return an error. 506 | if db.readOnly { 507 | return nil, ErrDatabaseReadOnly 508 | } 509 | 510 | // Obtain writer lock. This is released by the transaction when it closes. 511 | // This enforces only one writer transaction at a time. 512 | db.rwlock.Lock() 513 | 514 | // Once we have the writer lock then we can lock the meta pages so that 515 | // we can set up the transaction. 516 | db.metalock.Lock() 517 | defer db.metalock.Unlock() 518 | 519 | // Exit if the database is not open yet. 520 | if !db.opened { 521 | db.rwlock.Unlock() 522 | return nil, ErrDatabaseNotOpen 523 | } 524 | 525 | // Create a transaction associated with the database. 526 | t := &Tx{writable: true} 527 | t.init(db) 528 | db.rwtx = t 529 | 530 | // Free any pages associated with closed read-only transactions. 531 | var minid txid = 0xFFFFFFFFFFFFFFFF 532 | for _, t := range db.txs { 533 | if t.meta.txid < minid { 534 | minid = t.meta.txid 535 | } 536 | } 537 | if minid > 0 { 538 | db.freelist.release(minid - 1) 539 | } 540 | 541 | return t, nil 542 | } 543 | 544 | // removeTx removes a transaction from the database. 545 | func (db *DB) removeTx(tx *Tx) { 546 | // Release the read lock on the mmap. 547 | db.mmaplock.RUnlock() 548 | 549 | // Use the meta lock to restrict access to the DB object. 550 | db.metalock.Lock() 551 | 552 | // Remove the transaction. 553 | for i, t := range db.txs { 554 | if t == tx { 555 | last := len(db.txs) - 1 556 | db.txs[i] = db.txs[last] 557 | db.txs[last] = nil 558 | db.txs = db.txs[:last] 559 | break 560 | } 561 | } 562 | n := len(db.txs) 563 | 564 | // Unlock the meta pages. 565 | db.metalock.Unlock() 566 | 567 | // Merge statistics. 
568 | db.statlock.Lock()
569 | db.stats.OpenTxN = n
570 | db.stats.TxStats.add(&tx.stats)
571 | db.statlock.Unlock()
572 | }
573 |
574 | // Update executes a function within the context of a read-write managed transaction.
575 | // If no error is returned from the function then the transaction is committed.
576 | // If an error is returned then the entire transaction is rolled back.
577 | // Any error that is returned from the function or returned from the commit is
578 | // returned from the Update() method.
579 | //
580 | // Attempting to manually commit or rollback within the function will cause a panic.
581 | func (db *DB) Update(fn func(*Tx) error) error {
582 | t, err := db.Begin(true)
583 | if err != nil {
584 | return err
585 | }
586 |
587 | // Make sure the transaction rolls back in the event of a panic.
588 | defer func() {
589 | if t.db != nil {
590 | t.rollback()
591 | }
592 | }()
593 |
594 | // Mark as a managed tx so that the inner function cannot manually commit.
595 | t.managed = true
596 |
597 | // If an error is returned from the function then roll back and return the error.
598 | err = fn(t)
599 | t.managed = false
600 | if err != nil {
601 | _ = t.Rollback()
602 | return err
603 | }
604 |
605 | return t.Commit()
606 | }
607 |
608 | // View executes a function within the context of a managed read-only transaction.
609 | // Any error that is returned from the function is returned from the View() method.
610 | //
611 | // Attempting to manually rollback within the function will cause a panic.
612 | func (db *DB) View(fn func(*Tx) error) error {
613 | t, err := db.Begin(false)
614 | if err != nil {
615 | return err
616 | }
617 |
618 | // Make sure the transaction rolls back in the event of a panic.
619 | defer func() {
620 | if t.db != nil {
621 | t.rollback()
622 | }
623 | }()
624 |
625 | // Mark as a managed tx so that the inner function cannot manually rollback.
626 | t.managed = true
627 |
628 | // If an error is returned from the function then pass it through.
629 | err = fn(t)
630 | t.managed = false
631 | if err != nil {
632 | _ = t.Rollback()
633 | return err
634 | }
635 |
636 | if err := t.Rollback(); err != nil {
637 | return err
638 | }
639 |
640 | return nil
641 | }
642 |
643 | // Batch calls fn as part of a batch. It behaves similarly to Update,
644 | // except:
645 | //
646 | // 1. concurrent Batch calls can be combined into a single Bolt
647 | // transaction.
648 | //
649 | // 2. the function passed to Batch may be called multiple times,
650 | // regardless of whether it returns an error or not.
651 | //
652 | // This means that Batch function side effects must be idempotent and
653 | // take permanent effect only after a successful return is seen by
654 | // the caller.
655 | //
656 | // The maximum batch size and delay can be adjusted with DB.MaxBatchSize
657 | // and DB.MaxBatchDelay, respectively.
658 | //
659 | // Batch is only useful when there are multiple goroutines calling it.
660 | func (db *DB) Batch(fn func(*Tx) error) error {
661 | errCh := make(chan error, 1)
662 |
663 | db.batchMu.Lock()
664 | if db.batch == nil || len(db.batch.calls) >= db.MaxBatchSize {
665 | // There is no existing batch, or the existing batch is full; start a new one.
666 | db.batch = &batch{ 667 | db: db, 668 | } 669 | db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger) 670 | } 671 | db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh}) 672 | if len(db.batch.calls) >= db.MaxBatchSize { 673 | // wake up batch, it's ready to run 674 | go db.batch.trigger() 675 | } 676 | db.batchMu.Unlock() 677 | 678 | err := <-errCh 679 | if err == trySolo { 680 | err = db.Update(fn) 681 | } 682 | return err 683 | } 684 | 685 | type call struct { 686 | fn func(*Tx) error 687 | err chan<- error 688 | } 689 | 690 | type batch struct { 691 | db *DB 692 | timer *time.Timer 693 | start sync.Once 694 | calls []call 695 | } 696 | 697 | // trigger runs the batch if it hasn't already been run. 698 | func (b *batch) trigger() { 699 | b.start.Do(b.run) 700 | } 701 | 702 | // run performs the transactions in the batch and communicates results 703 | // back to DB.Batch. 704 | func (b *batch) run() { 705 | b.db.batchMu.Lock() 706 | b.timer.Stop() 707 | // Make sure no new work is added to this batch, but don't break 708 | // other batches. 709 | if b.db.batch == b { 710 | b.db.batch = nil 711 | } 712 | b.db.batchMu.Unlock() 713 | 714 | retry: 715 | for len(b.calls) > 0 { 716 | var failIdx = -1 717 | err := b.db.Update(func(tx *Tx) error { 718 | for i, c := range b.calls { 719 | if err := safelyCall(c.fn, tx); err != nil { 720 | failIdx = i 721 | return err 722 | } 723 | } 724 | return nil 725 | }) 726 | 727 | if failIdx >= 0 { 728 | // take the failing transaction out of the batch. it's 729 | // safe to shorten b.calls here because db.batch no longer 730 | // points to us, and we hold the mutex anyway. 731 | c := b.calls[failIdx] 732 | b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1] 733 | // tell the submitter re-run it solo, continue with the rest of the batch 734 | c.err <- trySolo 735 | continue retry 736 | } 737 | 738 | // pass success, or bolt internal errors, to all callers 739 | for _, c := range b.calls { 740 | c.err <- err 741 | } 742 | break retry 743 | } 744 | } 745 | 746 | // trySolo is a special sentinel error value used for signaling that a 747 | // transaction function should be re-run. It should never be seen by 748 | // callers. 749 | var trySolo = errors.New("batch function returned an error and should be re-run solo") 750 | 751 | type panicked struct { 752 | reason interface{} 753 | } 754 | 755 | func (p panicked) Error() string { 756 | if err, ok := p.reason.(error); ok { 757 | return err.Error() 758 | } 759 | return fmt.Sprintf("panic: %v", p.reason) 760 | } 761 | 762 | func safelyCall(fn func(*Tx) error, tx *Tx) (err error) { 763 | defer func() { 764 | if p := recover(); p != nil { 765 | err = panicked{p} 766 | } 767 | }() 768 | return fn(tx) 769 | } 770 | 771 | // Sync executes fdatasync() against the database file handle. 772 | // 773 | // This is not necessary under normal operation, however, if you use NoSync 774 | // then it allows you to force the database file to sync against the disk. 775 | func (db *DB) Sync() error { return fdatasync(db) } 776 | 777 | // Stats retrieves ongoing performance stats for the database. 778 | // This is only updated when a transaction closes. 779 | func (db *DB) Stats() Stats { 780 | db.statlock.RLock() 781 | defer db.statlock.RUnlock() 782 | return db.stats 783 | } 784 | 785 | // This is for internal access to the raw data bytes from the C cursor, use 786 | // carefully, or not at all. 
787 | func (db *DB) Info() *Info { 788 | return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize} 789 | } 790 | 791 | // page retrieves a page reference from the mmap based on the current page size. 792 | func (db *DB) page(id pgid) *page { 793 | pos := id * pgid(db.pageSize) 794 | return (*page)(unsafe.Pointer(&db.data[pos])) 795 | } 796 | 797 | // pageInBuffer retrieves a page reference from a given byte array based on the current page size. 798 | func (db *DB) pageInBuffer(b []byte, id pgid) *page { 799 | return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)])) 800 | } 801 | 802 | // meta retrieves the current meta page reference. 803 | func (db *DB) meta() *meta { 804 | // We have to return the meta with the highest txid which doesn't fail 805 | // validation. Otherwise, we can cause errors when in fact the database is 806 | // in a consistent state. metaA is the one with the higher txid. 807 | metaA := db.meta0 808 | metaB := db.meta1 809 | if db.meta1.txid > db.meta0.txid { 810 | metaA = db.meta1 811 | metaB = db.meta0 812 | } 813 | 814 | // Use higher meta page if valid. Otherwise fallback to previous, if valid. 815 | if err := metaA.validate(); err == nil { 816 | return metaA 817 | } else if err := metaB.validate(); err == nil { 818 | return metaB 819 | } 820 | 821 | // This should never be reached, because both meta1 and meta0 were validated 822 | // on mmap() and we do fsync() on every write. 823 | panic("bolt.DB.meta(): invalid meta pages") 824 | } 825 | 826 | // allocate returns a contiguous block of memory starting at a given page. 827 | func (db *DB) allocate(count int) (*page, error) { 828 | // Allocate a temporary buffer for the page. 829 | var buf []byte 830 | if count == 1 { 831 | buf = db.pagePool.Get().([]byte) 832 | } else { 833 | buf = make([]byte, count*db.pageSize) 834 | } 835 | p := (*page)(unsafe.Pointer(&buf[0])) 836 | p.overflow = uint32(count - 1) 837 | 838 | // Use pages from the freelist if they are available. 839 | if p.id = db.freelist.allocate(count); p.id != 0 { 840 | return p, nil 841 | } 842 | 843 | // Resize mmap() if we're at the end. 844 | p.id = db.rwtx.meta.pgid 845 | var minsz = int((p.id+pgid(count))+1) * db.pageSize 846 | if minsz >= db.datasz { 847 | if err := db.mmap(minsz); err != nil { 848 | return nil, fmt.Errorf("mmap allocate error: %s", err) 849 | } 850 | } 851 | 852 | // Move the page id high water mark. 853 | db.rwtx.meta.pgid += pgid(count) 854 | 855 | return p, nil 856 | } 857 | 858 | // grow grows the size of the database to the given sz. 859 | func (db *DB) grow(sz int) error { 860 | // Ignore if the new size is less than available file size. 861 | if sz <= db.filesz { 862 | return nil 863 | } 864 | 865 | // If the data is smaller than the alloc size then only allocate what's needed. 866 | // Once it goes over the allocation size then allocate in chunks. 867 | if db.datasz < db.AllocSize { 868 | sz = db.datasz 869 | } else { 870 | sz += db.AllocSize 871 | } 872 | 873 | // Truncate and fsync to ensure file size metadata is flushed. 
874 | // https://github.com/boltdb/bolt/issues/284 875 | if !db.NoGrowSync && !db.readOnly { 876 | if runtime.GOOS != "windows" { 877 | if err := db.file.Truncate(int64(sz)); err != nil { 878 | return fmt.Errorf("file resize error: %s", err) 879 | } 880 | } 881 | if err := db.file.Sync(); err != nil { 882 | return fmt.Errorf("file sync error: %s", err) 883 | } 884 | } 885 | 886 | db.filesz = sz 887 | return nil 888 | } 889 | // IsReadOnly reports whether the database was opened in read-only mode. 890 | func (db *DB) IsReadOnly() bool { 891 | return db.readOnly 892 | } 893 | 894 | // Options represents the options that can be set when opening a database. 895 | type Options struct { 896 | // Timeout is the amount of time to wait to obtain a file lock. 897 | // When set to zero it will wait indefinitely. This option is only 898 | // available on Darwin and Linux. 899 | Timeout time.Duration 900 | 901 | // Sets the DB.NoGrowSync flag before memory mapping the file. 902 | NoGrowSync bool 903 | 904 | // Open database in read-only mode. Uses flock(..., LOCK_SH | LOCK_NB) to 905 | // grab a shared lock (UNIX). 906 | ReadOnly bool 907 | 908 | // Sets the DB.MmapFlags flag before memory mapping the file. 909 | MmapFlags int 910 | 911 | // InitialMmapSize is the initial mmap size of the database 912 | // in bytes. Read transactions won't block write transactions 913 | // if the InitialMmapSize is large enough to hold the database mmap 914 | // size. (See DB.Begin for more information.) 915 | // 916 | // If <=0, the initial map size is 0. 917 | // If InitialMmapSize is smaller than the previous database size, 918 | // it has no effect. 919 | InitialMmapSize int 920 | } 921 | 922 | // DefaultOptions represents the options used if nil options are passed into Open(). 923 | // No timeout is used, which will cause Bolt to wait indefinitely for a lock. 924 | var DefaultOptions = &Options{ 925 | Timeout: 0, 926 | NoGrowSync: false, 927 | } 928 | 929 | // Stats represents statistics about the database. 930 | type Stats struct { 931 | // Freelist stats 932 | FreePageN int // total number of free pages on the freelist 933 | PendingPageN int // total number of pending pages on the freelist 934 | FreeAlloc int // total bytes allocated in free pages 935 | FreelistInuse int // total bytes used by the freelist 936 | 937 | // Transaction stats 938 | TxN int // total number of started read transactions 939 | OpenTxN int // number of currently open read transactions 940 | 941 | TxStats TxStats // global, ongoing stats. 942 | } 943 | 944 | // Sub calculates and returns the difference between two sets of database stats. 945 | // This is useful when obtaining stats at two different points in time and 946 | // you need the performance counters that occurred within that time span.
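// A sketch of the intended pattern (the interval is illustrative):
//
//	prev := db.Stats()
//	time.Sleep(10 * time.Second)
//	cur := db.Stats()
//	diff := cur.Sub(&prev)
//	// diff.TxN is the number of read transactions started in the interval.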
947 | func (s *Stats) Sub(other *Stats) Stats { 948 | if other == nil { 949 | return *s 950 | } 951 | var diff Stats 952 | diff.FreePageN = s.FreePageN 953 | diff.PendingPageN = s.PendingPageN 954 | diff.FreeAlloc = s.FreeAlloc 955 | diff.FreelistInuse = s.FreelistInuse 956 | diff.TxN = s.TxN - other.TxN 957 | diff.TxStats = s.TxStats.Sub(&other.TxStats) 958 | return diff 959 | } 960 | 961 | func (s *Stats) add(other *Stats) { 962 | s.TxStats.add(&other.TxStats) 963 | } 964 | 965 | type Info struct { 966 | Data uintptr 967 | PageSize int 968 | } 969 | 970 | type meta struct { 971 | magic uint32 972 | version uint32 973 | pageSize uint32 974 | flags uint32 975 | root bucket 976 | freelist pgid 977 | pgid pgid 978 | txid txid 979 | checksum uint64 980 | } 981 | 982 | // validate checks the marker bytes and version of the meta page to ensure it matches this binary. 983 | func (m *meta) validate() error { 984 | if m.magic != magic { 985 | return ErrInvalid 986 | } else if m.version != version { 987 | return ErrVersionMismatch 988 | } else if m.checksum != 0 && m.checksum != m.sum64() { 989 | return ErrChecksum 990 | } 991 | return nil 992 | } 993 | 994 | // copy copies one meta object to another. 995 | func (m *meta) copy(dest *meta) { 996 | *dest = *m 997 | } 998 | 999 | // write writes the meta onto a page. 1000 | func (m *meta) write(p *page) { 1001 | if m.root.root >= m.pgid { 1002 | panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid)) 1003 | } else if m.freelist >= m.pgid { 1004 | panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid)) 1005 | } 1006 | 1007 | // Page id is either going to be 0 or 1 which we can determine by the transaction ID. 1008 | p.id = pgid(m.txid % 2) 1009 | p.flags |= metaPageFlag 1010 | 1011 | // Calculate the checksum. 1012 | m.checksum = m.sum64() 1013 | 1014 | m.copy(p.meta()) 1015 | } 1016 | 1017 | // generates the checksum for the meta. 1018 | func (m *meta) sum64() uint64 { 1019 | var h = fnv.New64a() 1020 | _, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:]) 1021 | return h.Sum64() 1022 | } 1023 | 1024 | // _assert will panic with a given formatted message if the given condition is false. 1025 | func _assert(condition bool, msg string, v ...interface{}) { 1026 | if !condition { 1027 | panic(fmt.Sprintf("assertion failed: "+msg, v...)) 1028 | } 1029 | } 1030 | 1031 | func warn(v ...interface{}) { fmt.Fprintln(os.Stderr, v...) } 1032 | func warnf(msg string, v ...interface{}) { fmt.Fprintf(os.Stderr, msg+"\n", v...) } 1033 | 1034 | func printstack() { 1035 | stack := strings.Join(strings.Split(string(debug.Stack()), "\n")[2:], "\n") 1036 | fmt.Fprintln(os.Stderr, stack) 1037 | } 1038 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package bolt implements a low-level key/value store in pure Go. It supports 3 | fully serializable transactions, ACID semantics, and lock-free MVCC with 4 | multiple readers and a single writer. Bolt can be used for projects that 5 | want a simple data store without the need to add large dependencies such as 6 | Postgres or MySQL. 7 | 8 | Bolt is a single-level, zero-copy, B+tree data store. This means that Bolt is 9 | optimized for fast read access and does not require recovery in the event of a 10 | system crash. 
Transactions which have not finished committing will simply be 11 | rolled back in the event of a crash. 12 | 13 | The design of Bolt is based on Howard Chu's LMDB database project. 14 | 15 | Bolt currently works on Windows, Mac OS X, and Linux. 16 | 17 | 18 | Basics 19 | 20 | There are only a few types in Bolt: DB, Bucket, Tx, and Cursor. The DB is 21 | a collection of buckets and is represented by a single file on disk. A bucket is 22 | a collection of unique keys that are associated with values. 23 | 24 | Transactions provide either read-only or read-write access to the database. 25 | Read-only transactions can retrieve key/value pairs and can use Cursors to 26 | iterate over the dataset sequentially. Read-write transactions can create and 27 | delete buckets and can insert and remove keys. Only one read-write transaction 28 | is allowed at a time. 29 | 30 | 31 | Caveats 32 | 33 | The database uses a read-only, memory-mapped data file to ensure that 34 | applications cannot corrupt the database, however, this means that keys and 35 | values returned from Bolt cannot be changed. Writing to a read-only byte slice 36 | will cause Go to panic. 37 | 38 | Keys and values retrieved from the database are only valid for the life of 39 | the transaction. When used outside the transaction, these byte slices can 40 | point to different data or can point to invalid memory which will cause a panic. 41 | 42 | 43 | */ 44 | package bolt 45 | -------------------------------------------------------------------------------- /errors.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import "errors" 4 | 5 | // These errors can be returned when opening or calling methods on a DB. 6 | var ( 7 | // ErrDatabaseNotOpen is returned when a DB instance is accessed before it 8 | // is opened or after it is closed. 9 | ErrDatabaseNotOpen = errors.New("database not open") 10 | 11 | // ErrDatabaseOpen is returned when opening a database that is 12 | // already open. 13 | ErrDatabaseOpen = errors.New("database already open") 14 | 15 | // ErrInvalid is returned when both meta pages on a database are invalid. 16 | // This typically occurs when a file is not a bolt database. 17 | ErrInvalid = errors.New("invalid database") 18 | 19 | // ErrVersionMismatch is returned when the data file was created with a 20 | // different version of Bolt. 21 | ErrVersionMismatch = errors.New("version mismatch") 22 | 23 | // ErrChecksum is returned when either meta page checksum does not match. 24 | ErrChecksum = errors.New("checksum error") 25 | 26 | // ErrTimeout is returned when a database cannot obtain an exclusive lock 27 | // on the data file after the timeout passed to Open(). 28 | ErrTimeout = errors.New("timeout") 29 | ) 30 | 31 | // These errors can occur when beginning or committing a Tx. 32 | var ( 33 | // ErrTxNotWritable is returned when performing a write operation on a 34 | // read-only transaction. 35 | ErrTxNotWritable = errors.New("tx not writable") 36 | 37 | // ErrTxClosed is returned when committing or rolling back a transaction 38 | // that has already been committed or rolled back. 39 | ErrTxClosed = errors.New("tx closed") 40 | 41 | // ErrDatabaseReadOnly is returned when a mutating transaction is started on a 42 | // read-only database. 43 | ErrDatabaseReadOnly = errors.New("database is in read-only mode") 44 | ) 45 | 46 | // These errors can occur when putting or deleting a value or a bucket. 
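// For example (a sketch from a client's point of view; the bucket name is
// illustrative):
//
//	err := db.Update(func(tx *bolt.Tx) error {
//		b := tx.Bucket([]byte("widgets"))
//		if b == nil {
//			return bolt.ErrBucketNotFound
//		}
//		return b.Put([]byte(""), nil) // fails with ErrKeyRequired
//	})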
47 | var ( 48 | // ErrBucketNotFound is returned when trying to access a bucket that has 49 | // not been created yet. 50 | ErrBucketNotFound = errors.New("bucket not found") 51 | 52 | // ErrBucketExists is returned when creating a bucket that already exists. 53 | ErrBucketExists = errors.New("bucket already exists") 54 | 55 | // ErrBucketNameRequired is returned when creating a bucket with a blank name. 56 | ErrBucketNameRequired = errors.New("bucket name required") 57 | 58 | // ErrKeyRequired is returned when inserting a zero-length key. 59 | ErrKeyRequired = errors.New("key required") 60 | 61 | // ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize. 62 | ErrKeyTooLarge = errors.New("key too large") 63 | 64 | // ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize. 65 | ErrValueTooLarge = errors.New("value too large") 66 | 67 | // ErrIncompatibleValue is returned when trying to create or delete a bucket 68 | // on an existing non-bucket key or when trying to create or delete a 69 | // non-bucket key on an existing bucket key. 70 | ErrIncompatibleValue = errors.New("incompatible value") 71 | ) 72 | -------------------------------------------------------------------------------- /freelist.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "unsafe" 7 | ) 8 | 9 | // freelist represents a list of all pages that are available for allocation. 10 | // It also tracks pages that have been freed but are still in use by open transactions. 11 | type freelist struct { 12 | ids []pgid // all free and available page ids. 13 | pending map[txid][]pgid // mapping of soon-to-be free page ids by tx. 14 | cache map[pgid]bool // fast lookup of all free and pending page ids. 15 | } 16 | 17 | // newFreelist returns an empty, initialized freelist. 18 | func newFreelist() *freelist { 19 | return &freelist{ 20 | pending: make(map[txid][]pgid), 21 | cache: make(map[pgid]bool), 22 | } 23 | } 24 | 25 | // size returns the size of the page after serialization. 26 | func (f *freelist) size() int { 27 | n := f.count() 28 | if n >= 0xFFFF { 29 | // The first element will be used to store the count. See freelist.write. 30 | n++ 31 | } 32 | return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n) 33 | } 34 | 35 | // count returns count of pages on the freelist 36 | func (f *freelist) count() int { 37 | return f.free_count() + f.pending_count() 38 | } 39 | 40 | // free_count returns count of free pages 41 | func (f *freelist) free_count() int { 42 | return len(f.ids) 43 | } 44 | 45 | // pending_count returns count of pending pages 46 | func (f *freelist) pending_count() int { 47 | var count int 48 | for _, list := range f.pending { 49 | count += len(list) 50 | } 51 | return count 52 | } 53 | 54 | // copyall copies into dst a list of all free ids and all pending ids in one sorted list. 55 | // f.count returns the minimum length required for dst. 56 | func (f *freelist) copyall(dst []pgid) { 57 | m := make(pgids, 0, f.pending_count()) 58 | for _, list := range f.pending { 59 | m = append(m, list...) 60 | } 61 | sort.Sort(m) 62 | mergepgids(dst, f.ids, m) 63 | } 64 | 65 | // allocate returns the starting page id of a contiguous list of pages of a given size. 66 | // If a contiguous block cannot be found then 0 is returned.
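// For example, with ids = [3 4 5 6 7 9 12 13 18], allocate(3) returns 3
// (consuming the run 3-5) and leaves [6 7 9 12 13 18]; another allocate(3)
// then returns 0, since no remaining run is three pages long. (These values
// mirror TestFreelist_allocate in freelist_test.go.)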
67 | func (f *freelist) allocate(n int) pgid { 68 | if len(f.ids) == 0 { 69 | return 0 70 | } 71 | 72 | var initial, previd pgid 73 | for i, id := range f.ids { 74 | if id <= 1 { 75 | panic(fmt.Sprintf("invalid page allocation: %d", id)) 76 | } 77 | 78 | // Reset initial page if this is not contiguous. 79 | if previd == 0 || id-previd != 1 { 80 | initial = id 81 | } 82 | 83 | // If we found a contiguous block then remove it and return it. 84 | if (id-initial)+1 == pgid(n) { 85 | // If we're allocating off the beginning then take the fast path 86 | // and just adjust the existing slice. This will use extra memory 87 | // temporarily but the append() in free() will realloc the slice 88 | // as necessary. 89 | if (i + 1) == n { 90 | f.ids = f.ids[i+1:] 91 | } else { 92 | copy(f.ids[i-n+1:], f.ids[i+1:]) 93 | f.ids = f.ids[:len(f.ids)-n] 94 | } 95 | 96 | // Remove from the free cache. 97 | for i := pgid(0); i < pgid(n); i++ { 98 | delete(f.cache, initial+i) 99 | } 100 | 101 | return initial 102 | } 103 | 104 | previd = id 105 | } 106 | return 0 107 | } 108 | 109 | // free releases a page and its overflow for a given transaction id. 110 | // If the page is already free then a panic will occur. 111 | func (f *freelist) free(txid txid, p *page) { 112 | if p.id <= 1 { 113 | panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id)) 114 | } 115 | 116 | // Free page and all its overflow pages. 117 | var ids = f.pending[txid] 118 | for id := p.id; id <= p.id+pgid(p.overflow); id++ { 119 | // Verify that page is not already free. 120 | if f.cache[id] { 121 | panic(fmt.Sprintf("page %d already freed", id)) 122 | } 123 | 124 | // Add to the freelist and cache. 125 | ids = append(ids, id) 126 | f.cache[id] = true 127 | } 128 | f.pending[txid] = ids 129 | } 130 | 131 | // release moves all page ids for a transaction id (or older) to the freelist. 132 | func (f *freelist) release(txid txid) { 133 | m := make(pgids, 0) 134 | for tid, ids := range f.pending { 135 | if tid <= txid { 136 | // Move transaction's pending pages to the available freelist. 137 | // Don't remove from the cache since the page is still free. 138 | m = append(m, ids...) 139 | delete(f.pending, tid) 140 | } 141 | } 142 | sort.Sort(m) 143 | f.ids = pgids(f.ids).merge(m) 144 | } 145 | 146 | // rollback removes the pages from a given pending tx. 147 | func (f *freelist) rollback(txid txid) { 148 | // Remove page ids from cache. 149 | for _, id := range f.pending[txid] { 150 | delete(f.cache, id) 151 | } 152 | 153 | // Remove pages from pending list. 154 | delete(f.pending, txid) 155 | } 156 | 157 | // freed returns whether a given page is in the free list. 158 | func (f *freelist) freed(pgid pgid) bool { 159 | return f.cache[pgid] 160 | } 161 | 162 | // read initializes the freelist from a freelist page. 163 | func (f *freelist) read(p *page) { 164 | // If the page.count is at the max uint16 value (64k) then it's considered 165 | // an overflow and the size of the freelist is stored as the first element. 166 | idx, count := 0, int(p.count) 167 | if count == 0xFFFF { 168 | idx = 1 169 | count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0]) 170 | } 171 | 172 | // Copy the list of page ids from the freelist. 173 | if count == 0 { 174 | f.ids = nil 175 | } else { 176 | ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count] // idx+count, not count: the overflow case must keep its final id 177 | f.ids = make([]pgid, len(ids)) 178 | copy(f.ids, ids) 179 | 180 | // Make sure they're sorted. 181 | sort.Sort(pgids(f.ids)) 182 | } 183 | 184 | // Rebuild the page cache.
185 | f.reindex() 186 | } 187 | 188 | // write writes the page ids onto a freelist page. All free and pending ids are 189 | // saved to disk since in the event of a program crash, all pending ids will 190 | // become free. 191 | func (f *freelist) write(p *page) error { 192 | // Combine the old free pgids and pgids waiting on an open transaction. 193 | 194 | // Update the header flag. 195 | p.flags |= freelistPageFlag 196 | 197 | // The page.count can only hold up to 64k elements so if we overflow that 198 | // number then we handle it by putting the size in the first element. 199 | lenids := f.count() 200 | if lenids == 0 { 201 | p.count = uint16(lenids) 202 | } else if lenids < 0xFFFF { 203 | p.count = uint16(lenids) 204 | f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:]) 205 | } else { 206 | p.count = 0xFFFF 207 | ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids) 208 | f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:]) 209 | } 210 | 211 | return nil 212 | } 213 | 214 | // reload reads the freelist from a page and filters out pending items. 215 | func (f *freelist) reload(p *page) { 216 | f.read(p) 217 | 218 | // Build a cache of only pending pages. 219 | pcache := make(map[pgid]bool) 220 | for _, pendingIDs := range f.pending { 221 | for _, pendingID := range pendingIDs { 222 | pcache[pendingID] = true 223 | } 224 | } 225 | 226 | // Check each page in the freelist and build a new available freelist 227 | // with any pages not in the pending lists. 228 | var a []pgid 229 | for _, id := range f.ids { 230 | if !pcache[id] { 231 | a = append(a, id) 232 | } 233 | } 234 | f.ids = a 235 | 236 | // Once the available list is rebuilt then rebuild the free cache so that 237 | // it includes the available and pending free pages. 238 | f.reindex() 239 | } 240 | 241 | // reindex rebuilds the free cache based on available and pending free lists. 242 | func (f *freelist) reindex() { 243 | f.cache = make(map[pgid]bool, len(f.ids)) 244 | for _, id := range f.ids { 245 | f.cache[id] = true 246 | } 247 | for _, pendingIDs := range f.pending { 248 | for _, pendingID := range pendingIDs { 249 | f.cache[pendingID] = true 250 | } 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /freelist_test.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "math/rand" 5 | "reflect" 6 | "sort" 7 | "testing" 8 | "unsafe" 9 | ) 10 | 11 | // Ensure that a page is added to a transaction's freelist. 12 | func TestFreelist_free(t *testing.T) { 13 | f := newFreelist() 14 | f.free(100, &page{id: 12}) 15 | if !reflect.DeepEqual([]pgid{12}, f.pending[100]) { 16 | t.Fatalf("exp=%v; got=%v", []pgid{12}, f.pending[100]) 17 | } 18 | } 19 | 20 | // Ensure that a page and its overflow is added to a transaction's freelist. 21 | func TestFreelist_free_overflow(t *testing.T) { 22 | f := newFreelist() 23 | f.free(100, &page{id: 12, overflow: 3}) 24 | if exp := []pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pending[100]) { 25 | t.Fatalf("exp=%v; got=%v", exp, f.pending[100]) 26 | } 27 | } 28 | 29 | // Ensure that a transaction's free pages can be released. 
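// Concretely: pages 9, 12, and 13 are freed under txid 100 and become
// available once release(100) runs, while page 39, freed under txid 102,
// stays pending until release(102).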
30 | func TestFreelist_release(t *testing.T) { 31 | f := newFreelist() 32 | f.free(100, &page{id: 12, overflow: 1}) 33 | f.free(100, &page{id: 9}) 34 | f.free(102, &page{id: 39}) 35 | f.release(100) 36 | f.release(101) 37 | if exp := []pgid{9, 12, 13}; !reflect.DeepEqual(exp, f.ids) { 38 | t.Fatalf("exp=%v; got=%v", exp, f.ids) 39 | } 40 | 41 | f.release(102) 42 | if exp := []pgid{9, 12, 13, 39}; !reflect.DeepEqual(exp, f.ids) { 43 | t.Fatalf("exp=%v; got=%v", exp, f.ids) 44 | } 45 | } 46 | 47 | // Ensure that a freelist can find contiguous blocks of pages. 48 | func TestFreelist_allocate(t *testing.T) { 49 | f := &freelist{ids: []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}} 50 | if id := int(f.allocate(3)); id != 3 { 51 | t.Fatalf("exp=3; got=%v", id) 52 | } 53 | if id := int(f.allocate(1)); id != 6 { 54 | t.Fatalf("exp=6; got=%v", id) 55 | } 56 | if id := int(f.allocate(3)); id != 0 { 57 | t.Fatalf("exp=0; got=%v", id) 58 | } 59 | if id := int(f.allocate(2)); id != 12 { 60 | t.Fatalf("exp=12; got=%v", id) 61 | } 62 | if id := int(f.allocate(1)); id != 7 { 63 | t.Fatalf("exp=7; got=%v", id) 64 | } 65 | if id := int(f.allocate(0)); id != 0 { 66 | t.Fatalf("exp=0; got=%v", id) 67 | } 68 | if id := int(f.allocate(0)); id != 0 { 69 | t.Fatalf("exp=0; got=%v", id) 70 | } 71 | if exp := []pgid{9, 18}; !reflect.DeepEqual(exp, f.ids) { 72 | t.Fatalf("exp=%v; got=%v", exp, f.ids) 73 | } 74 | 75 | if id := int(f.allocate(1)); id != 9 { 76 | t.Fatalf("exp=9; got=%v", id) 77 | } 78 | if id := int(f.allocate(1)); id != 18 { 79 | t.Fatalf("exp=18; got=%v", id) 80 | } 81 | if id := int(f.allocate(1)); id != 0 { 82 | t.Fatalf("exp=0; got=%v", id) 83 | } 84 | if exp := []pgid{}; !reflect.DeepEqual(exp, f.ids) { 85 | t.Fatalf("exp=%v; got=%v", exp, f.ids) 86 | } 87 | } 88 | 89 | // Ensure that a freelist can deserialize from a freelist page. 90 | func TestFreelist_read(t *testing.T) { 91 | // Create a page. 92 | var buf [4096]byte 93 | page := (*page)(unsafe.Pointer(&buf[0])) 94 | page.flags = freelistPageFlag 95 | page.count = 2 96 | 97 | // Insert 2 page ids. 98 | ids := (*[3]pgid)(unsafe.Pointer(&page.ptr)) 99 | ids[0] = 23 100 | ids[1] = 50 101 | 102 | // Deserialize page into a freelist. 103 | f := newFreelist() 104 | f.read(page) 105 | 106 | // Ensure that there are two page ids in the freelist. 107 | if exp := []pgid{23, 50}; !reflect.DeepEqual(exp, f.ids) { 108 | t.Fatalf("exp=%v; got=%v", exp, f.ids) 109 | } 110 | } 111 | 112 | // Ensure that a freelist can serialize into a freelist page. 113 | func TestFreelist_write(t *testing.T) { 114 | // Create a freelist and write it to a page. 115 | var buf [4096]byte 116 | f := &freelist{ids: []pgid{12, 39}, pending: make(map[txid][]pgid)} 117 | f.pending[100] = []pgid{28, 11} 118 | f.pending[101] = []pgid{3} 119 | p := (*page)(unsafe.Pointer(&buf[0])) 120 | if err := f.write(p); err != nil { 121 | t.Fatal(err) 122 | } 123 | 124 | // Read the page back out. 125 | f2 := newFreelist() 126 | f2.read(p) 127 | 128 | // Ensure that the freelist is correct. 129 | // All pages should be present and in sorted order.
130 | if exp := []pgid{3, 11, 12, 28, 39}; !reflect.DeepEqual(exp, f2.ids) { 131 | t.Fatalf("exp=%v; got=%v", exp, f2.ids) 132 | } 133 | } 134 | 135 | func Benchmark_FreelistRelease10K(b *testing.B) { benchmark_FreelistRelease(b, 10000) } 136 | func Benchmark_FreelistRelease100K(b *testing.B) { benchmark_FreelistRelease(b, 100000) } 137 | func Benchmark_FreelistRelease1000K(b *testing.B) { benchmark_FreelistRelease(b, 1000000) } 138 | func Benchmark_FreelistRelease10000K(b *testing.B) { benchmark_FreelistRelease(b, 10000000) } 139 | 140 | func benchmark_FreelistRelease(b *testing.B, size int) { 141 | ids := randomPgids(size) 142 | pending := randomPgids(len(ids) / 400) 143 | b.ResetTimer() 144 | for i := 0; i < b.N; i++ { 145 | f := &freelist{ids: ids, pending: map[txid][]pgid{1: pending}} 146 | f.release(1) 147 | } 148 | } 149 | 150 | func randomPgids(n int) []pgid { 151 | rand.Seed(42) 152 | pgids := make(pgids, n) 153 | for i := range pgids { 154 | pgids[i] = pgid(rand.Int63()) 155 | } 156 | sort.Sort(pgids) 157 | return pgids 158 | } 159 | -------------------------------------------------------------------------------- /node.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "sort" 7 | "unsafe" 8 | ) 9 | 10 | // node represents an in-memory, deserialized page. 11 | type node struct { 12 | bucket *Bucket 13 | isLeaf bool 14 | unbalanced bool 15 | spilled bool 16 | key []byte 17 | pgid pgid 18 | parent *node 19 | children nodes 20 | inodes inodes 21 | } 22 | 23 | // root returns the top-level node this node is attached to. 24 | func (n *node) root() *node { 25 | if n.parent == nil { 26 | return n 27 | } 28 | return n.parent.root() 29 | } 30 | 31 | // minKeys returns the minimum number of inodes this node should have. 32 | func (n *node) minKeys() int { 33 | if n.isLeaf { 34 | return 1 35 | } 36 | return 2 37 | } 38 | 39 | // size returns the size of the node after serialization. 40 | func (n *node) size() int { 41 | sz, elsz := pageHeaderSize, n.pageElementSize() 42 | for i := 0; i < len(n.inodes); i++ { 43 | item := &n.inodes[i] 44 | sz += elsz + len(item.key) + len(item.value) 45 | } 46 | return sz 47 | } 48 | 49 | // sizeLessThan returns true if the node is less than a given size. 50 | // This is an optimization to avoid calculating a large node when we only need 51 | // to know if it fits inside a certain page size. 52 | func (n *node) sizeLessThan(v int) bool { 53 | sz, elsz := pageHeaderSize, n.pageElementSize() 54 | for i := 0; i < len(n.inodes); i++ { 55 | item := &n.inodes[i] 56 | sz += elsz + len(item.key) + len(item.value) 57 | if sz >= v { 58 | return false 59 | } 60 | } 61 | return true 62 | } 63 | 64 | // pageElementSize returns the size of each page element based on the type of node. 65 | func (n *node) pageElementSize() int { 66 | if n.isLeaf { 67 | return leafPageElementSize 68 | } 69 | return branchPageElementSize 70 | } 71 | 72 | // childAt returns the child node at a given index. 73 | func (n *node) childAt(index int) *node { 74 | if n.isLeaf { 75 | panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index)) 76 | } 77 | return n.bucket.node(n.inodes[index].pgid, n) 78 | } 79 | 80 | // childIndex returns the index of a given child node. 
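// Because inodes are kept sorted by key, the lookup below is a binary search
// (sort.Search) rather than a linear scan. For example (sketch), with child
// keys ["a" "c" "e"], a child whose key is "c" yields index 1.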
81 | func (n *node) childIndex(child *node) int { 82 | index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 }) 83 | return index 84 | } 85 | 86 | // numChildren returns the number of children. 87 | func (n *node) numChildren() int { 88 | return len(n.inodes) 89 | } 90 | 91 | // nextSibling returns the next node with the same parent. 92 | func (n *node) nextSibling() *node { 93 | if n.parent == nil { 94 | return nil 95 | } 96 | index := n.parent.childIndex(n) 97 | if index >= n.parent.numChildren()-1 { 98 | return nil 99 | } 100 | return n.parent.childAt(index + 1) 101 | } 102 | 103 | // prevSibling returns the previous node with the same parent. 104 | func (n *node) prevSibling() *node { 105 | if n.parent == nil { 106 | return nil 107 | } 108 | index := n.parent.childIndex(n) 109 | if index == 0 { 110 | return nil 111 | } 112 | return n.parent.childAt(index - 1) 113 | } 114 | 115 | // put inserts a key/value. 116 | func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) { 117 | if pgid >= n.bucket.tx.meta.pgid { 118 | panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid)) 119 | } else if len(oldKey) <= 0 { 120 | panic("put: zero-length old key") 121 | } else if len(newKey) <= 0 { 122 | panic("put: zero-length new key") 123 | } 124 | 125 | // Find insertion index. 126 | index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 }) 127 | 128 | // Add capacity and shift nodes if we don't have an exact match and need to insert. 129 | exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey)) 130 | if !exact { 131 | n.inodes = append(n.inodes, inode{}) 132 | copy(n.inodes[index+1:], n.inodes[index:]) 133 | } 134 | 135 | inode := &n.inodes[index] 136 | inode.flags = flags 137 | inode.key = newKey 138 | inode.value = value 139 | inode.pgid = pgid 140 | _assert(len(inode.key) > 0, "put: zero-length inode key") 141 | } 142 | 143 | // del removes a key from the node. 144 | func (n *node) del(key []byte) { 145 | // Find index of key. 146 | index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, key) != -1 }) 147 | 148 | // Exit if the key isn't found. 149 | if index >= len(n.inodes) || !bytes.Equal(n.inodes[index].key, key) { 150 | return 151 | } 152 | 153 | // Delete inode from the node. 154 | n.inodes = append(n.inodes[:index], n.inodes[index+1:]...) 155 | 156 | // Mark the node as needing rebalancing. 157 | n.unbalanced = true 158 | } 159 | 160 | // read initializes the node from a page. 161 | func (n *node) read(p *page) { 162 | n.pgid = p.id 163 | n.isLeaf = ((p.flags & leafPageFlag) != 0) 164 | n.inodes = make(inodes, int(p.count)) 165 | 166 | for i := 0; i < int(p.count); i++ { 167 | inode := &n.inodes[i] 168 | if n.isLeaf { 169 | elem := p.leafPageElement(uint16(i)) 170 | inode.flags = elem.flags 171 | inode.key = elem.key() 172 | inode.value = elem.value() 173 | } else { 174 | elem := p.branchPageElement(uint16(i)) 175 | inode.pgid = elem.pgid 176 | inode.key = elem.key() 177 | } 178 | _assert(len(inode.key) > 0, "read: zero-length inode key") 179 | } 180 | 181 | // Save first key so we can find the node in the parent when we spill. 182 | if len(n.inodes) > 0 { 183 | n.key = n.inodes[0].key 184 | _assert(len(n.key) > 0, "read: zero-length node key") 185 | } else { 186 | n.key = nil 187 | } 188 | } 189 | 190 | // write writes the items onto one or more pages. 
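// The layout written below is (sketch):
//
//	[ page header | element 0 | element 1 | ... | key0 value0 key1 value1 ... ]
//
// Each fixed-size element records pos, the offset from that element to its
// key, so keys and values can be located without a separate per-page index.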
191 | func (n *node) write(p *page) { 192 | // Initialize page. 193 | if n.isLeaf { 194 | p.flags |= leafPageFlag 195 | } else { 196 | p.flags |= branchPageFlag 197 | } 198 | 199 | if len(n.inodes) >= 0xFFFF { 200 | panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id)) 201 | } 202 | p.count = uint16(len(n.inodes)) 203 | 204 | // Stop here if there are no items to write. 205 | if p.count == 0 { 206 | return 207 | } 208 | 209 | // Loop over each item and write it to the page. 210 | b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):] 211 | for i, item := range n.inodes { 212 | _assert(len(item.key) > 0, "write: zero-length inode key") 213 | 214 | // Write the page element. 215 | if n.isLeaf { 216 | elem := p.leafPageElement(uint16(i)) 217 | elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) 218 | elem.flags = item.flags 219 | elem.ksize = uint32(len(item.key)) 220 | elem.vsize = uint32(len(item.value)) 221 | } else { 222 | elem := p.branchPageElement(uint16(i)) 223 | elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) 224 | elem.ksize = uint32(len(item.key)) 225 | elem.pgid = item.pgid 226 | _assert(elem.pgid != p.id, "write: circular dependency occurred") 227 | } 228 | 229 | // If the length of key+value is larger than the max allocation size 230 | // then we need to reallocate the byte array pointer. 231 | // 232 | // See: https://github.com/boltdb/bolt/pull/335 233 | klen, vlen := len(item.key), len(item.value) 234 | if len(b) < klen+vlen { 235 | b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:] 236 | } 237 | 238 | // Write data for the element to the end of the page. 239 | copy(b[0:], item.key) 240 | b = b[klen:] 241 | copy(b[0:], item.value) 242 | b = b[vlen:] 243 | } 244 | 245 | // DEBUG ONLY: n.dump() 246 | } 247 | 248 | // split breaks up a node into multiple smaller nodes, if appropriate. 249 | // This should only be called from the spill() function. 250 | func (n *node) split(pageSize int) []*node { 251 | var nodes []*node 252 | 253 | node := n 254 | for { 255 | // Split node into two. 256 | a, b := node.splitTwo(pageSize) 257 | nodes = append(nodes, a) 258 | 259 | // If we can't split then exit the loop. 260 | if b == nil { 261 | break 262 | } 263 | 264 | // Set node to b so it gets split on the next iteration. 265 | node = b 266 | } 267 | 268 | return nodes 269 | } 270 | 271 | // splitTwo breaks up a node into two smaller nodes, if appropriate. 272 | // This should only be called from the split() function. 273 | func (n *node) splitTwo(pageSize int) (*node, *node) { 274 | // Ignore the split if the page doesn't have at least enough nodes for 275 | // two pages or if the nodes can fit in a single page. 276 | if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) { 277 | return n, nil 278 | } 279 | 280 | // Determine the threshold before starting a new node. 281 | var fillPercent = n.bucket.FillPercent 282 | if fillPercent < minFillPercent { 283 | fillPercent = minFillPercent 284 | } else if fillPercent > maxFillPercent { 285 | fillPercent = maxFillPercent 286 | } 287 | threshold := int(float64(pageSize) * fillPercent) 288 | 289 | // Determine split position and sizes of the two pages. 290 | splitIndex, _ := n.splitIndex(threshold) 291 | 292 | // Split node into two separate nodes. 293 | // If there's no parent then we'll need to create one. 
294 | if n.parent == nil { 295 | n.parent = &node{bucket: n.bucket, children: []*node{n}} 296 | } 297 | 298 | // Create a new node and add it to the parent. 299 | next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent} 300 | n.parent.children = append(n.parent.children, next) 301 | 302 | // Split inodes across two nodes. 303 | next.inodes = n.inodes[splitIndex:] 304 | n.inodes = n.inodes[:splitIndex] 305 | 306 | // Update the statistics. 307 | n.bucket.tx.stats.Split++ 308 | 309 | return n, next 310 | } 311 | 312 | // splitIndex finds the position where a page will fill a given threshold. 313 | // It returns the index as well as the size of the first page. 314 | // This should only be called from split(). 315 | func (n *node) splitIndex(threshold int) (index, sz int) { 316 | sz = pageHeaderSize 317 | 318 | // Loop until we only have the minimum number of keys required for the second page. 319 | for i := 0; i < len(n.inodes)-minKeysPerPage; i++ { 320 | index = i 321 | inode := n.inodes[i] 322 | elsize := n.pageElementSize() + len(inode.key) + len(inode.value) 323 | 324 | // If we have at least the minimum number of keys and adding another 325 | // node would put us over the threshold then exit and return. 326 | if i >= minKeysPerPage && sz+elsize > threshold { 327 | break 328 | } 329 | 330 | // Add the element size to the total size. 331 | sz += elsize 332 | } 333 | 334 | return 335 | } 336 | 337 | // spill writes the nodes to dirty pages and splits nodes as it goes. 338 | // Returns an error if dirty pages cannot be allocated. 339 | func (n *node) spill() error { 340 | var tx = n.bucket.tx 341 | if n.spilled { 342 | return nil 343 | } 344 | 345 | // Spill child nodes first. Child nodes can materialize sibling nodes in 346 | // the case of split-merge so we cannot use a range loop. We have to check 347 | // the children size on every loop iteration. 348 | sort.Sort(n.children) 349 | for i := 0; i < len(n.children); i++ { 350 | if err := n.children[i].spill(); err != nil { 351 | return err 352 | } 353 | } 354 | 355 | // We no longer need the child list because it's only used for spill tracking. 356 | n.children = nil 357 | 358 | // Split nodes into appropriate sizes. The first node will always be n. 359 | var nodes = n.split(tx.db.pageSize) 360 | for _, node := range nodes { 361 | // Add node's page to the freelist if it's not new. 362 | if node.pgid > 0 { 363 | tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid)) 364 | node.pgid = 0 365 | } 366 | 367 | // Allocate contiguous space for the node. 368 | p, err := tx.allocate((node.size() / tx.db.pageSize) + 1) 369 | if err != nil { 370 | return err 371 | } 372 | 373 | // Write the node. 374 | if p.id >= tx.meta.pgid { 375 | panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid)) 376 | } 377 | node.pgid = p.id 378 | node.write(p) 379 | node.spilled = true 380 | 381 | // Insert into parent inodes. 382 | if node.parent != nil { 383 | var key = node.key 384 | if key == nil { 385 | key = node.inodes[0].key 386 | } 387 | 388 | node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0) 389 | node.key = node.inodes[0].key 390 | _assert(len(node.key) > 0, "spill: zero-length node key") 391 | } 392 | 393 | // Update the statistics. 394 | tx.stats.Spill++ 395 | } 396 | 397 | // If the root node split and created a new root then we need to spill that 398 | // as well. We'll clear out the children to make sure it doesn't try to respill.
399 | if n.parent != nil && n.parent.pgid == 0 { 400 | n.children = nil 401 | return n.parent.spill() 402 | } 403 | 404 | return nil 405 | } 406 | 407 | // rebalance attempts to combine the node with sibling nodes if the node fill 408 | // size is below a threshold or if there are not enough keys. 409 | func (n *node) rebalance() { 410 | if !n.unbalanced { 411 | return 412 | } 413 | n.unbalanced = false 414 | 415 | // Update statistics. 416 | n.bucket.tx.stats.Rebalance++ 417 | 418 | // Ignore if node is above threshold (25%) and has enough keys. 419 | var threshold = n.bucket.tx.db.pageSize / 4 420 | if n.size() > threshold && len(n.inodes) > n.minKeys() { 421 | return 422 | } 423 | 424 | // Root node has special handling. 425 | if n.parent == nil { 426 | // If root node is a branch and only has one node then collapse it. 427 | if !n.isLeaf && len(n.inodes) == 1 { 428 | // Move root's child up. 429 | child := n.bucket.node(n.inodes[0].pgid, n) 430 | n.isLeaf = child.isLeaf 431 | n.inodes = child.inodes[:] 432 | n.children = child.children 433 | 434 | // Reparent all child nodes being moved. 435 | for _, inode := range n.inodes { 436 | if child, ok := n.bucket.nodes[inode.pgid]; ok { 437 | child.parent = n 438 | } 439 | } 440 | 441 | // Remove old child. 442 | child.parent = nil 443 | delete(n.bucket.nodes, child.pgid) 444 | child.free() 445 | } 446 | 447 | return 448 | } 449 | 450 | // If node has no keys then just remove it. 451 | if n.numChildren() == 0 { 452 | n.parent.del(n.key) 453 | n.parent.removeChild(n) 454 | delete(n.bucket.nodes, n.pgid) 455 | n.free() 456 | n.parent.rebalance() 457 | return 458 | } 459 | 460 | _assert(n.parent.numChildren() > 1, "parent must have at least 2 children") 461 | 462 | // Destination node is right sibling if idx == 0, otherwise left sibling. 463 | var target *node 464 | var useNextSibling = (n.parent.childIndex(n) == 0) 465 | if useNextSibling { 466 | target = n.nextSibling() 467 | } else { 468 | target = n.prevSibling() 469 | } 470 | 471 | // If both this node and the target node are too small then merge them. 472 | if useNextSibling { 473 | // Reparent all child nodes being moved. 474 | for _, inode := range target.inodes { 475 | if child, ok := n.bucket.nodes[inode.pgid]; ok { 476 | child.parent.removeChild(child) 477 | child.parent = n 478 | child.parent.children = append(child.parent.children, child) 479 | } 480 | } 481 | 482 | // Copy over inodes from target and remove target. 483 | n.inodes = append(n.inodes, target.inodes...) 484 | n.parent.del(target.key) 485 | n.parent.removeChild(target) 486 | delete(n.bucket.nodes, target.pgid) 487 | target.free() 488 | } else { 489 | // Reparent all child nodes being moved. 490 | for _, inode := range n.inodes { 491 | if child, ok := n.bucket.nodes[inode.pgid]; ok { 492 | child.parent.removeChild(child) 493 | child.parent = target 494 | child.parent.children = append(child.parent.children, child) 495 | } 496 | } 497 | 498 | // Copy over inodes to target and remove node. 499 | target.inodes = append(target.inodes, n.inodes...) 500 | n.parent.del(n.key) 501 | n.parent.removeChild(n) 502 | delete(n.bucket.nodes, n.pgid) 503 | n.free() 504 | } 505 | 506 | // Either this node or the target node was deleted from the parent so rebalance it. 507 | n.parent.rebalance() 508 | } 509 | 510 | // removes a node from the list of in-memory children. 511 | // This does not affect the inodes. 
512 | func (n *node) removeChild(target *node) { 513 | for i, child := range n.children { 514 | if child == target { 515 | n.children = append(n.children[:i], n.children[i+1:]...) 516 | return 517 | } 518 | } 519 | } 520 | 521 | // dereference causes the node to copy all its inode key/value references to heap memory. 522 | // This is required when the mmap is reallocated so inodes are not pointing to stale data. 523 | func (n *node) dereference() { 524 | if n.key != nil { 525 | key := make([]byte, len(n.key)) 526 | copy(key, n.key) 527 | n.key = key 528 | _assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node") 529 | } 530 | 531 | for i := range n.inodes { 532 | inode := &n.inodes[i] 533 | 534 | key := make([]byte, len(inode.key)) 535 | copy(key, inode.key) 536 | inode.key = key 537 | _assert(len(inode.key) > 0, "dereference: zero-length inode key") 538 | 539 | value := make([]byte, len(inode.value)) 540 | copy(value, inode.value) 541 | inode.value = value 542 | } 543 | 544 | // Recursively dereference children. 545 | for _, child := range n.children { 546 | child.dereference() 547 | } 548 | 549 | // Update statistics. 550 | n.bucket.tx.stats.NodeDeref++ 551 | } 552 | 553 | // free adds the node's underlying page to the freelist. 554 | func (n *node) free() { 555 | if n.pgid != 0 { 556 | n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid)) 557 | n.pgid = 0 558 | } 559 | } 560 | 561 | // dump writes the contents of the node to STDERR for debugging purposes. 562 | /* 563 | func (n *node) dump() { 564 | // Write node header. 565 | var typ = "branch" 566 | if n.isLeaf { 567 | typ = "leaf" 568 | } 569 | warnf("[NODE %d {type=%s count=%d}]", n.pgid, typ, len(n.inodes)) 570 | 571 | // Write out abbreviated version of each item. 572 | for _, item := range n.inodes { 573 | if n.isLeaf { 574 | if item.flags&bucketLeafFlag != 0 { 575 | bucket := (*bucket)(unsafe.Pointer(&item.value[0])) 576 | warnf("+L %08x -> (bucket root=%d)", trunc(item.key, 4), bucket.root) 577 | } else { 578 | warnf("+L %08x -> %08x", trunc(item.key, 4), trunc(item.value, 4)) 579 | } 580 | } else { 581 | warnf("+B %08x -> pgid=%d", trunc(item.key, 4), item.pgid) 582 | } 583 | } 584 | warn("") 585 | } 586 | */ 587 | 588 | type nodes []*node 589 | 590 | func (s nodes) Len() int { return len(s) } 591 | func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 592 | func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 } 593 | 594 | // inode represents an internal node inside of a node. 595 | // It can be used to point to elements in a page or point 596 | // to an element which hasn't been added to a page yet. 597 | type inode struct { 598 | flags uint32 599 | pgid pgid 600 | key []byte 601 | value []byte 602 | } 603 | 604 | type inodes []inode 605 | -------------------------------------------------------------------------------- /node_test.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "testing" 5 | "unsafe" 6 | ) 7 | 8 | // Ensure that a node can insert a key/value. 
9 | func TestNode_put(t *testing.T) { 10 | n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{meta: &meta{pgid: 1}}}} 11 | n.put([]byte("baz"), []byte("baz"), []byte("2"), 0, 0) 12 | n.put([]byte("foo"), []byte("foo"), []byte("0"), 0, 0) 13 | n.put([]byte("bar"), []byte("bar"), []byte("1"), 0, 0) 14 | n.put([]byte("foo"), []byte("foo"), []byte("3"), 0, leafPageFlag) 15 | 16 | if len(n.inodes) != 3 { 17 | t.Fatalf("exp=3; got=%d", len(n.inodes)) 18 | } 19 | if k, v := n.inodes[0].key, n.inodes[0].value; string(k) != "bar" || string(v) != "1" { 20 | t.Fatalf("exp=; got=<%s,%s>", k, v) 21 | } 22 | if k, v := n.inodes[1].key, n.inodes[1].value; string(k) != "baz" || string(v) != "2" { 23 | t.Fatalf("exp=; got=<%s,%s>", k, v) 24 | } 25 | if k, v := n.inodes[2].key, n.inodes[2].value; string(k) != "foo" || string(v) != "3" { 26 | t.Fatalf("exp=; got=<%s,%s>", k, v) 27 | } 28 | if n.inodes[2].flags != uint32(leafPageFlag) { 29 | t.Fatalf("not a leaf: %d", n.inodes[2].flags) 30 | } 31 | } 32 | 33 | // Ensure that a node can deserialize from a leaf page. 34 | func TestNode_read_LeafPage(t *testing.T) { 35 | // Create a page. 36 | var buf [4096]byte 37 | page := (*page)(unsafe.Pointer(&buf[0])) 38 | page.flags = leafPageFlag 39 | page.count = 2 40 | 41 | // Insert 2 elements at the beginning. sizeof(leafPageElement) == 16 42 | nodes := (*[3]leafPageElement)(unsafe.Pointer(&page.ptr)) 43 | nodes[0] = leafPageElement{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(leafPageElement) * 2 44 | nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4 45 | 46 | // Write data for the nodes at the end. 47 | data := (*[4096]byte)(unsafe.Pointer(&nodes[2])) 48 | copy(data[:], []byte("barfooz")) 49 | copy(data[7:], []byte("helloworldbye")) 50 | 51 | // Deserialize page into a leaf. 52 | n := &node{} 53 | n.read(page) 54 | 55 | // Check that there are two inodes with correct data. 56 | if !n.isLeaf { 57 | t.Fatal("expected leaf") 58 | } 59 | if len(n.inodes) != 2 { 60 | t.Fatalf("exp=2; got=%d", len(n.inodes)) 61 | } 62 | if k, v := n.inodes[0].key, n.inodes[0].value; string(k) != "bar" || string(v) != "fooz" { 63 | t.Fatalf("exp=; got=<%s,%s>", k, v) 64 | } 65 | if k, v := n.inodes[1].key, n.inodes[1].value; string(k) != "helloworld" || string(v) != "bye" { 66 | t.Fatalf("exp=; got=<%s,%s>", k, v) 67 | } 68 | } 69 | 70 | // Ensure that a node can serialize into a leaf page. 71 | func TestNode_write_LeafPage(t *testing.T) { 72 | // Create a node. 73 | n := &node{isLeaf: true, inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} 74 | n.put([]byte("susy"), []byte("susy"), []byte("que"), 0, 0) 75 | n.put([]byte("ricki"), []byte("ricki"), []byte("lake"), 0, 0) 76 | n.put([]byte("john"), []byte("john"), []byte("johnson"), 0, 0) 77 | 78 | // Write it to a page. 79 | var buf [4096]byte 80 | p := (*page)(unsafe.Pointer(&buf[0])) 81 | n.write(p) 82 | 83 | // Read the page back in. 84 | n2 := &node{} 85 | n2.read(p) 86 | 87 | // Check that the two pages are the same. 
88 | if len(n2.inodes) != 3 { 89 | t.Fatalf("exp=3; got=%d", len(n2.inodes)) 90 | } 91 | if k, v := n2.inodes[0].key, n2.inodes[0].value; string(k) != "john" || string(v) != "johnson" { 92 | t.Fatalf("exp=; got=<%s,%s>", k, v) 93 | } 94 | if k, v := n2.inodes[1].key, n2.inodes[1].value; string(k) != "ricki" || string(v) != "lake" { 95 | t.Fatalf("exp=; got=<%s,%s>", k, v) 96 | } 97 | if k, v := n2.inodes[2].key, n2.inodes[2].value; string(k) != "susy" || string(v) != "que" { 98 | t.Fatalf("exp=; got=<%s,%s>", k, v) 99 | } 100 | } 101 | 102 | // Ensure that a node can split into appropriate subgroups. 103 | func TestNode_split(t *testing.T) { 104 | // Create a node. 105 | n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} 106 | n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0) 107 | n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0) 108 | n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0, 0) 109 | n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0, 0) 110 | n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0, 0) 111 | 112 | // Split between 2 & 3. 113 | n.split(100) 114 | 115 | var parent = n.parent 116 | if len(parent.children) != 2 { 117 | t.Fatalf("exp=2; got=%d", len(parent.children)) 118 | } 119 | if len(parent.children[0].inodes) != 2 { 120 | t.Fatalf("exp=2; got=%d", len(parent.children[0].inodes)) 121 | } 122 | if len(parent.children[1].inodes) != 3 { 123 | t.Fatalf("exp=3; got=%d", len(parent.children[1].inodes)) 124 | } 125 | } 126 | 127 | // Ensure that a page with the minimum number of inodes just returns a single node. 128 | func TestNode_split_MinKeys(t *testing.T) { 129 | // Create a node. 130 | n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} 131 | n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0) 132 | n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0) 133 | 134 | // Split. 135 | n.split(20) 136 | if n.parent != nil { 137 | t.Fatalf("expected nil parent") 138 | } 139 | } 140 | 141 | // Ensure that a node that has keys that all fit on a page just returns one leaf. 142 | func TestNode_split_SinglePage(t *testing.T) { 143 | // Create a node. 144 | n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} 145 | n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0) 146 | n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0) 147 | n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0, 0) 148 | n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0, 0) 149 | n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0, 0) 150 | 151 | // Split. 
152 | n.split(4096) 153 | if n.parent != nil { 154 | t.Fatalf("expected nil parent") 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /page.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "sort" 7 | "unsafe" 8 | ) 9 | 10 | const pageHeaderSize = int(unsafe.Offsetof(((*page)(nil)).ptr)) 11 | 12 | const minKeysPerPage = 2 13 | 14 | const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{})) 15 | const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{})) 16 | 17 | const ( 18 | branchPageFlag = 0x01 19 | leafPageFlag = 0x02 20 | metaPageFlag = 0x04 21 | freelistPageFlag = 0x10 22 | ) 23 | 24 | const ( 25 | bucketLeafFlag = 0x01 26 | ) 27 | 28 | type pgid uint64 29 | 30 | type page struct { 31 | id pgid 32 | flags uint16 33 | count uint16 34 | overflow uint32 35 | ptr uintptr 36 | } 37 | 38 | // typ returns a human readable page type string used for debugging. 39 | func (p *page) typ() string { 40 | if (p.flags & branchPageFlag) != 0 { 41 | return "branch" 42 | } else if (p.flags & leafPageFlag) != 0 { 43 | return "leaf" 44 | } else if (p.flags & metaPageFlag) != 0 { 45 | return "meta" 46 | } else if (p.flags & freelistPageFlag) != 0 { 47 | return "freelist" 48 | } 49 | return fmt.Sprintf("unknown<%02x>", p.flags) 50 | } 51 | 52 | // meta returns a pointer to the metadata section of the page. 53 | func (p *page) meta() *meta { 54 | return (*meta)(unsafe.Pointer(&p.ptr)) 55 | } 56 | 57 | // leafPageElement retrieves the leaf node by index 58 | func (p *page) leafPageElement(index uint16) *leafPageElement { 59 | n := &((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[index] 60 | return n 61 | } 62 | 63 | // leafPageElements retrieves a list of leaf nodes. 64 | func (p *page) leafPageElements() []leafPageElement { 65 | if p.count == 0 { 66 | return nil 67 | } 68 | return ((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[:] 69 | } 70 | 71 | // branchPageElement retrieves the branch node by index 72 | func (p *page) branchPageElement(index uint16) *branchPageElement { 73 | return &((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[index] 74 | } 75 | 76 | // branchPageElements retrieves a list of branch nodes. 77 | func (p *page) branchPageElements() []branchPageElement { 78 | if p.count == 0 { 79 | return nil 80 | } 81 | return ((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[:] 82 | } 83 | 84 | // dump writes n bytes of the page to STDERR as hex output. 85 | func (p *page) hexdump(n int) { 86 | buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n] 87 | fmt.Fprintf(os.Stderr, "%x\n", buf) 88 | } 89 | 90 | type pages []*page 91 | 92 | func (s pages) Len() int { return len(s) } 93 | func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 94 | func (s pages) Less(i, j int) bool { return s[i].id < s[j].id } 95 | 96 | // branchPageElement represents a node on a branch page. 97 | type branchPageElement struct { 98 | pos uint32 99 | ksize uint32 100 | pgid pgid 101 | } 102 | 103 | // key returns a byte slice of the node key. 104 | func (n *branchPageElement) key() []byte { 105 | buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) 106 | return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize] 107 | } 108 | 109 | // leafPageElement represents a node on a leaf page. 
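// Its key and value are stored elsewhere in the same page: pos is the offset
// from this element to the key, and the value follows the key immediately,
// i.e. (sketch):
//
//	key   = page[elem+pos : elem+pos+ksize]
//	value = page[elem+pos+ksize : elem+pos+ksize+vsize]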
110 | type leafPageElement struct { 111 | flags uint32 112 | pos uint32 113 | ksize uint32 114 | vsize uint32 115 | } 116 | 117 | // key returns a byte slice of the node key. 118 | func (n *leafPageElement) key() []byte { 119 | buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) 120 | return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize:n.ksize] 121 | } 122 | 123 | // value returns a byte slice of the node value. 124 | func (n *leafPageElement) value() []byte { 125 | buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) 126 | return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize:n.vsize] 127 | } 128 | 129 | // PageInfo represents human readable information about a page. 130 | type PageInfo struct { 131 | ID int 132 | Type string 133 | Count int 134 | OverflowCount int 135 | } 136 | 137 | type pgids []pgid 138 | 139 | func (s pgids) Len() int { return len(s) } 140 | func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 141 | func (s pgids) Less(i, j int) bool { return s[i] < s[j] } 142 | 143 | // merge returns the sorted union of a and b. 144 | func (a pgids) merge(b pgids) pgids { 145 | // Return the opposite slice if one is nil. 146 | if len(a) == 0 { 147 | return b 148 | } 149 | if len(b) == 0 { 150 | return a 151 | } 152 | merged := make(pgids, len(a)+len(b)) 153 | mergepgids(merged, a, b) 154 | return merged 155 | } 156 | 157 | // mergepgids copies the sorted union of a and b into dst. 158 | // If dst is too small, it panics. 159 | func mergepgids(dst, a, b pgids) { 160 | if len(dst) < len(a)+len(b) { 161 | panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b))) 162 | } 163 | // Copy in the opposite slice if one is nil. 164 | if len(a) == 0 { 165 | copy(dst, b) 166 | return 167 | } 168 | if len(b) == 0 { 169 | copy(dst, a) 170 | return 171 | } 172 | 173 | // Merged will hold all elements from both lists. 174 | merged := dst[:0] 175 | 176 | // Assign lead to the slice with a lower starting value, follow to the higher value. 177 | lead, follow := a, b 178 | if b[0] < a[0] { 179 | lead, follow = b, a 180 | } 181 | 182 | // Continue while there are elements in the lead. 183 | for len(lead) > 0 { 184 | // Merge largest prefix of lead that is ahead of follow[0]. 185 | n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] }) 186 | merged = append(merged, lead[:n]...) 187 | if n >= len(lead) { 188 | break 189 | } 190 | 191 | // Swap lead and follow. 192 | lead, follow = follow, lead[n:] 193 | } 194 | 195 | // Append what's left in follow. 196 | _ = append(merged, follow...) 197 | } 198 | -------------------------------------------------------------------------------- /page_test.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "reflect" 5 | "sort" 6 | "testing" 7 | "testing/quick" 8 | ) 9 | 10 | // Ensure that the page type can be returned in human readable format. 
11 | func TestPage_typ(t *testing.T) { 12 | if typ := (&page{flags: branchPageFlag}).typ(); typ != "branch" { 13 | t.Fatalf("exp=branch; got=%v", typ) 14 | } 15 | if typ := (&page{flags: leafPageFlag}).typ(); typ != "leaf" { 16 | t.Fatalf("exp=leaf; got=%v", typ) 17 | } 18 | if typ := (&page{flags: metaPageFlag}).typ(); typ != "meta" { 19 | t.Fatalf("exp=meta; got=%v", typ) 20 | } 21 | if typ := (&page{flags: freelistPageFlag}).typ(); typ != "freelist" { 22 | t.Fatalf("exp=freelist; got=%v", typ) 23 | } 24 | if typ := (&page{flags: 20000}).typ(); typ != "unknown<4e20>" { 25 | t.Fatalf("exp=unknown<4e20>; got=%v", typ) 26 | } 27 | } 28 | 29 | // Ensure that the hexdump debugging function doesn't blow up. 30 | func TestPage_dump(t *testing.T) { 31 | (&page{id: 256}).hexdump(16) 32 | } 33 | 34 | func TestPgids_merge(t *testing.T) { 35 | a := pgids{4, 5, 6, 10, 11, 12, 13, 27} 36 | b := pgids{1, 3, 8, 9, 25, 30} 37 | c := a.merge(b) 38 | if !reflect.DeepEqual(c, pgids{1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30}) { 39 | t.Errorf("mismatch: %v", c) 40 | } 41 | 42 | a = pgids{4, 5, 6, 10, 11, 12, 13, 27, 35, 36} 43 | b = pgids{8, 9, 25, 30} 44 | c = a.merge(b) 45 | if !reflect.DeepEqual(c, pgids{4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30, 35, 36}) { 46 | t.Errorf("mismatch: %v", c) 47 | } 48 | } 49 | 50 | func TestPgids_merge_quick(t *testing.T) { 51 | if err := quick.Check(func(a, b pgids) bool { 52 | // Sort incoming lists. 53 | sort.Sort(a) 54 | sort.Sort(b) 55 | 56 | // Merge the two lists together. 57 | got := a.merge(b) 58 | 59 | // The expected value should be the two lists combined and sorted. 60 | exp := append(a, b...) 61 | sort.Sort(exp) 62 | 63 | if !reflect.DeepEqual(exp, got) { 64 | t.Errorf("\nexp=%+v\ngot=%+v\n", exp, got) 65 | return false 66 | } 67 | 68 | return true 69 | }, nil); err != nil { 70 | t.Fatal(err) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /quick_test.go: -------------------------------------------------------------------------------- 1 | package bolt_test 2 | 3 | import ( 4 | "bytes" 5 | "flag" 6 | "fmt" 7 | "math/rand" 8 | "os" 9 | "reflect" 10 | "testing/quick" 11 | "time" 12 | ) 13 | 14 | // testing/quick defaults to 5 iterations and a random seed. 15 | // You can override these settings from the command line: 16 | // 17 | // -quick.count The number of iterations to perform. 18 | // -quick.seed The seed to use for randomizing. 19 | // -quick.maxitems The maximum number of items to insert into a DB. 20 | // -quick.maxksize The maximum size of a key. 21 | // -quick.maxvsize The maximum size of a value. 
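//
// For example (an illustrative invocation):
//
//	go test ./... -quick.count=100 -quick.seed=42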
22 | // 23 | 24 | var qcount, qseed, qmaxitems, qmaxksize, qmaxvsize int 25 | 26 | func init() { 27 | flag.IntVar(&qcount, "quick.count", 5, "") 28 | flag.IntVar(&qseed, "quick.seed", int(time.Now().UnixNano())%100000, "") 29 | flag.IntVar(&qmaxitems, "quick.maxitems", 1000, "") 30 | flag.IntVar(&qmaxksize, "quick.maxksize", 1024, "") 31 | flag.IntVar(&qmaxvsize, "quick.maxvsize", 1024, "") 32 | flag.Parse() 33 | fmt.Fprintln(os.Stderr, "seed:", qseed) 34 | fmt.Fprintf(os.Stderr, "quick settings: count=%v, items=%v, ksize=%v, vsize=%v\n", qcount, qmaxitems, qmaxksize, qmaxvsize) 35 | } 36 | 37 | func qconfig() *quick.Config { 38 | return &quick.Config{ 39 | MaxCount: qcount, 40 | Rand: rand.New(rand.NewSource(int64(qseed))), 41 | } 42 | } 43 | 44 | type testdata []testdataitem 45 | 46 | func (t testdata) Len() int { return len(t) } 47 | func (t testdata) Swap(i, j int) { t[i], t[j] = t[j], t[i] } 48 | func (t testdata) Less(i, j int) bool { return bytes.Compare(t[i].Key, t[j].Key) == -1 } 49 | 50 | func (t testdata) Generate(rand *rand.Rand, size int) reflect.Value { 51 | n := rand.Intn(qmaxitems-1) + 1 52 | items := make(testdata, n) 53 | used := make(map[string]bool) 54 | for i := 0; i < n; i++ { 55 | item := &items[i] 56 | // Ensure that keys are unique by looping until we find one that we have not already used. 57 | for { 58 | item.Key = randByteSlice(rand, 1, qmaxksize) 59 | if !used[string(item.Key)] { 60 | used[string(item.Key)] = true 61 | break 62 | } 63 | } 64 | item.Value = randByteSlice(rand, 0, qmaxvsize) 65 | } 66 | return reflect.ValueOf(items) 67 | } 68 | 69 | type revtestdata []testdataitem 70 | 71 | func (t revtestdata) Len() int { return len(t) } 72 | func (t revtestdata) Swap(i, j int) { t[i], t[j] = t[j], t[i] } 73 | func (t revtestdata) Less(i, j int) bool { return bytes.Compare(t[i].Key, t[j].Key) == 1 } 74 | 75 | type testdataitem struct { 76 | Key []byte 77 | Value []byte 78 | } 79 | 80 | func randByteSlice(rand *rand.Rand, minSize, maxSize int) []byte { 81 | n := rand.Intn(maxSize-minSize) + minSize 82 | b := make([]byte, n) 83 | for i := 0; i < n; i++ { 84 | b[i] = byte(rand.Intn(255)) 85 | } 86 | return b 87 | } 88 | -------------------------------------------------------------------------------- /simulation_test.go: -------------------------------------------------------------------------------- 1 | package bolt_test 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "math/rand" 7 | "sync" 8 | "testing" 9 | 10 | "github.com/boltdb/bolt" 11 | ) 12 | 13 | func TestSimulate_1op_1p(t *testing.T) { testSimulate(t, 1, 1) } 14 | func TestSimulate_10op_1p(t *testing.T) { testSimulate(t, 10, 1) } 15 | func TestSimulate_100op_1p(t *testing.T) { testSimulate(t, 100, 1) } 16 | func TestSimulate_1000op_1p(t *testing.T) { testSimulate(t, 1000, 1) } 17 | func TestSimulate_10000op_1p(t *testing.T) { testSimulate(t, 10000, 1) } 18 | 19 | func TestSimulate_10op_10p(t *testing.T) { testSimulate(t, 10, 10) } 20 | func TestSimulate_100op_10p(t *testing.T) { testSimulate(t, 100, 10) } 21 | func TestSimulate_1000op_10p(t *testing.T) { testSimulate(t, 1000, 10) } 22 | func TestSimulate_10000op_10p(t *testing.T) { testSimulate(t, 10000, 10) } 23 | 24 | func TestSimulate_100op_100p(t *testing.T) { testSimulate(t, 100, 100) } 25 | func TestSimulate_1000op_100p(t *testing.T) { testSimulate(t, 1000, 100) } 26 | func TestSimulate_10000op_100p(t *testing.T) { testSimulate(t, 10000, 100) } 27 | 28 | func TestSimulate_10000op_1000p(t *testing.T) { testSimulate(t, 10000, 1000) } 29 | 30 | // Randomly 
generate operations on a given database with multiple clients to ensure consistency and thread safety.
31 | func testSimulate(t *testing.T, threadCount, parallelism int) {
32 | if testing.Short() {
33 | t.Skip("skipping test in short mode.")
34 | }
35 | 
36 | rand.Seed(int64(qseed))
37 | 
38 | // A list of operations that readers and writers can perform.
39 | var readerHandlers = []simulateHandler{simulateGetHandler}
40 | var writerHandlers = []simulateHandler{simulateGetHandler, simulatePutHandler}
41 | 
42 | var versions = make(map[int]*QuickDB)
43 | versions[1] = NewQuickDB()
44 | 
45 | db := MustOpenDB()
46 | defer db.MustClose()
47 | 
48 | var mutex sync.Mutex
49 | 
50 | // Run n threads in parallel, each with its own operation.
51 | var wg sync.WaitGroup
52 | var threads = make(chan bool, parallelism)
53 | var i int
54 | for {
55 | threads <- true
56 | wg.Add(1)
57 | writable := ((rand.Int() % 100) < 20) // 20% writers
58 | 
59 | // Choose an operation to execute.
60 | var handler simulateHandler
61 | if writable {
62 | handler = writerHandlers[rand.Intn(len(writerHandlers))]
63 | } else {
64 | handler = readerHandlers[rand.Intn(len(readerHandlers))]
65 | }
66 | 
67 | // Execute a thread for the given operation.
68 | go func(writable bool, handler simulateHandler) {
69 | defer wg.Done()
70 | 
71 | // Start transaction.
72 | tx, err := db.Begin(writable)
73 | if err != nil {
74 | t.Fatal("tx begin: ", err)
75 | }
76 | 
77 | // Obtain current state of the dataset.
78 | mutex.Lock()
79 | var qdb = versions[tx.ID()]
80 | if writable {
81 | qdb = versions[tx.ID()-1].Copy()
82 | }
83 | mutex.Unlock()
84 | 
85 | // Make sure we commit/rollback the tx at the end and update the state.
86 | if writable {
87 | defer func() {
88 | mutex.Lock()
89 | versions[tx.ID()] = qdb
90 | mutex.Unlock()
91 | 
92 | if err := tx.Commit(); err != nil {
93 | t.Fatal(err)
94 | }
95 | }()
96 | } else {
97 | defer func() { _ = tx.Rollback() }()
98 | }
99 | 
100 | // Ignore operation if we don't have data yet.
101 | if qdb == nil {
102 | return
103 | }
104 | 
105 | // Execute handler.
106 | handler(tx, qdb)
107 | 
108 | // Release a thread back to the scheduling loop.
109 | <-threads
110 | }(writable, handler)
111 | 
112 | i++
113 | if i > threadCount {
114 | break
115 | }
116 | }
117 | 
118 | // Wait until all threads are done.
119 | wg.Wait()
120 | }
121 | 
122 | type simulateHandler func(tx *bolt.Tx, qdb *QuickDB)
123 | 
124 | // Retrieves a key from the database and verifies that it is what is expected.
125 | func simulateGetHandler(tx *bolt.Tx, qdb *QuickDB) {
126 | // Randomly retrieve an existing key path.
127 | keys := qdb.Rand()
128 | if len(keys) == 0 {
129 | return
130 | }
131 | 
132 | // Retrieve root bucket.
133 | b := tx.Bucket(keys[0])
134 | if b == nil {
135 | panic(fmt.Sprintf("bucket[0] expected: %08x\n", trunc(keys[0], 4)))
136 | }
137 | 
138 | // Drill into nested buckets.
139 | for _, key := range keys[1 : len(keys)-1] {
140 | b = b.Bucket(key)
141 | if b == nil {
142 | panic(fmt.Sprintf("bucket[n] expected: %v -> %v\n", keys, key))
143 | }
144 | }
145 | 
146 | // Verify key/value on the final bucket.
147 | expected := qdb.Get(keys)
148 | actual := b.Get(keys[len(keys)-1])
149 | if !bytes.Equal(actual, expected) {
150 | fmt.Println("=== EXPECTED ===")
151 | fmt.Println(expected)
152 | fmt.Println("=== ACTUAL ===")
153 | fmt.Println(actual)
154 | fmt.Println("=== END ===")
155 | panic("value mismatch")
156 | }
157 | }
158 | 
159 | // Inserts a key into the database.
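// Each successful Put is mirrored into the in-memory QuickDB so that a later
// simulateGetHandler call can verify the value round-trips.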
160 | func simulatePutHandler(tx *bolt.Tx, qdb *QuickDB) { 161 | var err error 162 | keys, value := randKeys(), randValue() 163 | 164 | // Retrieve root bucket. 165 | b := tx.Bucket(keys[0]) 166 | if b == nil { 167 | b, err = tx.CreateBucket(keys[0]) 168 | if err != nil { 169 | panic("create bucket: " + err.Error()) 170 | } 171 | } 172 | 173 | // Create nested buckets, if necessary. 174 | for _, key := range keys[1 : len(keys)-1] { 175 | child := b.Bucket(key) 176 | if child != nil { 177 | b = child 178 | } else { 179 | b, err = b.CreateBucket(key) 180 | if err != nil { 181 | panic("create bucket: " + err.Error()) 182 | } 183 | } 184 | } 185 | 186 | // Insert into database. 187 | if err := b.Put(keys[len(keys)-1], value); err != nil { 188 | panic("put: " + err.Error()) 189 | } 190 | 191 | // Insert into in-memory database. 192 | qdb.Put(keys, value) 193 | } 194 | 195 | // QuickDB is an in-memory database that replicates the functionality of the 196 | // Bolt DB type except that it is entirely in-memory. It is meant for testing 197 | // that the Bolt database is consistent. 198 | type QuickDB struct { 199 | sync.RWMutex 200 | m map[string]interface{} 201 | } 202 | 203 | // NewQuickDB returns an instance of QuickDB. 204 | func NewQuickDB() *QuickDB { 205 | return &QuickDB{m: make(map[string]interface{})} 206 | } 207 | 208 | // Get retrieves the value at a key path. 209 | func (db *QuickDB) Get(keys [][]byte) []byte { 210 | db.RLock() 211 | defer db.RUnlock() 212 | 213 | m := db.m 214 | for _, key := range keys[:len(keys)-1] { 215 | value := m[string(key)] 216 | if value == nil { 217 | return nil 218 | } 219 | switch value := value.(type) { 220 | case map[string]interface{}: 221 | m = value 222 | case []byte: 223 | return nil 224 | } 225 | } 226 | 227 | // Only return if it's a simple value. 228 | if value, ok := m[string(keys[len(keys)-1])].([]byte); ok { 229 | return value 230 | } 231 | return nil 232 | } 233 | 234 | // Put inserts a value into a key path. 235 | func (db *QuickDB) Put(keys [][]byte, value []byte) { 236 | db.Lock() 237 | defer db.Unlock() 238 | 239 | // Build buckets all the way down the key path. 240 | m := db.m 241 | for _, key := range keys[:len(keys)-1] { 242 | if _, ok := m[string(key)].([]byte); ok { 243 | return // Keypath intersects with a simple value. Do nothing. 244 | } 245 | 246 | if m[string(key)] == nil { 247 | m[string(key)] = make(map[string]interface{}) 248 | } 249 | m = m[string(key)].(map[string]interface{}) 250 | } 251 | 252 | // Insert value into the last key. 253 | m[string(keys[len(keys)-1])] = value 254 | } 255 | 256 | // Rand returns a random key path that points to a simple value. 257 | func (db *QuickDB) Rand() [][]byte { 258 | db.RLock() 259 | defer db.RUnlock() 260 | if len(db.m) == 0 { 261 | return nil 262 | } 263 | var keys [][]byte 264 | db.rand(db.m, &keys) 265 | return keys 266 | } 267 | 268 | func (db *QuickDB) rand(m map[string]interface{}, keys *[][]byte) { 269 | i, index := 0, rand.Intn(len(m)) 270 | for k, v := range m { 271 | if i == index { 272 | *keys = append(*keys, []byte(k)) 273 | if v, ok := v.(map[string]interface{}); ok { 274 | db.rand(v, keys) 275 | } 276 | return 277 | } 278 | i++ 279 | } 280 | panic("quickdb rand: out-of-range") 281 | } 282 | 283 | // Copy copies the entire database. 
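// The simulation's writer path uses it to derive a private copy of the
// previous committed state, roughly:
//
//	base := versions[tx.ID()-1] // state as of the prior commit
//	qdb := base.Copy()          // free to mutate without racing readers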
284 | func (db *QuickDB) Copy() *QuickDB { 285 | db.RLock() 286 | defer db.RUnlock() 287 | return &QuickDB{m: db.copy(db.m)} 288 | } 289 | 290 | func (db *QuickDB) copy(m map[string]interface{}) map[string]interface{} { 291 | clone := make(map[string]interface{}, len(m)) 292 | for k, v := range m { 293 | switch v := v.(type) { 294 | case map[string]interface{}: 295 | clone[k] = db.copy(v) 296 | default: 297 | clone[k] = v 298 | } 299 | } 300 | return clone 301 | } 302 | 303 | func randKey() []byte { 304 | var min, max = 1, 1024 305 | n := rand.Intn(max-min) + min 306 | b := make([]byte, n) 307 | for i := 0; i < n; i++ { 308 | b[i] = byte(rand.Intn(255)) 309 | } 310 | return b 311 | } 312 | 313 | func randKeys() [][]byte { 314 | var keys [][]byte 315 | var count = rand.Intn(2) + 2 316 | for i := 0; i < count; i++ { 317 | keys = append(keys, randKey()) 318 | } 319 | return keys 320 | } 321 | 322 | func randValue() []byte { 323 | n := rand.Intn(8192) 324 | b := make([]byte, n) 325 | for i := 0; i < n; i++ { 326 | b[i] = byte(rand.Intn(255)) 327 | } 328 | return b 329 | } 330 | -------------------------------------------------------------------------------- /tx.go: -------------------------------------------------------------------------------- 1 | package bolt 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | "sort" 8 | "strings" 9 | "time" 10 | "unsafe" 11 | ) 12 | 13 | // txid represents the internal transaction identifier. 14 | type txid uint64 15 | 16 | // Tx represents a read-only or read/write transaction on the database. 17 | // Read-only transactions can be used for retrieving values for keys and creating cursors. 18 | // Read/write transactions can create and remove buckets and create and remove keys. 19 | // 20 | // IMPORTANT: You must commit or rollback transactions when you are done with 21 | // them. Pages can not be reclaimed by the writer until no more transactions 22 | // are using them. A long running read transaction can cause the database to 23 | // quickly grow. 24 | type Tx struct { 25 | writable bool 26 | managed bool 27 | db *DB 28 | meta *meta 29 | root Bucket 30 | pages map[pgid]*page 31 | stats TxStats 32 | commitHandlers []func() 33 | 34 | // WriteFlag specifies the flag for write-related methods like WriteTo(). 35 | // Tx opens the database file with the specified flag to copy the data. 36 | // 37 | // By default, the flag is unset, which works well for mostly in-memory 38 | // workloads. For databases that are much larger than available RAM, 39 | // set the flag to syscall.O_DIRECT to avoid trashing the page cache. 40 | WriteFlag int 41 | } 42 | 43 | // init initializes the transaction. 44 | func (tx *Tx) init(db *DB) { 45 | tx.db = db 46 | tx.pages = nil 47 | 48 | // Copy the meta page since it can be changed by the writer. 49 | tx.meta = &meta{} 50 | db.meta().copy(tx.meta) 51 | 52 | // Copy over the root bucket. 53 | tx.root = newBucket(tx) 54 | tx.root.bucket = &bucket{} 55 | *tx.root.bucket = tx.meta.root 56 | 57 | // Increment the transaction id and add a page cache for writable transactions. 58 | if tx.writable { 59 | tx.pages = make(map[pgid]*page) 60 | tx.meta.txid += txid(1) 61 | } 62 | } 63 | 64 | // ID returns the transaction id. 65 | func (tx *Tx) ID() int { 66 | return int(tx.meta.txid) 67 | } 68 | 69 | // DB returns a reference to the database that created the transaction. 70 | func (tx *Tx) DB() *DB { 71 | return tx.db 72 | } 73 | 74 | // Size returns current database size in bytes as seen by this transaction. 
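// The value is the transaction's high-water page id multiplied by the page
// size, so it reflects allocated pages rather than live key/value data.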
75 | func (tx *Tx) Size() int64 { 76 | return int64(tx.meta.pgid) * int64(tx.db.pageSize) 77 | } 78 | 79 | // Writable returns whether the transaction can perform write operations. 80 | func (tx *Tx) Writable() bool { 81 | return tx.writable 82 | } 83 | 84 | // Cursor creates a cursor associated with the root bucket. 85 | // All items in the cursor will return a nil value because all root bucket keys point to buckets. 86 | // The cursor is only valid as long as the transaction is open. 87 | // Do not use a cursor after the transaction is closed. 88 | func (tx *Tx) Cursor() *Cursor { 89 | return tx.root.Cursor() 90 | } 91 | 92 | // Stats retrieves a copy of the current transaction statistics. 93 | func (tx *Tx) Stats() TxStats { 94 | return tx.stats 95 | } 96 | 97 | // Bucket retrieves a bucket by name. 98 | // Returns nil if the bucket does not exist. 99 | // The bucket instance is only valid for the lifetime of the transaction. 100 | func (tx *Tx) Bucket(name []byte) *Bucket { 101 | return tx.root.Bucket(name) 102 | } 103 | 104 | // CreateBucket creates a new bucket. 105 | // Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long. 106 | // The bucket instance is only valid for the lifetime of the transaction. 107 | func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) { 108 | return tx.root.CreateBucket(name) 109 | } 110 | 111 | // CreateBucketIfNotExists creates a new bucket if it doesn't already exist. 112 | // Returns an error if the bucket name is blank, or if the bucket name is too long. 113 | // The bucket instance is only valid for the lifetime of the transaction. 114 | func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) { 115 | return tx.root.CreateBucketIfNotExists(name) 116 | } 117 | 118 | // DeleteBucket deletes a bucket. 119 | // Returns an error if the bucket cannot be found or if the key represents a non-bucket value. 120 | func (tx *Tx) DeleteBucket(name []byte) error { 121 | return tx.root.DeleteBucket(name) 122 | } 123 | 124 | // ForEach executes a function for each bucket in the root. 125 | // If the provided function returns an error then the iteration is stopped and 126 | // the error is returned to the caller. 127 | func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error { 128 | return tx.root.ForEach(func(k, v []byte) error { 129 | if err := fn(k, tx.root.Bucket(k)); err != nil { 130 | return err 131 | } 132 | return nil 133 | }) 134 | } 135 | 136 | // OnCommit adds a handler function to be executed after the transaction successfully commits. 137 | func (tx *Tx) OnCommit(fn func()) { 138 | tx.commitHandlers = append(tx.commitHandlers, fn) 139 | } 140 | 141 | // Commit writes all changes to disk and updates the meta page. 142 | // Returns an error if a disk write error occurs, or if Commit is 143 | // called on a read-only transaction. 144 | func (tx *Tx) Commit() error { 145 | _assert(!tx.managed, "managed tx commit not allowed") 146 | if tx.db == nil { 147 | return ErrTxClosed 148 | } else if !tx.writable { 149 | return ErrTxNotWritable 150 | } 151 | 152 | // TODO(benbjohnson): Use vectorized I/O to write out dirty pages. 153 | 154 | // Rebalance nodes which have had deletions. 155 | var startTime = time.Now() 156 | tx.root.rebalance() 157 | if tx.stats.Rebalance > 0 { 158 | tx.stats.RebalanceTime += time.Since(startTime) 159 | } 160 | 161 | // spill data onto dirty pages. 
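// Spilling materializes modified in-memory nodes as dirty pages in tx.pages
// so the write step below can flush them to disk.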
162 | startTime = time.Now() 163 | if err := tx.root.spill(); err != nil { 164 | tx.rollback() 165 | return err 166 | } 167 | tx.stats.SpillTime += time.Since(startTime) 168 | 169 | // Free the old root bucket. 170 | tx.meta.root.root = tx.root.root 171 | 172 | opgid := tx.meta.pgid 173 | 174 | // Free the freelist and allocate new pages for it. This will overestimate 175 | // the size of the freelist but not underestimate the size (which would be bad). 176 | tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist)) 177 | p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1) 178 | if err != nil { 179 | tx.rollback() 180 | return err 181 | } 182 | if err := tx.db.freelist.write(p); err != nil { 183 | tx.rollback() 184 | return err 185 | } 186 | tx.meta.freelist = p.id 187 | 188 | // If the high water mark has moved up then attempt to grow the database. 189 | if tx.meta.pgid > opgid { 190 | if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil { 191 | tx.rollback() 192 | return err 193 | } 194 | } 195 | 196 | // Write dirty pages to disk. 197 | startTime = time.Now() 198 | if err := tx.write(); err != nil { 199 | tx.rollback() 200 | return err 201 | } 202 | 203 | // If strict mode is enabled then perform a consistency check. 204 | // Only the first consistency error is reported in the panic. 205 | if tx.db.StrictMode { 206 | ch := tx.Check() 207 | var errs []string 208 | for { 209 | err, ok := <-ch 210 | if !ok { 211 | break 212 | } 213 | errs = append(errs, err.Error()) 214 | } 215 | if len(errs) > 0 { 216 | panic("check fail: " + strings.Join(errs, "\n")) 217 | } 218 | } 219 | 220 | // Write meta to disk. 221 | if err := tx.writeMeta(); err != nil { 222 | tx.rollback() 223 | return err 224 | } 225 | tx.stats.WriteTime += time.Since(startTime) 226 | 227 | // Finalize the transaction. 228 | tx.close() 229 | 230 | // Execute commit handlers now that the locks have been removed. 231 | for _, fn := range tx.commitHandlers { 232 | fn() 233 | } 234 | 235 | return nil 236 | } 237 | 238 | // Rollback closes the transaction and ignores all previous updates. Read-only 239 | // transactions must be rolled back and not committed. 240 | func (tx *Tx) Rollback() error { 241 | _assert(!tx.managed, "managed tx rollback not allowed") 242 | if tx.db == nil { 243 | return ErrTxClosed 244 | } 245 | tx.rollback() 246 | return nil 247 | } 248 | 249 | func (tx *Tx) rollback() { 250 | if tx.db == nil { 251 | return 252 | } 253 | if tx.writable { 254 | tx.db.freelist.rollback(tx.meta.txid) 255 | tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) 256 | } 257 | tx.close() 258 | } 259 | 260 | func (tx *Tx) close() { 261 | if tx.db == nil { 262 | return 263 | } 264 | if tx.writable { 265 | // Grab freelist stats. 266 | var freelistFreeN = tx.db.freelist.free_count() 267 | var freelistPendingN = tx.db.freelist.pending_count() 268 | var freelistAlloc = tx.db.freelist.size() 269 | 270 | // Remove transaction ref & writer lock. 271 | tx.db.rwtx = nil 272 | tx.db.rwlock.Unlock() 273 | 274 | // Merge statistics. 275 | tx.db.statlock.Lock() 276 | tx.db.stats.FreePageN = freelistFreeN 277 | tx.db.stats.PendingPageN = freelistPendingN 278 | tx.db.stats.FreeAlloc = (freelistFreeN + freelistPendingN) * tx.db.pageSize 279 | tx.db.stats.FreelistInuse = freelistAlloc 280 | tx.db.stats.TxStats.add(&tx.stats) 281 | tx.db.statlock.Unlock() 282 | } else { 283 | tx.db.removeTx(tx) 284 | } 285 | 286 | // Clear all references. 
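// Once these are nil, Commit, Rollback, and Page on this tx report ErrTxClosed.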
287 | tx.db = nil
288 | tx.meta = nil
289 | tx.root = Bucket{tx: tx}
290 | tx.pages = nil
291 | }
292 | 
293 | // Copy writes the entire database to a writer.
294 | // This function exists for backwards compatibility.
295 | //
296 | // Deprecated: Use WriteTo() instead.
297 | func (tx *Tx) Copy(w io.Writer) error {
298 | _, err := tx.WriteTo(w)
299 | return err
300 | }
301 | 
302 | // WriteTo writes the entire database to a writer.
303 | // If err == nil then exactly tx.Size() bytes will be written into the writer.
304 | func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
305 | // Attempt to open a reader with the WriteFlag.
306 | f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
307 | if err != nil {
308 | return 0, err
309 | }
310 | defer func() { _ = f.Close() }()
311 | 
312 | // Generate a meta page. We use the same page data for both meta pages.
313 | buf := make([]byte, tx.db.pageSize)
314 | page := (*page)(unsafe.Pointer(&buf[0]))
315 | page.flags = metaPageFlag
316 | *page.meta() = *tx.meta
317 | 
318 | // Write meta 0.
319 | page.id = 0
320 | page.meta().checksum = page.meta().sum64()
321 | nn, err := w.Write(buf)
322 | n += int64(nn)
323 | if err != nil {
324 | return n, fmt.Errorf("meta 0 copy: %s", err)
325 | }
326 | 
327 | // Write meta 1 with a lower transaction id.
328 | page.id = 1
329 | page.meta().txid -= 1
330 | page.meta().checksum = page.meta().sum64()
331 | nn, err = w.Write(buf)
332 | n += int64(nn)
333 | if err != nil {
334 | return n, fmt.Errorf("meta 1 copy: %s", err)
335 | }
336 | 
337 | // Move past the meta pages in the file.
338 | if _, err := f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET); err != nil {
339 | return n, fmt.Errorf("seek: %s", err)
340 | }
341 | 
342 | // Copy data pages.
343 | wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2))
344 | n += wn
345 | if err != nil {
346 | return n, err
347 | }
348 | 
349 | return n, f.Close()
350 | }
351 | 
352 | // CopyFile copies the entire database to file at the given path.
353 | // A reader transaction is maintained during the copy so it is safe to continue
354 | // using the database while a copy is in progress.
355 | func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
356 | f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
357 | if err != nil {
358 | return err
359 | }
360 | 
361 | err = tx.Copy(f)
362 | if err != nil {
363 | _ = f.Close()
364 | return err
365 | }
366 | return f.Close()
367 | }
368 | 
369 | // Check performs several consistency checks on the database for this transaction.
370 | // An error is returned if any inconsistency is found.
371 | //
372 | // It can be safely run concurrently on a writable transaction. However, this
373 | // incurs a high cost for large databases and databases with a lot of subbuckets
374 | // because of caching. This overhead can be removed if running on a read-only
375 | // transaction; however, it is not safe to execute other write transactions at
376 | // the same time.
377 | func (tx *Tx) Check() <-chan error {
378 | ch := make(chan error)
379 | go tx.check(ch)
380 | return ch
381 | }
382 | 
383 | func (tx *Tx) check(ch chan error) {
384 | // Check if any pages are double freed.
385 | freed := make(map[pgid]bool)
386 | all := make([]pgid, tx.db.freelist.count())
387 | tx.db.freelist.copyall(all)
388 | for _, id := range all {
389 | if freed[id] {
390 | ch <- fmt.Errorf("page %d: already freed", id)
391 | }
392 | freed[id] = true
393 | }
394 | 
395 | // Track every reachable page.
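// The two meta pages are always live, as are the freelist page and its
// overflow pages; every other page below the high-water mark must either be
// reachable from a bucket or present in the freelist.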
396 | reachable := make(map[pgid]*page) 397 | reachable[0] = tx.page(0) // meta0 398 | reachable[1] = tx.page(1) // meta1 399 | for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ { 400 | reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist) 401 | } 402 | 403 | // Recursively check buckets. 404 | tx.checkBucket(&tx.root, reachable, freed, ch) 405 | 406 | // Ensure all pages below high water mark are either reachable or freed. 407 | for i := pgid(0); i < tx.meta.pgid; i++ { 408 | _, isReachable := reachable[i] 409 | if !isReachable && !freed[i] { 410 | ch <- fmt.Errorf("page %d: unreachable unfreed", int(i)) 411 | } 412 | } 413 | 414 | // Close the channel to signal completion. 415 | close(ch) 416 | } 417 | 418 | func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) { 419 | // Ignore inline buckets. 420 | if b.root == 0 { 421 | return 422 | } 423 | 424 | // Check every page used by this bucket. 425 | b.tx.forEachPage(b.root, 0, func(p *page, _ int) { 426 | if p.id > tx.meta.pgid { 427 | ch <- fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid)) 428 | } 429 | 430 | // Ensure each page is only referenced once. 431 | for i := pgid(0); i <= pgid(p.overflow); i++ { 432 | var id = p.id + i 433 | if _, ok := reachable[id]; ok { 434 | ch <- fmt.Errorf("page %d: multiple references", int(id)) 435 | } 436 | reachable[id] = p 437 | } 438 | 439 | // We should only encounter un-freed leaf and branch pages. 440 | if freed[p.id] { 441 | ch <- fmt.Errorf("page %d: reachable freed", int(p.id)) 442 | } else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 { 443 | ch <- fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ()) 444 | } 445 | }) 446 | 447 | // Check each bucket within this bucket. 448 | _ = b.ForEach(func(k, v []byte) error { 449 | if child := b.Bucket(k); child != nil { 450 | tx.checkBucket(child, reachable, freed, ch) 451 | } 452 | return nil 453 | }) 454 | } 455 | 456 | // allocate returns a contiguous block of memory starting at a given page. 457 | func (tx *Tx) allocate(count int) (*page, error) { 458 | p, err := tx.db.allocate(count) 459 | if err != nil { 460 | return nil, err 461 | } 462 | 463 | // Save to our page cache. 464 | tx.pages[p.id] = p 465 | 466 | // Update statistics. 467 | tx.stats.PageCount++ 468 | tx.stats.PageAlloc += count * tx.db.pageSize 469 | 470 | return p, nil 471 | } 472 | 473 | // write writes any dirty pages to disk. 474 | func (tx *Tx) write() error { 475 | // Sort pages by id. 476 | pages := make(pages, 0, len(tx.pages)) 477 | for _, p := range tx.pages { 478 | pages = append(pages, p) 479 | } 480 | // Clear out page cache early. 481 | tx.pages = make(map[pgid]*page) 482 | sort.Sort(pages) 483 | 484 | // Write pages to disk in order. 485 | for _, p := range pages { 486 | size := (int(p.overflow) + 1) * tx.db.pageSize 487 | offset := int64(p.id) * int64(tx.db.pageSize) 488 | 489 | // Write out page in "max allocation" sized chunks. 490 | ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p)) 491 | for { 492 | // Limit our write to our max allocation size. 493 | sz := size 494 | if sz > maxAllocSize-1 { 495 | sz = maxAllocSize - 1 496 | } 497 | 498 | // Write chunk to disk. 499 | buf := ptr[:sz] 500 | if _, err := tx.db.ops.writeAt(buf, offset); err != nil { 501 | return err 502 | } 503 | 504 | // Update statistics. 505 | tx.stats.Write++ 506 | 507 | // Exit inner for loop if we've written all the chunks. 
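// (Each pass writes at most maxAllocSize-1 bytes, so size shrinks to zero.)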
508 | size -= sz 509 | if size == 0 { 510 | break 511 | } 512 | 513 | // Otherwise move offset forward and move pointer to next chunk. 514 | offset += int64(sz) 515 | ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz])) 516 | } 517 | } 518 | 519 | // Ignore file sync if flag is set on DB. 520 | if !tx.db.NoSync || IgnoreNoSync { 521 | if err := fdatasync(tx.db); err != nil { 522 | return err 523 | } 524 | } 525 | 526 | // Put small pages back to page pool. 527 | for _, p := range pages { 528 | // Ignore page sizes over 1 page. 529 | // These are allocated using make() instead of the page pool. 530 | if int(p.overflow) != 0 { 531 | continue 532 | } 533 | 534 | buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:tx.db.pageSize] 535 | 536 | // See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1 537 | for i := range buf { 538 | buf[i] = 0 539 | } 540 | tx.db.pagePool.Put(buf) 541 | } 542 | 543 | return nil 544 | } 545 | 546 | // writeMeta writes the meta to the disk. 547 | func (tx *Tx) writeMeta() error { 548 | // Create a temporary buffer for the meta page. 549 | buf := make([]byte, tx.db.pageSize) 550 | p := tx.db.pageInBuffer(buf, 0) 551 | tx.meta.write(p) 552 | 553 | // Write the meta page to file. 554 | if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil { 555 | return err 556 | } 557 | if !tx.db.NoSync || IgnoreNoSync { 558 | if err := fdatasync(tx.db); err != nil { 559 | return err 560 | } 561 | } 562 | 563 | // Update statistics. 564 | tx.stats.Write++ 565 | 566 | return nil 567 | } 568 | 569 | // page returns a reference to the page with a given id. 570 | // If page has been written to then a temporary buffered page is returned. 571 | func (tx *Tx) page(id pgid) *page { 572 | // Check the dirty pages first. 573 | if tx.pages != nil { 574 | if p, ok := tx.pages[id]; ok { 575 | return p 576 | } 577 | } 578 | 579 | // Otherwise return directly from the mmap. 580 | return tx.db.page(id) 581 | } 582 | 583 | // forEachPage iterates over every page within a given page and executes a function. 584 | func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) { 585 | p := tx.page(pgid) 586 | 587 | // Execute function. 588 | fn(p, depth) 589 | 590 | // Recursively loop over children. 591 | if (p.flags & branchPageFlag) != 0 { 592 | for i := 0; i < int(p.count); i++ { 593 | elem := p.branchPageElement(uint16(i)) 594 | tx.forEachPage(elem.pgid, depth+1, fn) 595 | } 596 | } 597 | } 598 | 599 | // Page returns page information for a given page number. 600 | // This is only safe for concurrent use when used by a writable transaction. 601 | func (tx *Tx) Page(id int) (*PageInfo, error) { 602 | if tx.db == nil { 603 | return nil, ErrTxClosed 604 | } else if pgid(id) >= tx.meta.pgid { 605 | return nil, nil 606 | } 607 | 608 | // Build the page info. 609 | p := tx.db.page(pgid(id)) 610 | info := &PageInfo{ 611 | ID: id, 612 | Count: int(p.count), 613 | OverflowCount: int(p.overflow), 614 | } 615 | 616 | // Determine the type (or if it's free). 617 | if tx.db.freelist.freed(pgid(id)) { 618 | info.Type = "free" 619 | } else { 620 | info.Type = p.typ() 621 | } 622 | 623 | return info, nil 624 | } 625 | 626 | // TxStats represents statistics about the actions performed by the transaction. 627 | type TxStats struct { 628 | // Page statistics. 629 | PageCount int // number of page allocations 630 | PageAlloc int // total bytes allocated 631 | 632 | // Cursor statistics. 
633 | CursorCount int // number of cursors created
634 | 
635 | // Node statistics.
636 | NodeCount int // number of node allocations
637 | NodeDeref int // number of node dereferences
638 | 
639 | // Rebalance statistics.
640 | Rebalance int // number of node rebalances
641 | RebalanceTime time.Duration // total time spent rebalancing
642 | 
643 | // Split/Spill statistics.
644 | Split int // number of nodes split
645 | Spill int // number of nodes spilled
646 | SpillTime time.Duration // total time spent spilling
647 | 
648 | // Write statistics.
649 | Write int // number of writes performed
650 | WriteTime time.Duration // total time spent writing to disk
651 | }
652 | 
653 | func (s *TxStats) add(other *TxStats) {
654 | s.PageCount += other.PageCount
655 | s.PageAlloc += other.PageAlloc
656 | s.CursorCount += other.CursorCount
657 | s.NodeCount += other.NodeCount
658 | s.NodeDeref += other.NodeDeref
659 | s.Rebalance += other.Rebalance
660 | s.RebalanceTime += other.RebalanceTime
661 | s.Split += other.Split
662 | s.Spill += other.Spill
663 | s.SpillTime += other.SpillTime
664 | s.Write += other.Write
665 | s.WriteTime += other.WriteTime
666 | }
667 | 
668 | // Sub calculates and returns the difference between two sets of transaction stats.
669 | // This is useful when obtaining stats at two different points in time and
670 | // you need the performance counters that occurred within that time span.
671 | func (s *TxStats) Sub(other *TxStats) TxStats {
672 | var diff TxStats
673 | diff.PageCount = s.PageCount - other.PageCount
674 | diff.PageAlloc = s.PageAlloc - other.PageAlloc
675 | diff.CursorCount = s.CursorCount - other.CursorCount
676 | diff.NodeCount = s.NodeCount - other.NodeCount
677 | diff.NodeDeref = s.NodeDeref - other.NodeDeref
678 | diff.Rebalance = s.Rebalance - other.Rebalance
679 | diff.RebalanceTime = s.RebalanceTime - other.RebalanceTime
680 | diff.Split = s.Split - other.Split
681 | diff.Spill = s.Spill - other.Spill
682 | diff.SpillTime = s.SpillTime - other.SpillTime
683 | diff.Write = s.Write - other.Write
684 | diff.WriteTime = s.WriteTime - other.WriteTime
685 | return diff
686 | }
687 | 
--------------------------------------------------------------------------------
/tx_test.go:
--------------------------------------------------------------------------------
1 | package bolt_test
2 | 
3 | import (
4 | "bytes"
5 | "errors"
6 | "fmt"
7 | "log"
8 | "os"
9 | "testing"
10 | 
11 | "github.com/boltdb/bolt"
12 | )
13 | 
14 | // Ensure that committing a closed transaction returns an error.
15 | func TestTx_Commit_ErrTxClosed(t *testing.T) {
16 | db := MustOpenDB()
17 | defer db.MustClose()
18 | tx, err := db.Begin(true)
19 | if err != nil {
20 | t.Fatal(err)
21 | }
22 | 
23 | if _, err := tx.CreateBucket([]byte("foo")); err != nil {
24 | t.Fatal(err)
25 | }
26 | 
27 | if err := tx.Commit(); err != nil {
28 | t.Fatal(err)
29 | }
30 | 
31 | if err := tx.Commit(); err != bolt.ErrTxClosed {
32 | t.Fatalf("unexpected error: %s", err)
33 | }
34 | }
35 | 
36 | // Ensure that rolling back a closed transaction returns an error.
37 | func TestTx_Rollback_ErrTxClosed(t *testing.T) {
38 | db := MustOpenDB()
39 | defer db.MustClose()
40 | 
41 | tx, err := db.Begin(true)
42 | if err != nil {
43 | t.Fatal(err)
44 | }
45 | 
46 | if err := tx.Rollback(); err != nil {
47 | t.Fatal(err)
48 | }
49 | if err := tx.Rollback(); err != bolt.ErrTxClosed {
50 | t.Fatalf("unexpected error: %s", err)
51 | }
52 | }
53 | 
54 | // Ensure that committing a read-only transaction returns an error.
55 | func TestTx_Commit_ErrTxNotWritable(t *testing.T) {
56 | db := MustOpenDB()
57 | defer db.MustClose()
58 | tx, err := db.Begin(false)
59 | if err != nil {
60 | t.Fatal(err)
61 | }
62 | if err := tx.Commit(); err != bolt.ErrTxNotWritable {
63 | t.Fatal(err)
64 | }
65 | }
66 | 
67 | // Ensure that a transaction can retrieve a cursor on the root bucket.
68 | func TestTx_Cursor(t *testing.T) {
69 | db := MustOpenDB()
70 | defer db.MustClose()
71 | if err := db.Update(func(tx *bolt.Tx) error {
72 | if _, err := tx.CreateBucket([]byte("widgets")); err != nil {
73 | t.Fatal(err)
74 | }
75 | 
76 | if _, err := tx.CreateBucket([]byte("woojits")); err != nil {
77 | t.Fatal(err)
78 | }
79 | 
80 | c := tx.Cursor()
81 | if k, v := c.First(); !bytes.Equal(k, []byte("widgets")) {
82 | t.Fatalf("unexpected key: %v", k)
83 | } else if v != nil {
84 | t.Fatalf("unexpected value: %v", v)
85 | }
86 | 
87 | if k, v := c.Next(); !bytes.Equal(k, []byte("woojits")) {
88 | t.Fatalf("unexpected key: %v", k)
89 | } else if v != nil {
90 | t.Fatalf("unexpected value: %v", v)
91 | }
92 | 
93 | if k, v := c.Next(); k != nil {
94 | t.Fatalf("unexpected key: %v", k)
95 | } else if v != nil {
96 | t.Fatalf("unexpected value: %v", v)
97 | }
98 | 
99 | return nil
100 | }); err != nil {
101 | t.Fatal(err)
102 | }
103 | }
104 | 
105 | // Ensure that creating a bucket with a read-only transaction returns an error.
106 | func TestTx_CreateBucket_ErrTxNotWritable(t *testing.T) {
107 | db := MustOpenDB()
108 | defer db.MustClose()
109 | if err := db.View(func(tx *bolt.Tx) error {
110 | _, err := tx.CreateBucket([]byte("foo"))
111 | if err != bolt.ErrTxNotWritable {
112 | t.Fatalf("unexpected error: %s", err)
113 | }
114 | return nil
115 | }); err != nil {
116 | t.Fatal(err)
117 | }
118 | }
119 | 
120 | // Ensure that creating a bucket on a closed transaction returns an error.
121 | func TestTx_CreateBucket_ErrTxClosed(t *testing.T) {
122 | db := MustOpenDB()
123 | defer db.MustClose()
124 | tx, err := db.Begin(true)
125 | if err != nil {
126 | t.Fatal(err)
127 | }
128 | if err := tx.Commit(); err != nil {
129 | t.Fatal(err)
130 | }
131 | 
132 | if _, err := tx.CreateBucket([]byte("foo")); err != bolt.ErrTxClosed {
133 | t.Fatalf("unexpected error: %s", err)
134 | }
135 | }
136 | 
137 | // Ensure that a Tx can retrieve a bucket.
138 | func TestTx_Bucket(t *testing.T) {
139 | db := MustOpenDB()
140 | defer db.MustClose()
141 | if err := db.Update(func(tx *bolt.Tx) error {
142 | if _, err := tx.CreateBucket([]byte("widgets")); err != nil {
143 | t.Fatal(err)
144 | }
145 | if tx.Bucket([]byte("widgets")) == nil {
146 | t.Fatal("expected bucket")
147 | }
148 | return nil
149 | }); err != nil {
150 | t.Fatal(err)
151 | }
152 | }
153 | 
154 | // Ensure that a Tx retrieving a non-existent key returns nil.
155 | func TestTx_Get_NotFound(t *testing.T) {
156 | db := MustOpenDB()
157 | defer db.MustClose()
158 | if err := db.Update(func(tx *bolt.Tx) error {
159 | b, err := tx.CreateBucket([]byte("widgets"))
160 | if err != nil {
161 | t.Fatal(err)
162 | }
163 | 
164 | if err := b.Put([]byte("foo"), []byte("bar")); err != nil {
165 | t.Fatal(err)
166 | }
167 | if b.Get([]byte("no_such_key")) != nil {
168 | t.Fatal("expected nil value")
169 | }
170 | return nil
171 | }); err != nil {
172 | t.Fatal(err)
173 | }
174 | }
175 | 
176 | // Ensure that a bucket can be created and retrieved.
177 | func TestTx_CreateBucket(t *testing.T) {
178 | db := MustOpenDB()
179 | defer db.MustClose()
180 | 
181 | // Create a bucket.
182 | if err := db.Update(func(tx *bolt.Tx) error { 183 | b, err := tx.CreateBucket([]byte("widgets")) 184 | if err != nil { 185 | t.Fatal(err) 186 | } else if b == nil { 187 | t.Fatal("expected bucket") 188 | } 189 | return nil 190 | }); err != nil { 191 | t.Fatal(err) 192 | } 193 | 194 | // Read the bucket through a separate transaction. 195 | if err := db.View(func(tx *bolt.Tx) error { 196 | if tx.Bucket([]byte("widgets")) == nil { 197 | t.Fatal("expected bucket") 198 | } 199 | return nil 200 | }); err != nil { 201 | t.Fatal(err) 202 | } 203 | } 204 | 205 | // Ensure that a bucket can be created if it doesn't already exist. 206 | func TestTx_CreateBucketIfNotExists(t *testing.T) { 207 | db := MustOpenDB() 208 | defer db.MustClose() 209 | if err := db.Update(func(tx *bolt.Tx) error { 210 | // Create bucket. 211 | if b, err := tx.CreateBucketIfNotExists([]byte("widgets")); err != nil { 212 | t.Fatal(err) 213 | } else if b == nil { 214 | t.Fatal("expected bucket") 215 | } 216 | 217 | // Create bucket again. 218 | if b, err := tx.CreateBucketIfNotExists([]byte("widgets")); err != nil { 219 | t.Fatal(err) 220 | } else if b == nil { 221 | t.Fatal("expected bucket") 222 | } 223 | 224 | return nil 225 | }); err != nil { 226 | t.Fatal(err) 227 | } 228 | 229 | // Read the bucket through a separate transaction. 230 | if err := db.View(func(tx *bolt.Tx) error { 231 | if tx.Bucket([]byte("widgets")) == nil { 232 | t.Fatal("expected bucket") 233 | } 234 | return nil 235 | }); err != nil { 236 | t.Fatal(err) 237 | } 238 | } 239 | 240 | // Ensure transaction returns an error if creating an unnamed bucket. 241 | func TestTx_CreateBucketIfNotExists_ErrBucketNameRequired(t *testing.T) { 242 | db := MustOpenDB() 243 | defer db.MustClose() 244 | if err := db.Update(func(tx *bolt.Tx) error { 245 | if _, err := tx.CreateBucketIfNotExists([]byte{}); err != bolt.ErrBucketNameRequired { 246 | t.Fatalf("unexpected error: %s", err) 247 | } 248 | 249 | if _, err := tx.CreateBucketIfNotExists(nil); err != bolt.ErrBucketNameRequired { 250 | t.Fatalf("unexpected error: %s", err) 251 | } 252 | 253 | return nil 254 | }); err != nil { 255 | t.Fatal(err) 256 | } 257 | } 258 | 259 | // Ensure that a bucket cannot be created twice. 260 | func TestTx_CreateBucket_ErrBucketExists(t *testing.T) { 261 | db := MustOpenDB() 262 | defer db.MustClose() 263 | 264 | // Create a bucket. 265 | if err := db.Update(func(tx *bolt.Tx) error { 266 | if _, err := tx.CreateBucket([]byte("widgets")); err != nil { 267 | t.Fatal(err) 268 | } 269 | return nil 270 | }); err != nil { 271 | t.Fatal(err) 272 | } 273 | 274 | // Create the same bucket again. 275 | if err := db.Update(func(tx *bolt.Tx) error { 276 | if _, err := tx.CreateBucket([]byte("widgets")); err != bolt.ErrBucketExists { 277 | t.Fatalf("unexpected error: %s", err) 278 | } 279 | return nil 280 | }); err != nil { 281 | t.Fatal(err) 282 | } 283 | } 284 | 285 | // Ensure that a bucket is created with a non-blank name. 286 | func TestTx_CreateBucket_ErrBucketNameRequired(t *testing.T) { 287 | db := MustOpenDB() 288 | defer db.MustClose() 289 | if err := db.Update(func(tx *bolt.Tx) error { 290 | if _, err := tx.CreateBucket(nil); err != bolt.ErrBucketNameRequired { 291 | t.Fatalf("unexpected error: %s", err) 292 | } 293 | return nil 294 | }); err != nil { 295 | t.Fatal(err) 296 | } 297 | } 298 | 299 | // Ensure that a bucket can be deleted. 300 | func TestTx_DeleteBucket(t *testing.T) { 301 | db := MustOpenDB() 302 | defer db.MustClose() 303 | 304 | // Create a bucket and add a value. 
305 | if err := db.Update(func(tx *bolt.Tx) error { 306 | b, err := tx.CreateBucket([]byte("widgets")) 307 | if err != nil { 308 | t.Fatal(err) 309 | } 310 | if err := b.Put([]byte("foo"), []byte("bar")); err != nil { 311 | t.Fatal(err) 312 | } 313 | return nil 314 | }); err != nil { 315 | t.Fatal(err) 316 | } 317 | 318 | // Delete the bucket and make sure we can't get the value. 319 | if err := db.Update(func(tx *bolt.Tx) error { 320 | if err := tx.DeleteBucket([]byte("widgets")); err != nil { 321 | t.Fatal(err) 322 | } 323 | if tx.Bucket([]byte("widgets")) != nil { 324 | t.Fatal("unexpected bucket") 325 | } 326 | return nil 327 | }); err != nil { 328 | t.Fatal(err) 329 | } 330 | 331 | if err := db.Update(func(tx *bolt.Tx) error { 332 | // Create the bucket again and make sure there's not a phantom value. 333 | b, err := tx.CreateBucket([]byte("widgets")) 334 | if err != nil { 335 | t.Fatal(err) 336 | } 337 | if v := b.Get([]byte("foo")); v != nil { 338 | t.Fatalf("unexpected phantom value: %v", v) 339 | } 340 | return nil 341 | }); err != nil { 342 | t.Fatal(err) 343 | } 344 | } 345 | 346 | // Ensure that deleting a bucket on a closed transaction returns an error. 347 | func TestTx_DeleteBucket_ErrTxClosed(t *testing.T) { 348 | db := MustOpenDB() 349 | defer db.MustClose() 350 | tx, err := db.Begin(true) 351 | if err != nil { 352 | t.Fatal(err) 353 | } 354 | if err := tx.Commit(); err != nil { 355 | t.Fatal(err) 356 | } 357 | if err := tx.DeleteBucket([]byte("foo")); err != bolt.ErrTxClosed { 358 | t.Fatalf("unexpected error: %s", err) 359 | } 360 | } 361 | 362 | // Ensure that deleting a bucket with a read-only transaction returns an error. 363 | func TestTx_DeleteBucket_ReadOnly(t *testing.T) { 364 | db := MustOpenDB() 365 | defer db.MustClose() 366 | if err := db.View(func(tx *bolt.Tx) error { 367 | if err := tx.DeleteBucket([]byte("foo")); err != bolt.ErrTxNotWritable { 368 | t.Fatalf("unexpected error: %s", err) 369 | } 370 | return nil 371 | }); err != nil { 372 | t.Fatal(err) 373 | } 374 | } 375 | 376 | // Ensure that nothing happens when deleting a bucket that doesn't exist. 377 | func TestTx_DeleteBucket_NotFound(t *testing.T) { 378 | db := MustOpenDB() 379 | defer db.MustClose() 380 | if err := db.Update(func(tx *bolt.Tx) error { 381 | if err := tx.DeleteBucket([]byte("widgets")); err != bolt.ErrBucketNotFound { 382 | t.Fatalf("unexpected error: %s", err) 383 | } 384 | return nil 385 | }); err != nil { 386 | t.Fatal(err) 387 | } 388 | } 389 | 390 | // Ensure that no error is returned when a tx.ForEach function does not return 391 | // an error. 392 | func TestTx_ForEach_NoError(t *testing.T) { 393 | db := MustOpenDB() 394 | defer db.MustClose() 395 | if err := db.Update(func(tx *bolt.Tx) error { 396 | b, err := tx.CreateBucket([]byte("widgets")) 397 | if err != nil { 398 | t.Fatal(err) 399 | } 400 | if err := b.Put([]byte("foo"), []byte("bar")); err != nil { 401 | t.Fatal(err) 402 | } 403 | 404 | if err := tx.ForEach(func(name []byte, b *bolt.Bucket) error { 405 | return nil 406 | }); err != nil { 407 | t.Fatal(err) 408 | } 409 | return nil 410 | }); err != nil { 411 | t.Fatal(err) 412 | } 413 | } 414 | 415 | // Ensure that an error is returned when a tx.ForEach function returns an error. 
416 | func TestTx_ForEach_WithError(t *testing.T) { 417 | db := MustOpenDB() 418 | defer db.MustClose() 419 | if err := db.Update(func(tx *bolt.Tx) error { 420 | b, err := tx.CreateBucket([]byte("widgets")) 421 | if err != nil { 422 | t.Fatal(err) 423 | } 424 | if err := b.Put([]byte("foo"), []byte("bar")); err != nil { 425 | t.Fatal(err) 426 | } 427 | 428 | marker := errors.New("marker") 429 | if err := tx.ForEach(func(name []byte, b *bolt.Bucket) error { 430 | return marker 431 | }); err != marker { 432 | t.Fatalf("unexpected error: %s", err) 433 | } 434 | return nil 435 | }); err != nil { 436 | t.Fatal(err) 437 | } 438 | } 439 | 440 | // Ensure that Tx commit handlers are called after a transaction successfully commits. 441 | func TestTx_OnCommit(t *testing.T) { 442 | db := MustOpenDB() 443 | defer db.MustClose() 444 | 445 | var x int 446 | if err := db.Update(func(tx *bolt.Tx) error { 447 | tx.OnCommit(func() { x += 1 }) 448 | tx.OnCommit(func() { x += 2 }) 449 | if _, err := tx.CreateBucket([]byte("widgets")); err != nil { 450 | t.Fatal(err) 451 | } 452 | return nil 453 | }); err != nil { 454 | t.Fatal(err) 455 | } else if x != 3 { 456 | t.Fatalf("unexpected x: %d", x) 457 | } 458 | } 459 | 460 | // Ensure that Tx commit handlers are NOT called after a transaction rolls back. 461 | func TestTx_OnCommit_Rollback(t *testing.T) { 462 | db := MustOpenDB() 463 | defer db.MustClose() 464 | 465 | var x int 466 | if err := db.Update(func(tx *bolt.Tx) error { 467 | tx.OnCommit(func() { x += 1 }) 468 | tx.OnCommit(func() { x += 2 }) 469 | if _, err := tx.CreateBucket([]byte("widgets")); err != nil { 470 | t.Fatal(err) 471 | } 472 | return errors.New("rollback this commit") 473 | }); err == nil || err.Error() != "rollback this commit" { 474 | t.Fatalf("unexpected error: %s", err) 475 | } else if x != 0 { 476 | t.Fatalf("unexpected x: %d", x) 477 | } 478 | } 479 | 480 | // Ensure that the database can be copied to a file path. 
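// The copy is taken inside a read-only transaction (db.View), so the database
// remains usable while the copy is in progress.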
481 | func TestTx_CopyFile(t *testing.T) { 482 | db := MustOpenDB() 483 | defer db.MustClose() 484 | 485 | path := tempfile() 486 | if err := db.Update(func(tx *bolt.Tx) error { 487 | b, err := tx.CreateBucket([]byte("widgets")) 488 | if err != nil { 489 | t.Fatal(err) 490 | } 491 | if err := b.Put([]byte("foo"), []byte("bar")); err != nil { 492 | t.Fatal(err) 493 | } 494 | if err := b.Put([]byte("baz"), []byte("bat")); err != nil { 495 | t.Fatal(err) 496 | } 497 | return nil 498 | }); err != nil { 499 | t.Fatal(err) 500 | } 501 | 502 | if err := db.View(func(tx *bolt.Tx) error { 503 | return tx.CopyFile(path, 0600) 504 | }); err != nil { 505 | t.Fatal(err) 506 | } 507 | 508 | db2, err := bolt.Open(path, 0600, nil) 509 | if err != nil { 510 | t.Fatal(err) 511 | } 512 | 513 | if err := db2.View(func(tx *bolt.Tx) error { 514 | if v := tx.Bucket([]byte("widgets")).Get([]byte("foo")); !bytes.Equal(v, []byte("bar")) { 515 | t.Fatalf("unexpected value: %v", v) 516 | } 517 | if v := tx.Bucket([]byte("widgets")).Get([]byte("baz")); !bytes.Equal(v, []byte("bat")) { 518 | t.Fatalf("unexpected value: %v", v) 519 | } 520 | return nil 521 | }); err != nil { 522 | t.Fatal(err) 523 | } 524 | 525 | if err := db2.Close(); err != nil { 526 | t.Fatal(err) 527 | } 528 | } 529 | 530 | type failWriterError struct{} 531 | 532 | func (failWriterError) Error() string { 533 | return "error injected for tests" 534 | } 535 | 536 | type failWriter struct { 537 | // fail after this many bytes 538 | After int 539 | } 540 | 541 | func (f *failWriter) Write(p []byte) (n int, err error) { 542 | n = len(p) 543 | if n > f.After { 544 | n = f.After 545 | err = failWriterError{} 546 | } 547 | f.After -= n 548 | return n, err 549 | } 550 | 551 | // Ensure that Copy handles write errors right. 552 | func TestTx_CopyFile_Error_Meta(t *testing.T) { 553 | db := MustOpenDB() 554 | defer db.MustClose() 555 | if err := db.Update(func(tx *bolt.Tx) error { 556 | b, err := tx.CreateBucket([]byte("widgets")) 557 | if err != nil { 558 | t.Fatal(err) 559 | } 560 | if err := b.Put([]byte("foo"), []byte("bar")); err != nil { 561 | t.Fatal(err) 562 | } 563 | if err := b.Put([]byte("baz"), []byte("bat")); err != nil { 564 | t.Fatal(err) 565 | } 566 | return nil 567 | }); err != nil { 568 | t.Fatal(err) 569 | } 570 | 571 | if err := db.View(func(tx *bolt.Tx) error { 572 | return tx.Copy(&failWriter{}) 573 | }); err == nil || err.Error() != "meta 0 copy: error injected for tests" { 574 | t.Fatalf("unexpected error: %v", err) 575 | } 576 | } 577 | 578 | // Ensure that Copy handles write errors right. 579 | func TestTx_CopyFile_Error_Normal(t *testing.T) { 580 | db := MustOpenDB() 581 | defer db.MustClose() 582 | if err := db.Update(func(tx *bolt.Tx) error { 583 | b, err := tx.CreateBucket([]byte("widgets")) 584 | if err != nil { 585 | t.Fatal(err) 586 | } 587 | if err := b.Put([]byte("foo"), []byte("bar")); err != nil { 588 | t.Fatal(err) 589 | } 590 | if err := b.Put([]byte("baz"), []byte("bat")); err != nil { 591 | t.Fatal(err) 592 | } 593 | return nil 594 | }); err != nil { 595 | t.Fatal(err) 596 | } 597 | 598 | if err := db.View(func(tx *bolt.Tx) error { 599 | return tx.Copy(&failWriter{3 * db.Info().PageSize}) 600 | }); err == nil || err.Error() != "error injected for tests" { 601 | t.Fatalf("unexpected error: %v", err) 602 | } 603 | } 604 | 605 | func ExampleTx_Rollback() { 606 | // Open the database. 
607 | db, err := bolt.Open(tempfile(), 0666, nil) 608 | if err != nil { 609 | log.Fatal(err) 610 | } 611 | defer os.Remove(db.Path()) 612 | 613 | // Create a bucket. 614 | if err := db.Update(func(tx *bolt.Tx) error { 615 | _, err := tx.CreateBucket([]byte("widgets")) 616 | return err 617 | }); err != nil { 618 | log.Fatal(err) 619 | } 620 | 621 | // Set a value for a key. 622 | if err := db.Update(func(tx *bolt.Tx) error { 623 | return tx.Bucket([]byte("widgets")).Put([]byte("foo"), []byte("bar")) 624 | }); err != nil { 625 | log.Fatal(err) 626 | } 627 | 628 | // Update the key but rollback the transaction so it never saves. 629 | tx, err := db.Begin(true) 630 | if err != nil { 631 | log.Fatal(err) 632 | } 633 | b := tx.Bucket([]byte("widgets")) 634 | if err := b.Put([]byte("foo"), []byte("baz")); err != nil { 635 | log.Fatal(err) 636 | } 637 | if err := tx.Rollback(); err != nil { 638 | log.Fatal(err) 639 | } 640 | 641 | // Ensure that our original value is still set. 642 | if err := db.View(func(tx *bolt.Tx) error { 643 | value := tx.Bucket([]byte("widgets")).Get([]byte("foo")) 644 | fmt.Printf("The value for 'foo' is still: %s\n", value) 645 | return nil 646 | }); err != nil { 647 | log.Fatal(err) 648 | } 649 | 650 | // Close database to release file lock. 651 | if err := db.Close(); err != nil { 652 | log.Fatal(err) 653 | } 654 | 655 | // Output: 656 | // The value for 'foo' is still: bar 657 | } 658 | 659 | func ExampleTx_CopyFile() { 660 | // Open the database. 661 | db, err := bolt.Open(tempfile(), 0666, nil) 662 | if err != nil { 663 | log.Fatal(err) 664 | } 665 | defer os.Remove(db.Path()) 666 | 667 | // Create a bucket and a key. 668 | if err := db.Update(func(tx *bolt.Tx) error { 669 | b, err := tx.CreateBucket([]byte("widgets")) 670 | if err != nil { 671 | return err 672 | } 673 | if err := b.Put([]byte("foo"), []byte("bar")); err != nil { 674 | return err 675 | } 676 | return nil 677 | }); err != nil { 678 | log.Fatal(err) 679 | } 680 | 681 | // Copy the database to another file. 682 | toFile := tempfile() 683 | if err := db.View(func(tx *bolt.Tx) error { 684 | return tx.CopyFile(toFile, 0666) 685 | }); err != nil { 686 | log.Fatal(err) 687 | } 688 | defer os.Remove(toFile) 689 | 690 | // Open the cloned database. 691 | db2, err := bolt.Open(toFile, 0666, nil) 692 | if err != nil { 693 | log.Fatal(err) 694 | } 695 | 696 | // Ensure that the key exists in the copy. 697 | if err := db2.View(func(tx *bolt.Tx) error { 698 | value := tx.Bucket([]byte("widgets")).Get([]byte("foo")) 699 | fmt.Printf("The value for 'foo' in the clone is: %s\n", value) 700 | return nil 701 | }); err != nil { 702 | log.Fatal(err) 703 | } 704 | 705 | // Close database to release file lock. 706 | if err := db.Close(); err != nil { 707 | log.Fatal(err) 708 | } 709 | 710 | if err := db2.Close(); err != nil { 711 | log.Fatal(err) 712 | } 713 | 714 | // Output: 715 | // The value for 'foo' in the clone is: bar 716 | } 717 | --------------------------------------------------------------------------------