├── .gitignore
├── journal
├── Makefile
├── journal_test.go
├── helpers.go
├── mapping.go
├── meta.go
├── manager.go
├── journal.go
└── meta_gen.go
├── docs
├── logo.png
├── cluster-view-0.png
└── cluster-view-1.png
├── deploy
├── build.sh
├── Dockerfile
└── Makefile
├── cluster
├── events.go
├── client.go
└── manager.go
├── glide.yaml
├── storage
├── helpers.go
├── local.go
└── remote.go
├── LICENSE
├── helpers.go
├── glide.lock
├── api
├── public.go
└── private.go
├── cmd
└── objstore
│ └── main.go
├── README.md
└── objstore.go
/.gitignore:
--------------------------------------------------------------------------------
1 | var/
2 | .DS_Store
3 | vendor/
4 |
--------------------------------------------------------------------------------
/journal/Makefile:
--------------------------------------------------------------------------------
1 | msgp:
2 | msgp -file meta.go -tests=false
3 |
--------------------------------------------------------------------------------
/docs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SphereSoftware/objstore/HEAD/docs/logo.png
--------------------------------------------------------------------------------
/docs/cluster-view-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SphereSoftware/objstore/HEAD/docs/cluster-view-0.png
--------------------------------------------------------------------------------
/docs/cluster-view-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SphereSoftware/objstore/HEAD/docs/cluster-view-1.png
--------------------------------------------------------------------------------
/deploy/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | go get github.com/Masterminds/glide
4 | cd $GOPATH/src/sphere.software/objstore
5 | $GOPATH/bin/glide install
6 | go build -o /out/objstore sphere.software/objstore/cmd/objstore
7 |
--------------------------------------------------------------------------------
/deploy/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gliderlabs/alpine
2 |
3 | RUN apk add --no-cache ca-certificates
4 | COPY out/objstore /objstore/bin/objstore
5 |
6 | ENV APP_DEBUG_LEVEL=1
7 | ENV APP_CLUSTER_TAGNAME=default
8 |
9 | VOLUME /objstore/data
10 | WORKDIR /objstore/data
11 |
12 | EXPOSE 10999 10080
13 |
14 | CMD ["/objstore/bin/objstore"]
15 |
--------------------------------------------------------------------------------
/cluster/events.go:
--------------------------------------------------------------------------------
1 | package cluster
2 |
3 | import "sphere.software/objstore/journal"
4 |
5 | type EventType int
6 |
7 | const (
8 | EventUnknown EventType = 0
9 | EventFileAdded EventType = 1
10 | EventFileDeleted EventType = 2
11 | EventOpaqueData EventType = 3
12 | EventStopAnnounce EventType = 999
13 | )
14 |
15 | type EventAnnounce struct {
16 | Type EventType `json:"type"`
17 |
18 | FileMeta *journal.FileMeta `json:"meta"`
19 | OpaqueData []byte `json:"data"`
20 | }
21 |
--------------------------------------------------------------------------------
/journal/journal_test.go:
--------------------------------------------------------------------------------
1 | package journal
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | )
8 |
9 | var noID ID
10 |
11 | func TestBtreeDiffBtree(t *testing.T) {
12 | assert := assert.New(t)
13 |
14 | j1 := MakeJournal(noID, []*Meta{
15 | {ID: "000"}, {ID: "001"}, {ID: "002"}, {ID: "003"}, {ID: "005"},
16 | })
17 | j2 := MakeJournal(noID, []*Meta{
18 | {ID: "000"}, {ID: "002"}, {ID: "003"}, {ID: "004"}, {ID: "005"},
19 | })
20 |
21 | added, deleted := j1.Diff(j2)
22 | assert.Equal([]*Meta{{ID: "004"}}, added)
23 | assert.Equal([]*Meta{{ID: "001"}}, deleted)
24 |
25 | added, deleted = j1.Diff(j1)
26 | assert.Empty(added)
27 | assert.Empty(deleted)
28 | }
29 |
--------------------------------------------------------------------------------
/deploy/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 |
3 | build:
4 | go get github.com/Masterminds/glide
5 | cd $(GOPATH)/src/sphere.software/objstore
6 | glide install
7 | go build
8 | docker build -t spheresoftware/objstore .
9 |
10 | OUT ?= $(shell pwd)/out
11 | PROJECT ?= $(GOPATH)/src/sphere.software/objstore
12 |
13 | local:
14 | mkdir -p $(OUT)
15 | docker run -a stdout -a stderr --rm \
16 | -v $(OUT):/out -v $(PROJECT):/go/src/sphere.software/objstore \
17 | -e GOPATH=/go \
18 | golang:alpine /go/src/sphere.software/objstore/deploy/build.sh
19 | docker build -t spheresoftware/objstore .
20 | rm $(OUT)/objstore
21 | rmdir $(OUT)
22 |
23 | docker:
24 | make local
25 | docker save -o objstore.tar spheresoftware/objstore
26 | zip objstore.tar.zip objstore.tar
27 |
--------------------------------------------------------------------------------
/glide.yaml:
--------------------------------------------------------------------------------
1 | package: sphere.software/objstore
2 | import:
3 | - package: github.com/astranet/astranet
4 | subpackages:
5 | - addr
6 | - package: github.com/aws/aws-sdk-go
7 | version: ^1.10.3
8 | subpackages:
9 | - aws
10 | - aws/session
11 | - service/s3
12 | - package: github.com/oklog/ulid
13 | - package: github.com/boltdb/bolt
14 | version: ^1.3.0
15 | - package: github.com/cznic/b
16 | - package: github.com/gin-gonic/gin
17 | version: ^1.1.4
18 | - package: github.com/jawher/mow.cli
19 | - package: github.com/tinylib/msgp
20 | version: ^1.0.1
21 | subpackages:
22 | - msgp
23 | - package: github.com/xlab/closer
24 | testImport:
25 | - package: github.com/stretchr/testify
26 | version: ^1.1.4
27 | subpackages:
28 | - assert
29 |
--------------------------------------------------------------------------------
/journal/helpers.go:
--------------------------------------------------------------------------------
1 | package journal
2 |
3 | import (
4 | "math/rand"
5 | "sync"
6 | "time"
7 |
8 | "github.com/oklog/ulid"
9 | )
10 |
11 | var globalRand = rand.New(&lockedSource{
12 | src: rand.NewSource(time.Now().UnixNano()),
13 | })
14 |
15 | // GetULID constructs a Universally Unique Lexicographically Sortable Identifier.
16 | // See https://github.com/oklog/ulid
17 | func GetULID() string {
18 | return ulid.MustNew(ulid.Timestamp(time.Now()), globalRand).String()
19 | }
20 |
21 | type lockedSource struct {
22 | lk sync.Mutex
23 | src rand.Source
24 | }
25 |
26 | func (r *lockedSource) Int63() (n int64) {
27 | r.lk.Lock()
28 | n = r.src.Int63()
29 | r.lk.Unlock()
30 | return
31 | }
32 |
33 | func (r *lockedSource) Seed(seed int64) {
34 | r.lk.Lock()
35 | r.src.Seed(seed)
36 | r.lk.Unlock()
37 | }
38 |
--------------------------------------------------------------------------------
/journal/mapping.go:
--------------------------------------------------------------------------------
1 | package journal
2 |
3 | import "github.com/boltdb/bolt"
4 |
5 | type Mapping interface {
6 | Get(id ID) *JournalMeta
7 | Set(id ID, meta *JournalMeta) error
8 | SetBytes(k, v []byte) error
9 | }
10 |
11 | type mapping struct {
12 | tx *bolt.Tx
13 | b *bolt.Bucket
14 | }
15 |
16 | func NewMapping(tx *bolt.Tx) (Mapping, error) {
17 | b, err := tx.CreateBucketIfNotExists(mappingBucket)
18 | if err != nil {
19 | return nil, err
20 | }
21 | m := &mapping{
22 | tx: tx,
23 | b: b,
24 | }
25 | return m, nil
26 | }
27 |
28 | // Get loads the journal meta stored under id. It returns nil when the key
29 | // is absent or when the stored record cannot be decoded, so callers see a
30 | // corrupt entry the same way they see a missing one.
31 | func (m *mapping) Get(id ID) *JournalMeta {
32 | data := m.b.Get([]byte(id))
33 | if data == nil {
34 | return nil
35 | }
36 | meta := new(JournalMeta)
37 | if _, err := meta.UnmarshalMsg(data); err != nil {
38 | // previously the decode error was silently dropped, which could hand
39 | // out a half-populated zero-value meta for a corrupt record.
40 | return nil
41 | }
42 | return meta
43 | }
37 |
38 | func (m *mapping) Set(id ID, meta *JournalMeta) error {
39 | v, err := meta.MarshalMsg(nil)
40 | if err != nil {
41 | return err
42 | }
43 | return m.b.Put([]byte(id), v)
44 | }
45 |
46 | func (m *mapping) SetBytes(k, v []byte) error {
47 | return m.b.Put(k, v)
48 | }
49 |
--------------------------------------------------------------------------------
/storage/helpers.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "bytes"
5 | "io"
6 | )
7 |
8 | type readSeeker struct {
9 | io.Reader
10 |
11 | buf []byte
12 | offset int
13 | }
14 |
15 | func newReadSeeker(buf []byte) io.ReadSeeker {
16 | return &readSeeker{
17 | Reader: bytes.NewReader(buf),
18 | buf: buf,
19 | }
20 | }
21 |
22 | func (r *readSeeker) Seek(off int64, whence int) (int64, error) {
23 | offset := int(off)
24 | switch whence {
25 | case io.SeekStart:
26 | if offset < 0 || offset > len(r.buf) {
27 | return 0, io.EOF
28 | }
29 | r.offset = offset
30 | r.Reader = bytes.NewReader(r.buf[offset:])
31 | case io.SeekEnd:
32 | if offset < 0 || offset > len(r.buf) {
33 | return 0, io.EOF
34 | }
35 | r.offset = len(r.buf) - offset
36 | r.Reader = bytes.NewReader(r.buf[len(r.buf)-offset:])
37 | case io.SeekCurrent:
38 | if offset+r.offset > len(r.buf) ||
39 | offset+r.offset < 0 {
40 | return 0, io.EOF
41 | }
42 | r.offset = r.offset + offset
43 | r.Reader = bytes.NewReader(r.buf[r.offset:])
44 | default:
45 | panic("wrong whence arg")
46 | }
47 | return int64(r.offset), nil
48 | }
49 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Sphere Software
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/helpers.go:
--------------------------------------------------------------------------------
1 | package objstore
2 |
3 | import "sphere.software/objstore/cluster"
4 |
5 | // PUMP CODE — a circular buffer
6 | // Copyright 2014 The Go Authors
7 | //
8 | // pumpEventAnnounces returns a channel src such that sending on src will eventually send on
9 | // dst, in order, but that src will always be ready to send/receive soon, even
10 | // if dst currently isn't. It is effectively an infinitely buffered channel.
11 | //
12 | // In particular, goroutine A sending on src will not deadlock even if goroutine
13 | // B that's responsible for receiving on dst is currently blocked trying to
14 | // send to A on a separate channel.
15 | //
16 | // Send an EventStopAnnounce event on the src channel to close the dst channel after all queued
17 | // events are sent on dst. After that, other goroutines can still send to src,
18 | // so that such sends won't block forever, but such events will be ignored.
19 | func pumpEventAnnounces(dst chan *EventAnnounce) (src chan *EventAnnounce) {
20 | src = make(chan *EventAnnounce)
21 | go func() {
22 | // initialSize is the initial size of the circular buffer. It must be a
23 | // power of 2.
24 | const initialSize = 16
25 | i, j, buf, mask := 0, 0, make([]*EventAnnounce, initialSize), initialSize-1
26 |
27 | maybeSrc := src
28 | for {
29 | maybeDst := dst
30 | if i == j {
31 | maybeDst = nil
32 | }
33 | if maybeDst == nil && maybeSrc == nil {
34 | break
35 | }
36 |
37 | select {
38 | case maybeDst <- buf[i&mask]:
39 | buf[i&mask] = nil
40 | i++
41 |
42 | case e := <-maybeSrc:
43 | if e.Type == cluster.EventStopAnnounce {
44 | maybeSrc = nil
45 | continue
46 | }
47 |
48 | // Allocate a bigger buffer if necessary.
49 | if i+len(buf) == j {
50 | b := make([]*EventAnnounce, 2*len(buf))
51 | n := copy(b, buf[j&mask:])
52 | copy(b[n:], buf[:j&mask])
53 | i, j = 0, len(buf)
54 | buf, mask = b, len(b)-1
55 | }
56 |
57 | buf[j&mask] = e
58 | j++
59 | }
60 | }
61 |
62 | close(dst)
63 | // Block forever.
64 | for range src {
65 | }
66 | }()
67 | return src
68 | }
69 |
--------------------------------------------------------------------------------
/storage/local.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "bytes"
5 | "io"
6 | "os"
7 | "path/filepath"
8 | "syscall"
9 | "time"
10 | )
11 |
12 | // LocalStorage provides access to the local filesystem.
13 | type LocalStorage interface {
14 | Prefix() string
15 | Read(key string) (*os.File, error)
16 | Stat(key string) (os.FileInfo, error)
17 | Delete(key string) error
18 | Write(key string, body io.Reader) (int64, error)
19 | ListFiles(prefix string) ([]os.FileInfo, error)
20 | CheckAccess(prefix string) error
21 | DiskStats() (*DiskStats, error)
22 | }
23 |
24 | type localStorage struct {
25 | prefix string
26 | }
27 |
28 | func NewLocalStorage(prefix string) LocalStorage {
29 | return &localStorage{
30 | prefix: prefix,
31 | }
32 | }
33 |
34 | func (l *localStorage) Prefix() string {
35 | return l.prefix
36 | }
37 |
38 | func (l *localStorage) Read(key string) (*os.File, error) {
39 | return os.OpenFile(filepath.Join(l.prefix, key), os.O_RDONLY, 0600)
40 | }
41 |
42 | func (l *localStorage) Stat(key string) (os.FileInfo, error) {
43 | return os.Stat(filepath.Join(l.prefix, key))
44 | }
45 |
46 | func (l *localStorage) Delete(key string) error {
47 | return os.Remove(filepath.Join(l.prefix, key))
48 | }
49 |
50 | func (l *localStorage) Write(key string, body io.Reader) (int64, error) {
51 | f, err := os.OpenFile(filepath.Join(l.prefix, key), os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0600)
52 | if err != nil {
53 | return 0, err
54 | }
55 | defer f.Close()
56 | return io.Copy(f, body)
57 | }
58 |
59 | func (l *localStorage) ListFiles(path string) ([]os.FileInfo, error) {
60 | var infos []os.FileInfo
61 | path = filepath.Join(l.prefix, path)
62 | err := filepath.Walk(path, func(name string, info os.FileInfo, err error) error {
63 | if err != nil {
64 | return err
65 | } else if info.IsDir() {
66 | if path == name {
67 | return nil
68 | }
69 | return filepath.SkipDir
70 | }
71 | infos = append(infos, info)
72 | return nil
73 | })
74 | if err != nil {
75 | return nil, err
76 | }
77 | return infos, nil
78 | }
79 |
80 | func (l *localStorage) CheckAccess(path string) error {
81 | body := []byte(time.Now().UTC().String())
82 | key := filepath.Join(path, "_objstore_touch")
83 | _, err := l.Write(key, bytes.NewReader(body))
84 | return err
85 | }
86 |
87 | type DiskStats struct {
88 | BytesAll uint64 `json:"bytes_all"`
89 | BytesUsed uint64 `json:"bytes_used"`
90 | BytesFree uint64 `json:"bytes_free"`
91 | }
92 |
93 | func (l *localStorage) DiskStats() (*DiskStats, error) {
94 | var fs syscall.Statfs_t
95 | if err := syscall.Statfs(l.prefix, &fs); err != nil {
96 | return nil, err
97 | }
98 | ds := &DiskStats{
99 | BytesAll: fs.Blocks * uint64(fs.Bsize),
100 | BytesFree: fs.Bfree * uint64(fs.Bsize),
101 | }
102 | ds.BytesUsed = ds.BytesAll - ds.BytesFree
103 | return ds, nil
104 | }
105 |
--------------------------------------------------------------------------------
/cluster/client.go:
--------------------------------------------------------------------------------
1 | package cluster
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "fmt"
7 | "io"
8 | "net"
9 | "net/http"
10 | "strings"
11 |
12 | "github.com/astranet/astranet"
13 | "github.com/astranet/astranet/addr"
14 | )
15 |
16 | type PrivateClient struct {
17 | router astranet.AstraNet
18 | cli *http.Client
19 | }
20 |
21 | // NewPrivateClient initializes a new client for the virtual network.
22 | // Obtain router handle from an initialized private API server.
23 | func NewPrivateClient(router astranet.AstraNet) *PrivateClient {
24 | return &PrivateClient{
25 | router: router,
26 | cli: &http.Client{
27 | Transport: newHTTPTransport(router),
28 | },
29 | }
30 | }
31 |
32 | func newHTTPTransport(router astranet.AstraNet) *http.Transport {
33 | return &http.Transport{
34 | DisableKeepAlives: true,
35 | Dial: func(network, addr string) (net.Conn, error) {
36 | host, _, err := net.SplitHostPort(addr)
37 | if err != nil {
38 | return nil, err
39 | }
40 | return router.Dial(network, host)
41 | },
42 | }
43 | }
44 |
45 | func nodeURI(id string) string {
46 | return "http://objstore-" + id
47 | }
48 |
49 | type NodeIter func(id, addr, vaddr string) error
50 |
51 | func (p *PrivateClient) ForEachNode(iterFunc NodeIter) error {
52 | return forEachNode(p.router, iterFunc)
53 | }
54 |
55 | func forEachNode(router astranet.AstraNet, iterFunc NodeIter) error {
56 | services := router.Services()
57 | seen := make(map[string]bool)
58 | for _, info := range services {
59 | if !strings.HasPrefix(info.Service, "objstore-") {
60 | continue
61 | }
62 | if info.Upstream == nil {
63 | continue
64 | }
65 | nodeID := strings.TrimPrefix(strings.Split(info.Service, ".")[0], "objstore-")
66 | host, _, _ := net.SplitHostPort(info.Upstream.RAddr().String())
67 | if seen[nodeID+host] {
68 | continue
69 | } else {
70 | seen[nodeID+host] = true
71 | }
72 | vaddr := getAddr(info.Host, info.Port)
73 | if err := iterFunc(nodeID, host, vaddr); err == RangeStop {
74 | return nil
75 | } else if err != nil {
76 | return err
77 | }
78 | }
79 | return nil
80 | }
81 |
82 | func getAddr(host uint64, port uint32) string {
83 | return fmt.Sprintf("%s:%d", addr.Uint2Host(host), port)
84 | }
85 |
86 | var (
87 | RangeStop = errors.New("stop")
88 | ForEachStop = RangeStop
89 | )
90 |
91 | // GET issues a GET request to the node's private HTTP endpoint over the
92 | // virtual network. The body parameter is ignored: GET requests carry no body.
93 | func (p *PrivateClient) GET(ctx context.Context, nodeID, path string, body io.Reader) (*http.Response, error) {
94 | req, err := http.NewRequest("GET", nodeURI(nodeID)+path, nil)
95 | if err != nil {
96 | // check the error before touching req: on failure req is nil and
97 | // calling WithContext on it would panic.
98 | return nil, err
99 | }
100 | req = req.WithContext(ctx)
101 | return p.cli.Do(req)
102 | }
99 |
100 | // POST issues a POST request with the given body to the node's private
101 | // HTTP endpoint over the virtual network.
102 | func (p *PrivateClient) POST(ctx context.Context, nodeID, path string, body io.Reader) (*http.Response, error) {
103 | req, err := http.NewRequest("POST", nodeURI(nodeID)+path, body)
104 | if err != nil {
105 | // check the error before touching req: on failure req is nil and
106 | // calling WithContext on it would panic.
107 | return nil, err
108 | }
109 | req = req.WithContext(ctx)
110 | return p.cli.Do(req)
111 | }
108 |
--------------------------------------------------------------------------------
/journal/meta.go:
--------------------------------------------------------------------------------
1 | package journal
2 |
3 | import (
4 | "fmt"
5 | "strconv"
6 | "strings"
7 | "time"
8 | )
9 |
10 | type FileMeta struct {
11 | ID string `msgp:"0" json:"id"`
12 | Name string `msgp:"1" json:"name"`
13 | Size int64 `msgp:"2" json:"size"`
14 | Timestamp int64 `msgp:"3" json:"timestamp"`
15 | UserMeta map[string]string `msgp:"4" json:"user_meta"`
16 | IsSymlink bool `msgp:"5" json:"is_symlink"`
17 | Consistency ConsistencyLevel `msgp:"6" json:"consistency"`
18 | IsDeleted bool `msgp:"7" json:"is_deleted"`
19 | IsFetched bool `msgp:"8" json:"is_fetched"`
20 | }
21 |
22 | func (f *FileMeta) Map() map[string]string {
23 | m := map[string]string{
24 | "id": f.ID,
25 | "name": f.Name,
26 | "size": strconv.FormatInt(f.Size, 10),
27 | "timestamp": strconv.FormatInt(f.Timestamp, 10),
28 | "consistency": strconv.Itoa(int(f.Consistency)),
29 | }
30 | for k, v := range f.UserMeta {
31 | m["usermeta-"+k] = v
32 | }
33 | return m
34 | }
35 |
36 | func (f *FileMeta) Unmap(m map[string]string) {
37 | userMeta := make(map[string]string, len(m))
38 | for k, v := range m {
39 | switch k = strings.ToLower(k); k {
40 | case "id":
41 | f.ID = v
42 | case "name":
43 | f.Name = v
44 | case "size":
45 | f.Size, _ = strconv.ParseInt(v, 10, 64)
46 | case "timestamp":
47 | f.Timestamp, _ = strconv.ParseInt(v, 10, 64)
48 | case "consistency":
49 | level, _ := strconv.Atoi(v)
50 | if level == 0 {
51 | // at least
52 | f.Consistency = ConsistencyS3
53 | } else {
54 | f.Consistency = (ConsistencyLevel)(level)
55 | }
56 | default:
57 | if !strings.HasPrefix(k, "usermeta-") {
58 | continue
59 | }
60 | k = strings.TrimPrefix(k, "usermeta-")
61 | userMeta[k] = v
62 | }
63 | }
64 | f.UserMeta = userMeta
65 | }
66 |
67 | type FileMetaList []*FileMeta
68 |
69 | func (m FileMeta) String() string {
70 | if m.IsDeleted {
71 | return fmt.Sprintf("%s: %s (deleted)", m.ID, m.Name)
72 | }
73 | return fmt.Sprintf("%s: %s (%db->%v)", m.ID, m.Name, m.Size, m.IsSymlink)
74 | }
75 |
76 | type ConsistencyLevel int
77 |
78 | const (
79 | // ConsistencyLocal flags file for local persistence only, implying
80 | // that the file body will be stored on a single node. Default.
81 | ConsistencyLocal ConsistencyLevel = 0
82 | // ConsistencyS3 flags file for local+S3 persistence, implying that the file
83 | // body will be stored on a single node and Amazon S3.
84 | ConsistencyS3 ConsistencyLevel = 1
85 | // ConsistencyFull flags file to be replicated across all existing nodes in cluster and S3.
86 | ConsistencyFull ConsistencyLevel = 2
87 | )
88 |
89 | type ID string
90 |
91 | type JournalMeta struct {
92 | ID ID `msgp:"0" json:"journal_id"`
93 | CreatedAt int64 `msgp:"1" json:"created_at"`
94 | JoinedAt int64 `msgp:"2" json:"joined_at"`
95 | FirstKey string `msgp:"3" json:"first_key"`
96 | LastKey string `msgp:"4" json:"last_key"`
97 | CountTotal int `msgp:"5" json:"count_total"`
98 | }
99 |
100 | func (j JournalMeta) String() string {
101 | if len(j.FirstKey) == 0 {
102 | j.FirstKey = "?"
103 | }
104 | if len(j.LastKey) == 0 {
105 | j.LastKey = "?"
106 | }
107 | ts := time.Unix(0, j.CreatedAt).UTC().Format(time.StampMilli)
108 | return fmt.Sprintf("%s (%s): %s-%s (count: %d) joined: %v",
109 | j.ID, ts, j.FirstKey, j.LastKey, j.CountTotal, j.JoinedAt > 0)
110 | }
111 |
--------------------------------------------------------------------------------
/cluster/manager.go:
--------------------------------------------------------------------------------
1 | package cluster
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "encoding/json"
7 | "errors"
8 | "io"
9 | "io/ioutil"
10 |
11 | "sphere.software/objstore/journal"
12 | )
13 |
14 | type ClusterManager interface {
15 | ListNodes() ([]*NodeInfo, error)
16 | Announce(ctx context.Context, nodeID string, event *EventAnnounce) error
17 | GetObject(ctx context.Context, nodeID string, id string) (io.ReadCloser, error)
18 | Sync(ctx context.Context, nodeID string,
19 | list journal.FileMetaList) (added, deleted journal.FileMetaList, err error)
20 | }
21 |
22 | type NodeInfo struct {
23 | ID string `json:"id"`
24 | Addr string `json:"addr"`
25 | VAddr string `json:"vaddr"`
26 | }
27 |
28 | func NewClusterManager(cli *PrivateClient, nodeID string) ClusterManager {
29 | return &clusterManager{
30 | cli: cli,
31 | nodeID: nodeID,
32 | }
33 | }
34 |
35 | type clusterManager struct {
36 | nodeID string
37 | cli *PrivateClient
38 | }
39 |
40 | func (c *clusterManager) ListNodes() ([]*NodeInfo, error) {
41 | var nodes []*NodeInfo
42 | if err := c.cli.ForEachNode(func(id, addr, vaddr string) error {
43 | nodes = append(nodes, &NodeInfo{
44 | ID: id,
45 | Addr: addr,
46 | VAddr: vaddr,
47 | })
48 | return nil
49 | }); err != nil {
50 | return nil, err
51 | }
52 | return nodes, nil
53 | }
54 |
55 | func (c *clusterManager) Announce(ctx context.Context, nodeID string, event *EventAnnounce) error {
56 | body, _ := json.Marshal(event)
57 | resp, err := c.cli.POST(ctx, nodeID, "/private/v1/announce", bytes.NewReader(body))
58 | if err != nil {
59 | return err
60 | }
61 | respBody, _ := ioutil.ReadAll(resp.Body)
62 | resp.Body.Close()
63 | if resp.StatusCode != 200 {
64 | if len(respBody) > 0 {
65 | err := errors.New(string(respBody))
66 | return err
67 | }
68 | return errors.New(resp.Status)
69 | }
70 | return nil
71 | }
72 |
73 | var ErrNotFound = errors.New("not found")
74 |
75 | func (c *clusterManager) GetObject(ctx context.Context, nodeID string, id string) (io.ReadCloser, error) {
76 | resp, err := c.cli.GET(ctx, nodeID, "/private/v1/get/"+id, nil)
77 | if err != nil {
78 | return nil, err
79 | }
80 | if resp.StatusCode == 404 {
81 | resp.Body.Close()
82 | return nil, ErrNotFound
83 | } else if resp.StatusCode != 200 {
84 | respBody, _ := ioutil.ReadAll(resp.Body)
85 | resp.Body.Close()
86 | if len(respBody) > 0 {
87 | err := errors.New(string(respBody))
88 | return nil, err
89 | }
90 | err := errors.New(resp.Status)
91 | return nil, err
92 | }
93 | return resp.Body, nil
94 | }
95 |
96 | type SyncResponse struct {
97 | Added journal.FileMetaList `json:"list_added"`
98 | Deleted journal.FileMetaList `json:"list_deleted"`
99 | }
100 |
101 | func (c *clusterManager) Sync(ctx context.Context, nodeID string,
102 | list journal.FileMetaList) (added, deleted journal.FileMetaList, err error) {
103 |
104 | body, _ := json.Marshal(list)
105 | resp, err := c.cli.POST(ctx, nodeID, "/private/v1/sync", bytes.NewReader(body))
106 | if err != nil {
107 | return nil, nil, err
108 | }
109 | respBody, _ := ioutil.ReadAll(resp.Body)
110 | resp.Body.Close()
111 | if resp.StatusCode != 200 {
112 | if len(respBody) > 0 {
113 | err := errors.New(string(respBody))
114 | return nil, nil, err
115 | }
116 | return nil, nil, errors.New(resp.Status)
117 | }
118 | var syncResp SyncResponse
119 | if err := json.Unmarshal(respBody, &syncResp); err != nil {
120 | return nil, nil, err
121 | }
122 | return syncResp.Added, syncResp.Deleted, nil
123 | }
124 |
--------------------------------------------------------------------------------
/glide.lock:
--------------------------------------------------------------------------------
1 | hash: dac827ec9b936d98e9d57a9cbb2a4b436a4ed0a8a20f331a805cf0d9a66c041b
2 | updated: 2017-08-29T01:39:08.521273101+03:00
3 | imports:
4 | - name: github.com/astranet/astranet
5 | version: ededf87e9f24d6482bf900c46bcf57e52cb73cee
6 | subpackages:
7 | - addr
8 | - glog
9 | - listener
10 | - protocol
11 | - route
12 | - service
13 | - skykiss
14 | - socket
15 | - transport
16 | - name: github.com/astranet/btree-2d
17 | version: 4b00686449f2c5e8cea67a72e64db87b5244b1af
18 | subpackages:
19 | - lockie
20 | - util
21 | - uuid
22 | - name: github.com/aws/aws-sdk-go
23 | version: e63027ac6e05f6d4ae9f97ce0294d7468ca652da
24 | subpackages:
25 | - aws
26 | - aws/awserr
27 | - aws/awsutil
28 | - aws/client
29 | - aws/client/metadata
30 | - aws/corehandlers
31 | - aws/credentials
32 | - aws/credentials/ec2rolecreds
33 | - aws/credentials/endpointcreds
34 | - aws/credentials/stscreds
35 | - aws/defaults
36 | - aws/ec2metadata
37 | - aws/endpoints
38 | - aws/request
39 | - aws/session
40 | - aws/signer/v4
41 | - internal/shareddefaults
42 | - private/protocol
43 | - private/protocol/query
44 | - private/protocol/query/queryutil
45 | - private/protocol/rest
46 | - private/protocol/restxml
47 | - private/protocol/xml/xmlutil
48 | - service/s3
49 | - service/sts
50 | - name: github.com/boltdb/bolt
51 | version: 2f1ce7a837dcb8da3ec595b1dac9d0632f0f99e8
52 | - name: github.com/cenk/backoff
53 | version: cdf48bbc1eb78d1349cbda326a4a037f7ba565c6
54 | - name: github.com/cznic/b
55 | version: 6955404bf550e1eae1bf83121739078b027f4547
56 | - name: github.com/gin-contrib/sse
57 | version: 22d885f9ecc78bf4ee5d72b937e4bbcdc58e8cae
58 | - name: github.com/gin-gonic/gin
59 | version: d459835d2b077e44f7c9b453505ee29881d5d12d
60 | subpackages:
61 | - binding
62 | - render
63 | - name: github.com/go-ini/ini
64 | version: c787282c39ac1fc618827141a1f762240def08a3
65 | - name: github.com/golang/protobuf
66 | version: 2402d76f3d41f928c7902a765dfc872356dd3aad
67 | subpackages:
68 | - proto
69 | - name: github.com/jawher/mow.cli
70 | version: a459d5906bb7a9c5eda7c4d02eec7c541120226e
71 | - name: github.com/jmespath/go-jmespath
72 | version: bd40a432e4c76585ef6b72d3fd96fb9b6dc7b68d
73 | - name: github.com/mattn/go-isatty
74 | version: dda3de49cbfcec471bd7a70e6cc01fcc3ff90109
75 | - name: github.com/oklog/ulid
76 | version: 66bb6560562feca7045b23db1ae85b01260f87c5
77 | - name: github.com/philhofer/fwd
78 | version: 1612a298117663d7bc9a760ae20d383413859798
79 | - name: github.com/serialx/hashring
80 | version: 75d57fa264ad17fd929304dfdb02c8e278c5c01c
81 | - name: github.com/tinylib/msgp
82 | version: b2b6a672cf1e5b90748f79b8b81fc8c5cf0571a1
83 | subpackages:
84 | - msgp
85 | - name: github.com/ugorji/go
86 | version: c88ee250d0221a57af388746f5cf03768c21d6e2
87 | subpackages:
88 | - codec
89 | - name: github.com/xlab/closer
90 | version: 89cd22812c4fd5188746092cd10992caeffc75ab
91 | - name: golang.org/x/sys
92 | version: 075e574b89e4c2d22f2286a7e2b919519c6f3547
93 | subpackages:
94 | - unix
95 | - name: gopkg.in/go-playground/validator.v8
96 | version: c193cecd124b5cc722d7ee5538e945bdb3348435
97 | - name: gopkg.in/yaml.v2
98 | version: a3f3340b5840cee44f372bddb5880fcbc419b46a
99 | testImports:
100 | - name: github.com/davecgh/go-spew
101 | version: 6d212800a42e8ab5c146b8ace3490ee17e5225f9
102 | subpackages:
103 | - spew
104 | - name: github.com/pmezard/go-difflib
105 | version: d8ed2627bdf02c080bf22230dbb337003b7aba2d
106 | subpackages:
107 | - difflib
108 | - name: github.com/stretchr/testify
109 | version: 69483b4bd14f5845b5a1e55bca19e954e827f1d0
110 | subpackages:
111 | - assert
112 |
--------------------------------------------------------------------------------
/api/public.go:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | import (
4 | "strings"
5 |
6 | "github.com/gin-gonic/gin"
7 |
8 | "sphere.software/objstore"
9 | )
10 |
11 | type PublicServer struct {
12 | nodeID string
13 |
14 | mux *gin.Engine
15 | }
16 |
17 | func NewPublicServer(nodeID string) *PublicServer {
18 | return &PublicServer{
19 | nodeID: nodeID,
20 | }
21 | }
22 |
23 | func (p *PublicServer) ListenAndServe(addr string) error {
24 | return p.mux.Run(addr)
25 | }
26 |
27 | func (p *PublicServer) RouteAPI(store objstore.Store) {
28 | r := gin.Default()
29 | r.GET("/api/v1/get/:id", p.GetHandler(store))
30 | r.GET("/api/v1/meta/:id", p.MetaHandler(store))
31 | r.POST("/api/v1/put", p.PutHandler(store))
32 | r.POST("/api/v1/delete/:id", p.DeleteHandler(store))
33 | r.GET("/api/v1/id", p.IDHandler())
34 | r.GET("/api/v1/version", p.VersionHandler())
35 | r.GET("/api/v1/ping", p.PingHandler())
36 | r.GET("/api/v1/stats", p.StatsHandler(store))
37 | p.mux = r
38 | }
39 |
40 | func (p *PublicServer) PingHandler() gin.HandlerFunc {
41 | return func(c *gin.Context) {
42 | c.String(200, p.nodeID)
43 | }
44 | }
45 |
46 | func (p *PublicServer) IDHandler() gin.HandlerFunc {
47 | return func(c *gin.Context) {
48 | c.String(200, objstore.GenerateID())
49 | }
50 | }
51 |
52 | func (p *PublicServer) VersionHandler() gin.HandlerFunc {
53 | return func(c *gin.Context) {
54 | // TODO: version generation from commit ID
55 | c.String(200, "dev")
56 | }
57 | }
58 |
59 | const (
60 | KB = 1024
61 | MB = 1024 * KB
62 | GB = 1024 * MB
63 | )
64 |
// DiskStats augments the raw byte counters from objstore with
// human-friendly KB/MB/GB views for the stats endpoint.
type DiskStats struct {
	*objstore.DiskStats

	KBytesAll  float64 `json:"kb_all"`
	KBytesUsed float64 `json:"kb_used"`
	KBytesFree float64 `json:"kb_free"`

	MBytesAll  float64 `json:"mb_all"`
	MBytesUsed float64 `json:"mb_used"`
	MBytesFree float64 `json:"mb_free"`

	GBytesAll  float64 `json:"gb_all"`
	GBytesUsed float64 `json:"gb_used"`
	GBytesFree float64 `json:"gb_free"`
}
80 |
// Stats is the payload served by the /api/v1/stats endpoint.
type Stats struct {
	// DiskStats is nil when disk usage could not be determined.
	DiskStats *DiskStats `json:"disk_stats"`
	// TODO: other stats
}
85 |
86 | func (p *PublicServer) StatsHandler(store objstore.Store) gin.HandlerFunc {
87 | return func(c *gin.Context) {
88 | var stats Stats
89 | if ds, err := store.DiskStats(); err == nil {
90 | stats.DiskStats = &DiskStats{
91 | DiskStats: ds,
92 | }
93 | stats.DiskStats.KBytesAll = float64(ds.BytesAll) / KB
94 | stats.DiskStats.KBytesUsed = float64(ds.BytesUsed) / KB
95 | stats.DiskStats.KBytesFree = float64(ds.BytesFree) / KB
96 | stats.DiskStats.MBytesAll = float64(ds.BytesAll) / MB
97 | stats.DiskStats.MBytesUsed = float64(ds.BytesUsed) / MB
98 | stats.DiskStats.MBytesFree = float64(ds.BytesFree) / MB
99 | stats.DiskStats.GBytesAll = float64(ds.BytesAll) / GB
100 | stats.DiskStats.GBytesUsed = float64(ds.BytesUsed) / GB
101 | stats.DiskStats.GBytesFree = float64(ds.BytesFree) / GB
102 | }
103 | c.JSON(200, stats)
104 | }
105 | }
106 |
107 | func (p *PublicServer) GetHandler(store objstore.Store) gin.HandlerFunc {
108 | return func(c *gin.Context) {
109 | var fetch bool
110 | fetchOption := c.Request.Header.Get("X-Meta-Fetch")
111 | if strings.ToLower(fetchOption) == "true" || fetchOption == "1" {
112 | fetch = true
113 | }
114 | r, meta, err := store.FindObject(c, c.Param("id"), fetch)
115 | if err == objstore.ErrNotFound {
116 | if meta != nil {
117 | serveMeta(c, meta)
118 | }
119 | c.Status(404)
120 | return
121 | } else if err != nil {
122 | c.String(500, "error: %v", err)
123 | return
124 | }
125 | serveObject(c, r, meta)
126 | }
127 | }
128 |
129 | func (p *PublicServer) MetaHandler(store objstore.Store) gin.HandlerFunc {
130 | return func(c *gin.Context) {
131 | meta, err := store.HeadObject(c.Param("id"))
132 | if err == objstore.ErrNotFound {
133 | if meta != nil {
134 | serveMeta(c, meta)
135 | }
136 | c.Status(404)
137 | return
138 | } else if err != nil {
139 | c.String(500, "error: %v", err)
140 | return
141 | }
142 | c.JSON(200, meta)
143 | }
144 | }
145 |
146 | func (p *PublicServer) PutHandler(store objstore.Store) gin.HandlerFunc {
147 | return func(c *gin.Context) {
148 | putObject(c, store)
149 | }
150 | }
151 |
152 | func (p *PublicServer) DeleteHandler(store objstore.Store) gin.HandlerFunc {
153 | return func(c *gin.Context) {
154 | deleteObject(c, store)
155 | }
156 | }
157 |
--------------------------------------------------------------------------------
/storage/remote.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "io"
7 | "mime"
8 | "path"
9 | "path/filepath"
10 | "strings"
11 | "time"
12 |
13 | "github.com/aws/aws-sdk-go/aws"
14 | "github.com/aws/aws-sdk-go/aws/session"
15 | "github.com/aws/aws-sdk-go/service/s3"
16 | )
17 |
// RemoteStorage provides object access backend,
// it's usually an AWS S3 client pointed to a specific bucket.
type RemoteStorage interface {
	// PutObject uploads r under key with the given metadata.
	PutObject(key string, r io.ReadSeeker, meta map[string]string) (*Spec, error)
	// GetObject downloads an object, optionally a specific version.
	GetObject(key string, version ...string) (*Spec, error)
	// HeadObject fetches object info without downloading the body.
	HeadObject(key string, version ...string) (*Spec, error)
	// ListObjects enumerates keys under prefix, optionally after startAfter.
	ListObjects(prefix string, startAfter ...string) ([]*Spec, error)
	// CheckAccess verifies that the backend is writable under prefix.
	CheckAccess(prefix string) error
	// Bucket reports the backing bucket name.
	Bucket() string
}
28 |
29 | var ErrNotFound = errors.New("NoSuchKey: The specified key does not exist.")
30 |
// s3Storage is the AWS S3 implementation of RemoteStorage.
type s3Storage struct {
	bucket string // target bucket name
	cli    *s3.S3 // configured S3 API client
}
35 |
36 | func NewS3Storage(region, bucket string) RemoteStorage {
37 | cli := s3.New(session.New(&aws.Config{
38 | Region: aws.String(region),
39 | }))
40 | return &s3Storage{
41 | bucket: bucket,
42 | cli: cli,
43 | }
44 | }
45 |
// Spec describes an object stored in the remote backend, together with
// an optional body stream when obtained via GetObject.
type Spec struct {
	Path      string            // canonical s3://bucket/key path
	Key       string            // object key within the bucket
	Body      io.ReadCloser     // object body; nil for Head/List results
	ETag      string            // entity tag reported by the backend
	Version   string            // version ID, when bucket versioning is on
	UpdatedAt time.Time         // last-modified timestamp
	Meta      map[string]string // user metadata attached to the object
	Size      int64             // object size in bytes
}
56 |
// Bucket returns the name of the S3 bucket this storage writes to.
func (s *s3Storage) Bucket() string {
	return s.bucket
}
60 |
61 | func (s *s3Storage) GetObject(key string, version ...string) (*Spec, error) {
62 | obj, err := s.cli.GetObject(&s3.GetObjectInput{
63 | Key: aws.String(key),
64 | Bucket: aws.String(s.bucket),
65 | VersionId: awsStringMaybe(version),
66 | })
67 | if err != nil {
68 | if strings.HasPrefix(err.Error(), "NoSuchKey") {
69 | return nil, ErrNotFound
70 | }
71 | return nil, err
72 | }
73 | spec := &Spec{
74 | Path: fullPath(s.bucket, key),
75 | Key: key,
76 | Body: obj.Body,
77 | ETag: aws.StringValue(obj.ETag),
78 | Version: aws.StringValue(obj.VersionId),
79 | UpdatedAt: aws.TimeValue(obj.LastModified),
80 | Size: aws.Int64Value(obj.ContentLength),
81 | Meta: aws.StringValueMap(obj.Metadata),
82 | }
83 | return spec, nil
84 | }
85 |
86 | func (s *s3Storage) HeadObject(key string, version ...string) (*Spec, error) {
87 | obj, err := s.cli.HeadObject(&s3.HeadObjectInput{
88 | Key: aws.String(key),
89 | Bucket: aws.String(s.bucket),
90 | VersionId: awsStringMaybe(version),
91 | })
92 | if err != nil {
93 | return nil, err
94 | }
95 | spec := &Spec{
96 | Path: fullPath(s.bucket, key),
97 | Key: key,
98 | ETag: aws.StringValue(obj.ETag),
99 | Version: aws.StringValue(obj.VersionId),
100 | UpdatedAt: aws.TimeValue(obj.LastModified),
101 | Size: aws.Int64Value(obj.ContentLength),
102 | }
103 | return spec, nil
104 | }
105 |
106 | func (s *s3Storage) ListObjects(prefix string, startAfter ...string) ([]*Spec, error) {
107 | var token *string
108 | var specs []*Spec
109 | for {
110 | list, err := s.cli.ListObjectsV2(&s3.ListObjectsV2Input{
111 | Bucket: aws.String(s.bucket),
112 | Prefix: aws.String(prefix),
113 | StartAfter: awsStringMaybe(startAfter),
114 | // pagination controls
115 | MaxKeys: aws.Int64(100),
116 | ContinuationToken: token,
117 | })
118 | if err != nil {
119 | return nil, err
120 | }
121 | for _, obj := range list.Contents {
122 | key := aws.StringValue(obj.Key)
123 | specs = append(specs, &Spec{
124 | Path: fullPath(s.bucket, key),
125 | Key: key,
126 | ETag: aws.StringValue(obj.ETag),
127 | UpdatedAt: aws.TimeValue(obj.LastModified),
128 | Size: aws.Int64Value(obj.Size),
129 | })
130 | }
131 | token = list.ContinuationToken
132 | if *list.IsTruncated == false {
133 | return specs, nil
134 | } else if token == nil {
135 | return specs, nil
136 | }
137 | }
138 | }
139 |
// CheckAccess verifies write access to the bucket by uploading a small
// timestamped marker object under prefix. Returns the PutObject error,
// if any; the marker is intentionally left in place.
func (s *s3Storage) CheckAccess(prefix string) error {
	body := []byte(time.Now().UTC().String())
	_, err := s.cli.PutObject(&s3.PutObjectInput{
		Body:        newReadSeeker(body),
		Bucket:      aws.String(s.bucket),
		ContentType: aws.String("text/plain"),
		Key:         aws.String(path.Join(prefix, "_objstore_touch")),
	})
	return err
}
150 |
151 | func (s *s3Storage) PutObject(key string, r io.ReadSeeker, meta map[string]string) (*Spec, error) {
152 | var ctype string
153 | if len(meta["name"]) > 0 {
154 | ctype = mime.TypeByExtension(filepath.Ext(meta["name"]))
155 | }
156 | obj, err := s.cli.PutObject(&s3.PutObjectInput{
157 | Body: r,
158 | Bucket: aws.String(s.bucket),
159 | Key: aws.String(key),
160 | ContentType: aws.String(ctype),
161 | Metadata: aws.StringMap(meta),
162 | })
163 | if err != nil {
164 | return nil, err
165 | }
166 | spec := &Spec{
167 | Path: fullPath(s.bucket, key),
168 | Key: key,
169 | ETag: aws.StringValue(obj.ETag),
170 | Version: aws.StringValue(obj.VersionId),
171 | Meta: meta,
172 | }
173 | return spec, err
174 | }
175 |
// fullPath renders the canonical s3:// URI for a bucket/key pair.
func fullPath(bucket, key string) string {
	const scheme = "s3"
	return fmt.Sprintf("%s://%s/%s", scheme, bucket, key)
}
179 |
// awsStringMaybe converts an optional variadic string into the *string
// form AWS API inputs expect: nil when absent, otherwise a pointer to
// a copy of the first value.
func awsStringMaybe(v []string) *string {
	if len(v) == 0 {
		return nil
	}
	s := v[0]
	return &s
}
186 |
--------------------------------------------------------------------------------
/cmd/objstore/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "log"
5 | "os"
6 | "path/filepath"
7 | "runtime"
8 | "sync"
9 | "time"
10 |
11 | "github.com/boltdb/bolt"
12 | "github.com/gin-gonic/gin"
13 | "github.com/jawher/mow.cli"
14 | "github.com/xlab/closer"
15 |
16 | "sphere.software/objstore"
17 | "sphere.software/objstore/api"
18 | "sphere.software/objstore/cluster"
19 | "sphere.software/objstore/journal"
20 | "sphere.software/objstore/storage"
21 | )
22 |
23 | var app = cli.App("objstore", "A Multi-Master Distributed Caching Layer for Amazon S3.\nVersion 0.1\thttp://github.com/SphereSoftware/objstore")
24 |
25 | var (
26 | debugEnabled bool
27 | debugLevel = app.Int(cli.IntOpt{
28 | Name: "d debug",
29 | Desc: "Debug level to use, currently 0/1 suppported.",
30 | EnvVar: "APP_DEBUG_LEVEL",
31 | Value: 0,
32 | HideValue: true,
33 | })
34 | clusterNodes = app.Strings(cli.StringsOpt{
35 | Name: "N nodes",
36 | Desc: "A list of cluster nodes to join for discovery and journal updates",
37 | EnvVar: "APP_CLUSTER_NODES",
38 | Value: []string{},
39 | HideValue: true,
40 | })
41 | clusterName = app.String(cli.StringOpt{
42 | Name: "T tag",
43 | Desc: "Cluster tag name",
44 | EnvVar: "APP_CLUSTER_TAGNAME",
45 | Value: "default",
46 | })
47 | privateAddr = app.String(cli.StringOpt{
48 | Name: "private-addr",
49 | Desc: "Listen address for cluster discovery and private API",
50 | EnvVar: "NET_PRIVATE_ADDR",
51 | Value: "0.0.0.0:11999",
52 | })
53 | debugAddr = app.String(cli.StringOpt{
54 | Name: "debug-addr",
55 | Desc: "Listen address for private API debugging using external tools",
56 | EnvVar: "NET_DEBUG_ADDR",
57 | Value: "",
58 | })
59 | publicAddr = app.String(cli.StringOpt{
60 | Name: "public-addr",
61 | Desc: "Listen address for external access and public HTTP API",
62 | EnvVar: "NET_PUBLIC_ADDR",
63 | Value: "0.0.0.0:10999",
64 | })
65 | statePrefix = app.String(cli.StringOpt{
66 | Name: "state-dir",
67 | Desc: "Directory where to keep local state and journals.",
68 | EnvVar: "APP_STATE_DIR",
69 | Value: "state/",
70 | })
71 | localPrefix = app.String(cli.StringOpt{
72 | Name: "files-dir",
73 | Desc: "Directory where to keep local files.",
74 | EnvVar: "APP_FILES_DIR",
75 | Value: "files/",
76 | })
77 | s3Region = app.String(cli.StringOpt{
78 | Name: "R region",
79 | Desc: "Amazon S3 region name",
80 | EnvVar: "S3_REGION_NAME",
81 | Value: "us-east-1",
82 | })
83 | s3Bucket = app.String(cli.StringOpt{
84 | Name: "B bucket",
85 | Desc: "Amazon S3 bucket name",
86 | EnvVar: "S3_BUCKET_NAME",
87 | Value: "00-objstore-test",
88 | })
89 | )
90 |
// init configures the standard logger to include file:line in entries.
func init() {
	log.SetFlags(log.LstdFlags | log.Lshortfile)
}
94 |
// main wires up lifecycle management (closer), CLI parsing and the gin
// mode, then delegates the actual startup to appMain.
func main() {
	// flush all closer.Bind callbacks on exit
	defer closer.Close()

	closer.Bind(func() {
		runtime.GC()
		log.Println("bye!")
	})

	app.Action = appMain
	app.Before = func() {
		if *debugLevel > 0 {
			debugEnabled = true
		}
		// gin verbosity follows the app-wide debug flag
		if debugEnabled {
			gin.SetMode(gin.DebugMode)
		} else {
			gin.SetMode(gin.ReleaseMode)
		}
	}
	if err := app.Run(os.Args); err != nil {
		closer.Fatalln(err)
	}
}
118 |
// appMain is the CLI action: it opens local state, constructs the
// private/public servers and the object store, joins the cluster when
// peer nodes were given, and blocks until shutdown.
func appMain() {
	db, err := openStateDB(*statePrefix)
	if err != nil {
		closer.Fatalln("[ERR] failed to open state DB:", err)
	}
	if err := os.MkdirAll(*localPrefix, 0700); err != nil {
		closer.Fatalln("[ERR] unable to create local files dir:", err)
	}

	// every process run gets a fresh node ID
	nodeID := journal.GetULID()
	if debugEnabled {
		log.Println("[INFO] node ID:", nodeID)
	}

	privateServer := api.NewPrivateServer(nodeID, *clusterName)
	privateServer.SetDebug(debugEnabled)
	privateClient := cluster.NewPrivateClient(privateServer.Router())
	journalManager := journal.NewJournalManager(db)
	closer.Bind(func() {
		if err := journalManager.Close(); err != nil {
			log.Println("[WARN] journal close:", err)
		}
	})
	store, err := objstore.NewStore(nodeID,
		storage.NewLocalStorage(*localPrefix),
		storage.NewS3Storage(*s3Region, *s3Bucket),
		journalManager,
		cluster.NewClusterManager(privateClient, nodeID),
	)
	if err != nil {
		closer.Fatalln("[ERR]", err)
	}
	store.SetDebug(debugEnabled)
	privateServer.RouteAPI(store)
	// NOTE(review): execution continues past this call, so it appears
	// to bind listeners and return rather than block — confirm against
	// the PrivateServer implementation.
	if err := privateServer.ListenAndServe(*privateAddr); err != nil {
		closer.Fatalln(err)
	}

	// on shutdown: close the store, then drain the inbound/outbound
	// replication queues concurrently, each bounded by 2 minutes
	closer.Bind(func() {
		if err := store.Close(); err != nil {
			log.Println("[WARN]", err)
		}
		if debugEnabled {
			log.Println("[INFO] waiting for queues")
		}
		wg := new(sync.WaitGroup)
		wg.Add(2)
		go func() {
			defer wg.Done()
			store.WaitInbound(2 * time.Minute)
		}()
		go func() {
			defer wg.Done()
			store.WaitOutbound(2 * time.Minute)
		}()
		wg.Wait()
	})

	if len(*clusterNodes) == 0 {
		log.Println("[WARN] no additional cluster nodes specified, current node starts solo")
	} else {
		if debugEnabled {
			log.Println("[INFO] joining to cluster", *clusterNodes)
		}
		// join failures are non-fatal: the node keeps running solo
		if err := privateServer.JoinCluster(*clusterNodes); err != nil {
			log.Println("[WARN]", err)
		}
	}
	// expose private API to HTTP clients, so objstore cluster nodes can be debugged
	// using browser and external tools.
	if debugEnabled && len(*debugAddr) > 0 {
		log.Println("[INFO] exposing private API on", *debugAddr)
		go func() {
			if err := privateServer.ExposeAPI(*debugAddr); err != nil {
				closer.Fatalln("[ERR]", err)
			}
		}()
	}

	publicServer := api.NewPublicServer(nodeID)
	publicServer.RouteAPI(store)
	go func() {
		if err := publicServer.ListenAndServe(*publicAddr); err != nil {
			closer.Fatalln(err)
		}
	}()

	// block until a shutdown signal arrives
	closer.Hold()
}
208 |
// openStateDB ensures prefix exists and opens (or creates) the BoltDB
// state file inside it.
func openStateDB(prefix string) (*bolt.DB, error) {
	if err := os.MkdirAll(prefix, 0700); err != nil {
		return nil, err
	}
	// NOTE(review): 4*1024*1024*1024 overflows int on 32-bit platforms
	// (compile-time constant overflow) — confirm builds target 64-bit only.
	return bolt.Open(filepath.Join(prefix, "state.db"), 0600, &bolt.Options{
		Timeout: 30 * time.Second, // wait while trying to open state file
		InitialMmapSize: 4 * 1024 * 1024 * 1024, // preallocated space to avoid writers block
	})
}
218 |
--------------------------------------------------------------------------------
/journal/manager.go:
--------------------------------------------------------------------------------
1 | package journal
2 |
3 | import (
4 | "bytes"
5 | "errors"
6 | "fmt"
7 | "time"
8 |
9 | "github.com/boltdb/bolt"
10 | )
11 |
// JournalManager provides persistent storage and lifecycle management
// for file journals kept in a BoltDB database.
type JournalManager interface {
	// Create allocates a new journal and its meta entry for id.
	Create(id ID) error
	// View runs fn read-only over the journal identified by id.
	View(id ID, fn JournalIter) error
	// Update runs fn read-write over the journal identified by id.
	Update(id ID, fn JournalIter) error

	// ForEach runs fn read-only over every journal.
	ForEach(fn JournalIter) error
	// ForEachUpdate runs fn read-write over every journal.
	ForEachUpdate(fn JournalIter) error

	// JoinAll consolidates all journals into the target journal.
	JoinAll(target ID) (*JournalMeta, error)
	// ListAll reports the meta of every journal.
	ListAll() ([]*JournalMeta, error)
	// ExportAll flattens all journals into one file meta list.
	ExportAll() (FileMetaList, error)

	// Close releases the underlying DB handle.
	Close() error
}

// JournalIter is a callback invoked with a journal and its meta; meta
// may be nil in ForEach/ForEachUpdate when no mapping entry exists.
type JournalIter func(journal Journal, meta *JournalMeta) error
28 |
29 | func NewJournalManager(db *bolt.DB) JournalManager {
30 | return &kvJournalManager{
31 | db: db,
32 | }
33 | }
34 |
// kvJournalManager is the BoltDB-backed JournalManager implementation.
type kvJournalManager struct {
	db *bolt.DB // state database shared with the rest of the node
}
38 |
39 | func (kv *kvJournalManager) Create(id ID) error {
40 | return kv.db.Update(func(tx *bolt.Tx) error {
41 | journalID := []byte(id)
42 | mapping, err := tx.CreateBucketIfNotExists(mappingBucket)
43 | if err != nil {
44 | return err
45 | }
46 | journals, err := tx.CreateBucketIfNotExists(journalsBucket)
47 | if err != nil {
48 | return err
49 | }
50 | if mapping.Get(journalID) != nil {
51 | return errors.New("kvJournal: journal mapping exists")
52 | }
53 | if journals.Get(journalID) != nil {
54 | return errors.New("kvJournal: journal exists")
55 | }
56 | if _, err := journals.CreateBucket(journalID); err != nil {
57 | err = fmt.Errorf("kvJournal: failed to create journal bucket: %v", err)
58 | return err
59 | }
60 | meta := &JournalMeta{
61 | ID: id,
62 | CreatedAt: time.Now().UnixNano(),
63 | }
64 | data, _ := meta.MarshalMsg(nil)
65 | return mapping.Put(journalID, data)
66 | })
67 | }
68 |
69 | func (kv *kvJournalManager) View(id ID, fn JournalIter) error {
70 | return kv.db.View(func(tx *bolt.Tx) error {
71 | journalID := []byte(id)
72 | mapping := tx.Bucket(mappingBucket)
73 | journals := tx.Bucket(journalsBucket)
74 | data := mapping.Get(journalID)
75 | var meta *JournalMeta
76 | if data == nil {
77 | return errors.New("kvJournal: journal mapping not exists")
78 | } else {
79 | meta = new(JournalMeta)
80 | meta.UnmarshalMsg(data)
81 | }
82 | b := journals.Bucket(journalID)
83 | if b == nil {
84 | return errors.New("kvJournal: journal not exists")
85 | }
86 | journal := NewJournal(id, tx, b)
87 | return fn(journal, meta)
88 | })
89 | }
90 |
91 | func (kv *kvJournalManager) Update(id ID, fn JournalIter) error {
92 | return kv.db.Update(func(tx *bolt.Tx) error {
93 | journalID := []byte(id)
94 | mapping := tx.Bucket(mappingBucket)
95 | journals := tx.Bucket(journalsBucket)
96 | data := mapping.Get(journalID)
97 | var meta *JournalMeta
98 | if data == nil {
99 | return errors.New("kvJournal: journal mapping not exists")
100 | } else {
101 | meta = new(JournalMeta)
102 | meta.UnmarshalMsg(data)
103 | }
104 | b := journals.Bucket(journalID)
105 | if b == nil {
106 | return errors.New("kvJournal: journal not exists")
107 | }
108 | journal := NewJournal(id, tx, b)
109 | return fn(journal, meta)
110 | })
111 | }
112 |
113 | func (kv *kvJournalManager) ListAll() (metaList []*JournalMeta, err error) {
114 | err = kv.db.View(func(tx *bolt.Tx) error {
115 | mapping := tx.Bucket(mappingBucket)
116 | journals := tx.Bucket(journalsBucket)
117 | metaList = make([]*JournalMeta, 0, journals.Stats().KeyN)
118 | cur := journals.Cursor()
119 |
120 | id, _ := cur.First()
121 | for id != nil {
122 | journal := NewJournal(ID(id), tx, journals.Bucket(id))
123 | meta := journal.Meta()
124 | if data := mapping.Get(id); data != nil {
125 | extraMeta := new(JournalMeta)
126 | extraMeta.UnmarshalMsg(data)
127 | meta.CreatedAt = extraMeta.CreatedAt
128 | meta.JoinedAt = extraMeta.JoinedAt
129 | }
130 | metaList = append(metaList, meta)
131 | id, _ = cur.Next()
132 | }
133 | return nil
134 | })
135 | return
136 | }
137 |
// JoinAll consolidates every journal into the journal identified by
// target: entries from all other journals are copied into the target
// (keys already present in the target win), the source journal buckets
// are deleted, and their meta in the mapping bucket is stamped with
// JoinedAt and relocated to the target ID. Returns the refreshed
// target meta.
func (kv *kvJournalManager) JoinAll(target ID) (*JournalMeta, error) {
	kv.Create(target) // for safety reasons ensure that journal exists

	var targetMeta *JournalMeta
	err := kv.db.Update(func(tx *bolt.Tx) error {
		mapping := tx.Bucket(mappingBucket)
		journals := tx.Bucket(journalsBucket)
		cur := journals.Cursor()

		journalID := []byte(target)
		targetJournal := NewJournal(target, tx, journals.Bucket(journalID))

		id, _ := cur.First()
		for id != nil {
			if bytes.Equal(id, journalID) {
				// never merge the target into itself
				id, _ = cur.Next()
				continue
			}
			journal := NewJournal(ID(id), tx, journals.Bucket(id))
			if _, err := journal.Range("", 0, func(k string, v *FileMeta) error {
				if targetJournal.Exists(k) {
					// disallow override upon consolidation from older journals
					return nil
				}
				return targetJournal.Set(k, v)
			}); err != nil {
				return err
			}

			meta := journal.Meta()
			meta.JoinedAt = time.Now().UnixNano()
			meta.ID = target // relocated journal
			if data := mapping.Get(id); data != nil {
				// preserve the original creation timestamp
				extraMeta := new(JournalMeta)
				extraMeta.UnmarshalMsg(data)
				meta.CreatedAt = extraMeta.CreatedAt
			}
			data, _ := meta.MarshalMsg(nil)
			if err := mapping.Put(id, data); err != nil {
				return err
			}
			// NOTE(review): deleting a bucket while cursoring over its
			// parent relies on bolt cursor semantics — confirm this is
			// safe with the vendored bolt version.
			if err := journals.DeleteBucket(id); err != nil {
				return err
			}
			id, _ = cur.Next()
		}

		targetMeta = targetJournal.Meta()
		if data := mapping.Get(journalID); data != nil {
			extraMeta := new(JournalMeta)
			extraMeta.UnmarshalMsg(data)
			targetMeta.CreatedAt = extraMeta.CreatedAt
		}
		data, _ := targetMeta.MarshalMsg(nil)
		return mapping.Put(journalID, data)
	})
	return targetMeta, err
}
196 |
197 | func (kv *kvJournalManager) ForEach(fn JournalIter) error {
198 | return kv.db.View(func(tx *bolt.Tx) error {
199 | mapping := tx.Bucket(mappingBucket)
200 | journals := tx.Bucket(journalsBucket)
201 | cur := journals.Cursor()
202 |
203 | id, _ := cur.First()
204 | for id != nil {
205 | var meta *JournalMeta
206 | if data := mapping.Get(id); data != nil {
207 | meta = new(JournalMeta)
208 | meta.UnmarshalMsg(data)
209 | }
210 | journal := NewJournal(ID(id), tx, journals.Bucket(id))
211 | if err := fn(journal, meta); err == RangeStop {
212 | return nil
213 | } else if err != nil {
214 | return err
215 | }
216 | id, _ = cur.Next()
217 | }
218 | return nil
219 | })
220 | }
221 |
222 | func (kv *kvJournalManager) ForEachUpdate(fn JournalIter) error {
223 | return kv.db.Update(func(tx *bolt.Tx) error {
224 | mapping := tx.Bucket(mappingBucket)
225 | journals := tx.Bucket(journalsBucket)
226 | cur := journals.Cursor()
227 |
228 | id, _ := cur.First()
229 | for id != nil {
230 | var meta *JournalMeta
231 | if data := mapping.Get(id); data != nil {
232 | meta = new(JournalMeta)
233 | meta.UnmarshalMsg(data)
234 | }
235 | journal := NewJournal(ID(id), tx, journals.Bucket(id))
236 | if err := fn(journal, meta); err == RangeStop {
237 | return nil
238 | } else if err != nil {
239 | return err
240 | }
241 | id, _ = cur.Next()
242 | }
243 | return nil
244 | })
245 | }
246 |
247 | func (kv *kvJournalManager) ExportAll() (FileMetaList, error) {
248 | var list FileMetaList
249 | err := kv.db.View(func(tx *bolt.Tx) error {
250 | journals := tx.Bucket(journalsBucket)
251 | cur := journals.Cursor()
252 | id, _ := cur.First()
253 | for id != nil {
254 | journal := NewJournal(ID(id), tx, journals.Bucket(id))
255 | list = append(list, journal.List()...)
256 | id, _ = cur.Next()
257 | }
258 | return nil
259 | })
260 | return list, err
261 | }
262 |
// Close closes the underlying BoltDB handle; the manager must not be
// used afterwards.
func (kv *kvJournalManager) Close() error {
	return kv.db.Close()
}
266 |
var (
	// RangeStop may be returned from iterator callbacks to stop
	// iteration early without reporting an error.
	RangeStop = errors.New("stop")
	// ForEachStop is an alias of RangeStop for ForEach/ForEachUpdate.
	ForEachStop = RangeStop
)

var (
	// mappingBucket holds per-journal meta (created/joined timestamps).
	mappingBucket = []byte("mapping")
	// journalsBucket holds one nested bucket per journal ID.
	journalsBucket = []byte("journals")
)
276 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | # objstore [](https://goreportcard.com/report/sphere.software/objstore) 
6 | _A Multi-Master Distributed Caching Layer for Amazon S3_
7 |
8 | This project aims to provide an easy to use, self-organising multi-master caching layer for various cloud storage backends, e.g. S3. It combines the functionality of a simple object storage with the added robustness of cross-node journal synchronisation, object replication and cluster auto-discovery.
9 |
10 | We know that Amazon S3 has proven to be fast and reliable, a PaaS solution that acts like a
11 | backbone for many business applications. But the cost of service may become too high
12 | depending on your usage patterns, for example, if your application runs in your own datacenter, then
13 | the file transfer costs will skyrocket. Also request frequency has its limits.
14 |
15 | **Objstore Cluster** aims to mitigate this problem, it's supposed to be running in your datacenter, implementing a near-cache for all files. Its API allows to upload, head, read and delete files by key, like any other object. All related meta-data may be preserved with files as well. This caching layer will upload the file to S3 and store a copy locally, with optional replication among other nodes. Next time you'd access the file, it will be served from a local machine, or its near nodes, in case of a cache miss, it will get the file from S3 directly.
16 |
17 |
18 |
19 |
20 |
21 | The cluster must be robust, although it's not required to reach the same levels as traditional DBs or other stores that are required to be highly consistent, a certain amount of fault resilience is important because a dropped cache implies a huge (and unplanned) spike in latency and CoS, which may hurt infrastructure and your wallet. And caches may recover very slowly.
22 |
23 | Objstore leverages a P2P discovery mechanism, so once some nodes are started already, another one might join knowing only one physical IP address. The cluster sets up a logical network over persistent TCP connections between nodes and uses an internal HTTP API to share events and data between nodes, eliminating the single point of failure. Everything involves zero configuration, except the HTTP load balancer which may be any of your choice.
24 |
25 |
26 |
27 |
28 |
29 | Node disk sizes are required to be identical, the overall limit of the cluster is limited by size of the smallest disk used for data replication. If you want to expand the size linearly, setup another Object Store cluster and tweak your HTTP load balancer.
30 |
31 | ## Installation
32 |
33 | ```
34 | go get -u sphere.software/objstore/cmd/objstore
35 | ```
36 |
37 | For local Docker builds:
38 |
39 | ```
40 | cd deploy && make local
41 | ```
42 |
43 | For remote / CI Docker builds under Linux:
44 |
45 | ```
46 | cd deploy && make build
47 | ```
48 |
49 | ## Server usage
50 |
51 | ```
52 | $ objstore -h
53 |
54 | Usage: objstore [OPTIONS]
55 |
56 | A Multi-Master Distributed Caching Layer for Amazon S3.
57 | Version 0.1 http://github.com/SphereSoftware/objstore
58 |
59 | Options:
60 |   -d, --debug                      Debug level to use, currently 0/1 supported. ($APP_DEBUG_LEVEL)
61 | -N, --nodes A list of cluster nodes to join for discovery and journal updates ($APP_CLUSTER_NODES)
62 | -T, --tag="default" Cluster tag name ($APP_CLUSTER_TAGNAME)
63 | --private-addr="0.0.0.0:11999" Listen address for cluster discovery and private API ($NET_PRIVATE_ADDR)
64 | --debug-addr="" Listen address for private API debugging using external tools ($NET_DEBUG_ADDR)
65 | --public-addr="0.0.0.0:10999" Listen address for external access and public HTTP API ($NET_PUBLIC_ADDR)
66 | --state-dir="state/" Directory where to keep local state and journals. ($APP_STATE_DIR)
67 | --files-dir="files/" Directory where to keep local files. ($APP_FILES_DIR)
68 | -R, --region="us-east-1" Amazon S3 region name ($S3_REGION_NAME)
69 | -B, --bucket="00-objstore-test" Amazon S3 bucket name ($S3_BUCKET_NAME)
70 | ```
71 |
72 | Example use, single node:
73 |
74 | ```bash
75 | $ objstore -d 1 # with debug
76 |
77 | [INFO] node ID: 01BRNEKEZGKFSPAT10KZM5A141
78 | [WARN] no additional cluster nodes specified, current node starts solo
79 | [GIN-debug] Listening and serving HTTP on 0.0.0.0:10999
80 | ```
81 |
82 | You can start additional nodes, on the same machine or another. If starting on the same machine, make sure that data directories are not colliding and the private/public API ports are different. To start a node that will join the cluster with the first one:
83 |
84 | ```bash
85 | $ objstore -d 1 -N localhost:11999 \
86 | --private-addr="0.0.0.0:11997" --public-addr="0.0.0.0:10997"
87 |
88 | [INFO] node ID: 01BRNKZ01MFSJJDN98F6M0640K
89 | [GIN-debug] Listening and serving HTTP on 0.0.0.0:10997
90 | [INFO] joining to cluster [localhost:11999]
91 | [INFO] sync done
92 | ```
93 |
94 | By checking both nodes logs, you can see that `/private/v1/sync` has been called from each other. After that journals are in sync. More about journal synchronisation and node failure scenarios will be written soon in a standalone document.
95 |
96 | ## Client usage
97 |
98 | At this moment both nodes are listening on the public HTTP API addresses:
99 |
100 | * `localhost:10999`
101 | * `localhost:10997`
102 |
103 | You don't need to use a load balancer to start utilising the cluster, the requests may be directed to any active node in the cluster. Load balancer would allow to split the workload equally, also it helps to avoid calling unresponsive nodes.
104 |
105 | ### Public API endpoints
106 |
107 | ```
108 | GET /api/v1/get/:id
109 | GET /api/v1/meta/:id
110 | POST /api/v1/put
111 | POST /api/v1/delete/:id
112 | GET /api/v1/id
113 | GET /api/v1/version
114 | GET /api/v1/ping
115 | GET /api/v1/stats
116 | ```
117 |
118 | ### How to upload files
119 |
120 | 1. **Generate a new ID.** All files are associated with IDs of [ULID](https://github.com/oklog/ulid) format, so you must generate your own or just ask any node for new ID.
121 | ```bash
122 | $ curl localhost:10999/api/v1/id
123 |
124 | 01BRNMMS1DK3CBD4ZZM2TQ8C5B
125 | ```
126 |
127 | 2. **Choose consistency level** Three levels are available:
128 | ```go
129 | // ConsistencyLocal flags file for local persistence only, implying
130 | // that the file body will be stored on a single node. Default.
131 | ConsistencyLocal ConsistencyLevel = 0
132 | // ConsistencyS3 flags file for local+S3 persistence, implying that the file
133 | // body will be stored on a single node and Amazon S3.
134 | ConsistencyS3 ConsistencyLevel = 1
135 | // ConsistencyFull flags file to be replicated across all existing nodes in cluster and S3.
136 | ConsistencyFull ConsistencyLevel = 2
137 | ```
138 |
139 | 3. **Specify headers** The following headers are available:
140 | * `X-Meta-ID` is a previously generated or retrieved [ULID](https://github.com/oklog/ulid);
141 | * `X-Meta-Name` is the file name, used with extension to serve the content with proper type;
142 | * `X-Meta-ConsistencyLevel` specifies the consistency level for the file, it may be upgraded later;
143 | * `X-Meta-UserMeta` specifies any meta data for the file as JSON map, stored in S3 tags.
144 |
145 | 4. **POST** Example, let's upload `test.txt` with replication across cluster and S3.
146 |
147 | ```
148 | $ curl -d @test.txt -H "X-Meta-ConsistencyLevel: 2" -H "X-Meta-Name: test.txt" \
149 | -H "X-Meta-ID: 01BRNMMS1DK3CBD4ZZM2TQ8C5B" localhost:10999/api/v1/put
150 | ```
151 |
152 | ### How to read files
153 |
154 | Accessing a file is straightforward:
155 |
156 | ```
157 | $ curl localhost:10999/api/v1/get/01BRNMMS1DK3CBD4ZZM2TQ8C5B
158 |
159 | It works!
160 | ```
161 |
162 | More is going on under the covers. Apparently the file exists on both nodes and S3, but in case when file is stored only on a single node (with level=1), then the node would fetch this file from another one and serve, this fact can be checked by inspecting the headers:
163 |
164 | ```
165 | $ curl -v localhost:10999/api/v1/get/01BRNMMS1DK3CBD4ZZM2TQ8C5B
166 |
167 | < HTTP/1.1 200 OK
168 | < Accept-Ranges: bytes
169 | < Content-Length: 9
170 | < Content-Type: text/plain; charset=utf-8
171 | < Last-Modified: Tue, 29 Aug 2017 00:11:35 GMT
172 | < X-Meta-Consistencylevel: 2
173 | < X-Meta-Id: 01BRNMMS1DK3CBD4ZZM2TQ8C5B
174 | < X-Meta-Name: test.txt
175 | < Date: Tue, 29 Aug 2017 00:19:45 GMT
176 | ```
177 |
178 | In case when file is fetched from another node, `X-Meta-Symlink: true` will appear. If file is known but has been deleted, a 404 with `X-Meta-Deleted: true` will be served. And if file has been missing locally and fetched from S3, a header flag `X-Meta-Fetched: true` will appear.
179 |
180 | By default, fetching from S3 in case of cache misses is disabled, to get a file with possibility of fetching it from the cloud storage backend, use `X-Meta-Fetch`:
181 |
182 | ```bash
183 | # to check we delete local state and restarted nodes (!)
184 |
185 | $ curl -v localhost:10999/api/v1/get/01BRNMMS1DK3CBD4ZZM2TQ8C5B
186 |
187 | < HTTP/1.1 404 Not Found
188 |
189 | $ curl -v -H "X-Meta-Fetch: 1" localhost:10999/api/v1/get/01BRNMMS1DK3CBD4ZZM2TQ8C5B
190 |
191 | < HTTP/1.1 200 OK
192 | < Accept-Ranges: bytes
193 | < Content-Length: 9
194 | < Content-Type: text/plain; charset=utf-8
195 | < Last-Modified: Tue, 29 Aug 2017 00:49:17 GMT
196 | < X-Meta-Consistencylevel: 2
197 | < X-Meta-Fetched: true
198 | < X-Meta-Id: 01BRNMMS1DK3CBD4ZZM2TQ8C5B
199 | < X-Meta-Name: test.txt
200 | < Date: Tue, 29 Aug 2017 00:49:17 GMT
201 |
202 | It works!
203 | ```
204 |
205 | Notice that file has been fetched with `X-Meta-Fetched: true`, it also has all properties saved such as name, content type and the consistency level. The latter means it was also replicated again across the nodes.
206 |
207 | ## Acknowledgements
208 |
The project is in the Open Beta stage; please test it thoroughly before using it for anything serious.
210 |
211 | ## TODO / Roadmap
212 |
213 | * Implement eviction policy (LRU / ARC / ...)
214 | * Document the internal design
215 | * Improve deployment scripts
216 | * Test coverage
217 | * Implement a sample client
218 | * Benchmarks
219 |
220 | ## License
221 |
222 | MIT
223 |
--------------------------------------------------------------------------------
/api/private.go:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | import (
4 | "encoding/json"
5 | "errors"
6 | "fmt"
7 | "io"
8 | "io/ioutil"
9 | "log"
10 | "mime"
11 | "net"
12 | "net/http"
13 | "net/http/httputil"
14 | "path/filepath"
15 | "strconv"
16 | "strings"
17 | "time"
18 |
19 | "github.com/astranet/astranet"
20 | "github.com/astranet/astranet/addr"
21 | "github.com/gin-gonic/gin"
22 |
23 | "sphere.software/objstore"
24 | )
25 |
// PrivateServer exposes a node's private (intra-cluster) HTTP API over an
// astranet virtual network.
type PrivateServer struct {
	router astranet.AstraNet // virtual network router (server+client+router)
	mux    http.Handler      // HTTP mux served over the virtual listener, set by RouteAPI

	nodeID string   // this node's ID; the service is registered as "objstore-<nodeID>"
	debug  bool     // when true, extra diagnostics are logged
	tags   []string // environment tags the astranet router is scoped with
}
34 |
35 | func NewPrivateServer(nodeID string, tags ...string) *PrivateServer {
36 | return &PrivateServer{
37 | nodeID: nodeID,
38 | tags: tags,
39 |
40 | // initializes server+client+router for private net
41 | router: astranet.New().Router().WithEnv(tags...),
42 | }
43 | }
44 |
45 | func (p *PrivateServer) SetDebug(enabled bool) {
46 | p.debug = enabled
47 | }
48 |
49 | func (p *PrivateServer) Env() []string {
50 | return p.tags
51 | }
52 |
53 | func (p *PrivateServer) Router() astranet.AstraNet {
54 | return p.router
55 | }
56 |
57 | // ListenAndServe initializes a HTTP listener for private services, starts
58 | // listening on a TCP address for virtual network transport.
59 | func (p *PrivateServer) ListenAndServe(addr string) error {
60 | listener, err := p.router.Bind("", "objstore-"+p.nodeID)
61 | if err != nil {
62 | return err
63 | }
64 | if p.debug {
65 | log.Println("ListenAndServe on", addr, "with service", "objstore-"+p.nodeID)
66 | log.Println(p.router.Services())
67 | }
68 | // start a HTTP server using node's private listener
69 | go http.Serve(listener, p.mux)
70 |
71 | if err = p.router.ListenAndServe("tcp4", addr); err == nil {
72 | p.router.Join("tcp4", addr)
73 | }
74 | return err
75 | }
76 |
77 | const defaultPort = "11999"
78 |
79 | // JoinCluster connects to another machines via TCP to join the virtual network.
80 | func (p *PrivateServer) JoinCluster(nodes []string) error {
81 | var failed []string
82 | for _, nodeAddr := range nodes {
83 | if _, _, err := net.SplitHostPort(nodeAddr); err != nil {
84 | nodeAddr = nodeAddr + ":" + defaultPort
85 | }
86 | if err := p.router.Join("tcp4", nodeAddr); err != nil {
87 | failed = append(failed, nodeAddr)
88 | }
89 | }
90 | if len(failed) > 0 {
91 | return fmt.Errorf("failed to join nodes: %v", failed)
92 | }
93 | p.router.Services()
94 | return nil
95 | }
96 |
97 | func newHTTPTransport(router astranet.AstraNet) *http.Transport {
98 | return &http.Transport{
99 | DisableKeepAlives: true,
100 | Dial: func(network, addr string) (net.Conn, error) {
101 | host, _, err := net.SplitHostPort(addr)
102 | if err != nil {
103 | return nil, err
104 | }
105 | return router.Dial(network, host)
106 | },
107 | }
108 | }
109 |
110 | // ExposeAPI initiates HTTP routing to the private API via loopback.
111 | func (p *PrivateServer) ExposeAPI(addr string) error {
112 | privateProxy := &httputil.ReverseProxy{
113 | Transport: newHTTPTransport(p.router),
114 | FlushInterval: time.Millisecond * 10,
115 | Director: func(req *http.Request) {
116 | req.URL.Scheme = "http"
117 | req.URL.Host = "objstore-" + p.nodeID
118 | },
119 | }
120 | return http.ListenAndServe(addr, privateProxy)
121 | }
122 |
// NodeInfo describes a single cluster member as seen through the
// virtual network.
type NodeInfo struct {
	ID    string `json:"id"`    // node ID, parsed from the "objstore-<ID>" service name
	Addr  string `json:"addr"`  // physical (TCP) host of the node
	VAddr string `json:"vaddr"` // virtual host:port within the astranet network
}

// NodeIter is the callback invoked by forEachNode for every discovered
// node; returning RangeStop halts the iteration without an error.
type NodeIter func(id, addr, vaddr string) error

var (
	// RangeStop may be returned from a NodeIter to stop iteration early.
	RangeStop = errors.New("stop")
	// ForEachStop is an alias of RangeStop.
	ForEachStop = RangeStop
)
135 |
// forEachNode walks the router's service table and invokes iterFunc once
// per distinct objstore node, deduplicated by node ID + physical host.
// Iteration stops early (without error) when iterFunc returns RangeStop;
// any other error aborts the walk and is returned as-is.
func forEachNode(router astranet.AstraNet, iterFunc NodeIter) error {
	services := router.Services()
	seen := make(map[string]bool)
	for _, info := range services {
		// only objstore services are of interest
		if !strings.HasPrefix(info.Service, "objstore-") {
			continue
		}
		if info.Upstream == nil {
			continue
		}
		// service names look like "objstore-<nodeID>[.suffix]"
		nodeID := strings.TrimPrefix(strings.Split(info.Service, ".")[0], "objstore-")
		host, _, _ := net.SplitHostPort(info.Upstream.RAddr().String())
		if seen[nodeID+host] {
			continue
		} else {
			seen[nodeID+host] = true
		}
		vaddr := getAddr(info.Host, info.Port)
		if err := iterFunc(nodeID, host, vaddr); err == RangeStop {
			return nil
		} else if err != nil {
			return err
		}
	}
	return nil
}
162 |
163 | func getAddr(host uint64, port uint32) string {
164 | return fmt.Sprintf("%s:%d", addr.Uint2Host(host), port)
165 | }
166 |
// RouteAPI installs the private API endpoints (node discovery, object
// transfer, journal sync and announce propagation) into the HTTP mux
// that ListenAndServe exposes over the virtual network.
func (p *PrivateServer) RouteAPI(store objstore.Store) {
	r := gin.Default()
	r.GET("/private/v1/ping", p.PingHandler())
	r.GET("/private/v1/nodes", p.ListNodesHandler())
	r.POST("/private/v1/announce", p.AnnounceHandler(store))
	r.GET("/private/v1/get/:id", p.GetHandler(store))
	r.POST("/private/v1/message", p.MessageHandler(store))
	r.POST("/private/v1/put", p.PutHandler(store))
	r.POST("/private/v1/sync", p.SyncHandler(store))
	// NOTE: deletion is a POST on the private API, not an HTTP DELETE
	r.POST("/private/v1/delete/:id", p.DeleteHandler(store))
	p.mux = r
}
179 |
180 | func (p *PrivateServer) PingHandler() gin.HandlerFunc {
181 | return func(c *gin.Context) {
182 | c.String(200, p.nodeID)
183 | }
184 | }
185 |
186 | func (p *PrivateServer) ListNodesHandler() gin.HandlerFunc {
187 | return func(c *gin.Context) {
188 | var nodes []NodeInfo
189 | if err := forEachNode(p.router, func(id, addr, vaddr string) error {
190 | nodes = append(nodes, NodeInfo{
191 | ID: id,
192 | Addr: addr,
193 | VAddr: vaddr,
194 | })
195 | return nil
196 | }); err != nil {
197 | c.String(500, "error: %v", err)
198 | return
199 | }
200 | c.JSON(200, nodes)
201 | }
202 | }
203 |
204 | func (p *PrivateServer) AnnounceHandler(store objstore.Store) gin.HandlerFunc {
205 | return func(c *gin.Context) {
206 | var event *objstore.EventAnnounce
207 | if err := c.BindJSON(&event); err != nil {
208 | return
209 | }
210 | store.ReceiveEventAnnounce(event)
211 | c.Status(200)
212 | }
213 | }
214 |
215 | func (p *PrivateServer) MessageHandler(store objstore.Store) gin.HandlerFunc {
216 | return func(c *gin.Context) {
217 | r := io.LimitReader(c.Request.Body, 8*1024) // 8kB limit
218 | data, _ := ioutil.ReadAll(r)
219 | c.Request.Body.Close()
220 | store.EmitEventAnnounce(&objstore.EventAnnounce{
221 | Type: objstore.EventOpaqueData,
222 | OpaqueData: data,
223 | })
224 | c.Status(200)
225 | }
226 | }
227 |
228 | func (p *PrivateServer) GetHandler(store objstore.Store) gin.HandlerFunc {
229 | return func(c *gin.Context) {
230 | r, meta, err := store.GetObject(c.Param("id"))
231 | if err == objstore.ErrNotFound {
232 | c.Status(404)
233 | return
234 | } else if err != nil {
235 | c.String(500, "error: %v", err)
236 | return
237 | }
238 | serveObject(c, r, meta)
239 | }
240 | }
241 |
242 | func serveMeta(c *gin.Context, meta *objstore.FileMeta) {
243 | c.Header("X-Meta-ID", meta.ID)
244 | if len(meta.Name) > 0 {
245 | c.Header("X-Meta-Name", meta.Name)
246 | }
247 | if len(meta.UserMeta) > 0 {
248 | user, _ := json.Marshal(meta.UserMeta)
249 | c.Header("X-Meta-UserMeta", string(user))
250 | }
251 | c.Header("X-Meta-ConsistencyLevel", strconv.Itoa(int(meta.Consistency)))
252 | if meta.IsSymlink {
253 | c.Header("X-Meta-Symlink", "true")
254 | }
255 | if meta.IsFetched {
256 | c.Header("X-Meta-Fetched", "true")
257 | }
258 | if meta.IsDeleted {
259 | c.Header("X-Meta-Deleted", "true")
260 | }
261 | }
262 |
// serveObject writes the meta headers and streams the object body to the
// client. When the reader is seekable the response is delegated to
// http.ServeContent, which adds range-request support.
func serveObject(c *gin.Context, r io.ReadCloser, meta *objstore.FileMeta) {
	serveMeta(c, meta)
	ts := time.Unix(0, meta.Timestamp)
	if seekable, ok := r.(io.ReadSeeker); ok {
		http.ServeContent(c.Writer, c.Request, meta.Name, ts, seekable)
		return
	}
	// actually do all the work http.ServeContent does, but without support
	// of ranges and partial reads due to lack of io.Seeker interface.
	if !ts.IsZero() {
		c.Header("Last-Modified", ts.UTC().Format(http.TimeFormat))
	}
	// NOTE(review): ctype is empty for unknown extensions; unlike
	// ServeContent, no content sniffing happens here — confirm intended.
	ctype := mime.TypeByExtension(filepath.Ext(meta.Name))
	c.Header("Content-Type", ctype)
	c.Header("Content-Length", strconv.FormatInt(meta.Size, 10))
	// best-effort copy: the write error is dropped since the response
	// status and headers have already been sent
	io.CopyN(c.Writer, r, meta.Size)
}
280 |
281 | func (p *PrivateServer) PutHandler(store objstore.Store) gin.HandlerFunc {
282 | return func(c *gin.Context) {
283 | putObject(c, store)
284 | }
285 | }
286 |
// putObject validates the X-Meta-* headers and stores the request body as
// an object. A pregenerated ULID must be supplied in X-Meta-ID; the
// consistency level falls back to the validated default when the header
// is absent.
func putObject(c *gin.Context, store objstore.Store) {
	// userMeta decodes the optional JSON-encoded user metadata header;
	// malformed JSON silently yields nil metadata (deliberately lenient).
	userMeta := func(data string) map[string]string {
		if len(data) == 0 {
			return nil
		}
		var v map[string]string
		json.Unmarshal([]byte(data), &v)
		return v
	}
	// a missing or malformed Content-Length yields size 0
	size, _ := strconv.ParseInt(c.Request.Header.Get("Content-Length"), 10, 64)
	meta := &objstore.FileMeta{
		ID:        c.Request.Header.Get("X-Meta-ID"),
		Name:      c.Request.Header.Get("X-Meta-Name"),
		UserMeta:  userMeta(c.Request.Header.Get("X-Meta-UserMeta")),
		Timestamp: time.Now().UnixNano(),
		Size:      size,
	}
	if len(meta.ID) == 0 {
		c.String(400, "error: ID not specified, use /id to get one")
		return
	} else if !objstore.CheckID(meta.ID) {
		err := fmt.Errorf("objstore: not a valid ULID: %s", meta.ID)
		c.String(400, "error: %v", err)
		return
	}
	levelData := c.Request.Header.Get("X-Meta-ConsistencyLevel")
	if len(levelData) == 0 {
		// no level specified -> use the validated zero (default) level
		level, _ := (objstore.ConsistencyLevel)(0).Check()
		meta.Consistency = level
	} else {
		// a non-numeric header parses as 0 and is then validated by Check
		n, _ := strconv.Atoi(levelData)
		level, err := (objstore.ConsistencyLevel)(n).Check()
		if err != nil {
			c.String(400, "error: %v", err)
			return
		}
		meta.Consistency = level
	}
	if _, err := store.PutObject(c.Request.Body, meta); err != nil {
		c.String(400, "error: %v", err)
		return
	}
	c.Status(200)
}
331 |
// SyncResponse carries the result of diffing a peer's journal snapshot
// against the local store state (see SyncHandler and store.Diff).
type SyncResponse struct {
	Added   objstore.FileMetaList `json:"list_added"`   // entries reported as added by store.Diff
	Deleted objstore.FileMetaList `json:"list_deleted"` // entries reported as deleted by store.Diff
}
336 |
337 | func (p *PrivateServer) SyncHandler(store objstore.Store) gin.HandlerFunc {
338 | return func(c *gin.Context) {
339 | var list objstore.FileMetaList
340 | if err := c.BindJSON(&list); err != nil {
341 | return
342 | }
343 | added, deleted, err := store.Diff(list)
344 | if err != nil {
345 | c.String(400, "error: %v", err)
346 | return
347 | }
348 | c.JSON(200, SyncResponse{
349 | Added: added,
350 | Deleted: deleted,
351 | })
352 | }
353 | }
354 |
355 | func deleteObject(c *gin.Context, store objstore.Store) {
356 | meta, err := store.DeleteObject(c.Param("id"))
357 | if err == objstore.ErrNotFound {
358 | c.Status(404)
359 | return
360 | } else if err != nil {
361 | c.String(500, "error: %v", err)
362 | return
363 | }
364 | if meta != nil {
365 | serveMeta(c, meta)
366 | }
367 | c.Status(200)
368 | }
369 |
370 | func (p *PrivateServer) DeleteHandler(store objstore.Store) gin.HandlerFunc {
371 | return func(c *gin.Context) {
372 | deleteObject(c, store)
373 | }
374 | }
375 |
--------------------------------------------------------------------------------
/journal/journal.go:
--------------------------------------------------------------------------------
1 | // Package journal is responsible for maintaining the inner state of the OBJSTORE,
2 | // journals represent managed event logs that can be diffed, joined and stored as
3 | // in-memory B-tree or in a BoltDB bucket. All operations on BoltDB are performed
4 | // in the context of a transaction, so journals are ACID-compatible.
5 | package journal
6 |
7 | import (
8 | "errors"
9 | "fmt"
10 | "io"
11 | "strings"
12 | "sync"
13 | "time"
14 |
15 | "github.com/boltdb/bolt"
16 | "github.com/cznic/b"
17 | )
18 |
// Journal is a managed, key-ordered event log of file meta entries that
// can be diffed against and joined with other journals.
type Journal interface {
	// ID yields the journal's identifier.
	ID() ID
	// Get returns the entry stored under key k, or nil when absent.
	Get(k string) *FileMeta
	// Exists reports whether key k is present.
	Exists(k string) bool
	// Set stores entry m under key k.
	Set(k string, m *FileMeta) error
	// Delete removes the entry under key k.
	Delete(k string) error
	// Diff compares this journal with j, reporting added/deleted entries.
	Diff(j Journal) (added FileMetaList, deleted FileMetaList)
	// Range iterates entries in key order starting at start, up to limit,
	// returning the key to resume from.
	Range(start string, limit int, fn func(k string, v *FileMeta) error) (string, error)
	// Join merges this journal's entries into target, updating mapping.
	Join(target Journal, mapping Mapping) error
	// List returns all entries in key order.
	List() FileMetaList
	// Close releases resources held by the journal.
	Close() error
	// Meta summarizes the journal (ID, first/last key, entry count).
	Meta() *JournalMeta
}
32 |
// kvJournal is a journal backed by a BoltDB bucket; its lifetime is bound
// to the enclosing BoltDB transaction.
type kvJournal struct {
	id ID

	b  *bolt.Bucket // bucket holding msgp-encoded FileMeta values
	tx *bolt.Tx     // owning transaction
}

// btreeJournal is an in-memory journal over a cznic/b B-tree, guarded by
// a mutex so it can be shared between goroutines.
type btreeJournal struct {
	id ID

	t      *b.Tree
	mux    *sync.Mutex
	closed bool // set by Close; guards against use-after-close
}
47 |
48 | func (b *btreeJournal) Close() error {
49 | b.mux.Lock()
50 | defer b.mux.Unlock()
51 | b.t.Close()
52 | b.closed = true
53 | return nil
54 | }
55 |
// Close is a no-op: the kvJournal's lifetime (and cleanup) is managed by
// the enclosing BoltDB transaction.
func (j *kvJournal) Close() error {
	// no-op as kvJournal is managed by BoltDB transaction
	return nil
}
60 |
61 | func (b *btreeJournal) Get(k string) *FileMeta {
62 | b.mux.Lock()
63 | if b.closed {
64 | b.mux.Unlock()
65 | return nil
66 | }
67 | v, ok := b.t.Get(k)
68 | b.mux.Unlock()
69 | if ok {
70 | return v.(*FileMeta)
71 | }
72 | return nil
73 | }
74 |
75 | func (b *btreeJournal) Exists(k string) bool {
76 | b.mux.Lock()
77 | if b.closed {
78 | b.mux.Unlock()
79 | return false
80 | }
81 | _, ok := b.t.Get(k)
82 | b.mux.Unlock()
83 | return ok
84 | }
85 |
86 | func (b *btreeJournal) Set(k string, m *FileMeta) error {
87 | if m == nil {
88 | return errors.New("journal: nil entries not allowed")
89 | }
90 | if len(k) == 0 {
91 | return errors.New("journal: zero-length keys not allowed")
92 | }
93 | b.mux.Lock()
94 | if b.closed {
95 | b.mux.Unlock()
96 | return closedErr
97 | }
98 | b.t.Set(k, m)
99 | b.mux.Unlock()
100 | return nil
101 | }
102 |
103 | func (b *btreeJournal) Delete(k string) error {
104 | if len(k) == 0 {
105 | return errors.New("journal: zero-length keys not allowed")
106 | }
107 | b.mux.Lock()
108 | if b.closed {
109 | b.mux.Unlock()
110 | return closedErr
111 | }
112 | b.t.Delete(k)
113 | b.mux.Unlock()
114 | return nil
115 | }
116 |
// closedErr is reported by journal operations invoked after Close.
var closedErr = errors.New("journal: closed already")
118 |
// Range iterates entries in key order starting at start, invoking fn for
// each, up to limit entries (limit <= 0 means unlimited). It returns the
// last visited key so the caller can resume; fn may return ErrRangeStop
// to halt iteration without an error.
func (b *btreeJournal) Range(start string, limit int, fn func(k string, v *FileMeta) error) (string, error) {
	b.mux.Lock()
	if b.closed {
		b.mux.Unlock()
		return "", closedErr
	}
	// NOTE(review): Seek's ok reports an exact key match; returning early
	// when !ok skips entries with keys > start — confirm callers always
	// pass keys that exist in the journal.
	iter, ok := b.t.Seek(start)
	b.mux.Unlock()
	if !ok {
		return "", nil
	}
	defer iter.Close()

	var processed int
	var lastK string
	for {
		// the closed flag is rechecked each step: Close may race iteration
		b.mux.Lock()
		if b.closed {
			b.mux.Unlock()
			return "", closedErr
		}
		k, v, err := iter.Next()
		b.mux.Unlock()
		if err == nil {
			lastK = k.(string)
			if err := fn(k.(string), v.(*FileMeta)); err == ErrRangeStop {
				return lastK, nil
			} else if err != nil {
				return lastK, err
			}
		} else {
			// iterator exhausted: an empty key signals a completed scan
			return "", nil
		}
		processed++
		if limit > 0 && processed >= limit {
			break
		}
	}
	return lastK, nil
}
159 |
160 | func (b *btreeJournal) Join(target Journal, mapping Mapping) error {
161 | return errors.New("journal: unjoinable journals")
162 | }
163 |
164 | func (b *btreeJournal) Meta() *JournalMeta {
165 | firstKey, _ := b.t.First()
166 | lastKey, _ := b.t.Last()
167 | return &JournalMeta{
168 | ID: b.id,
169 |
170 | FirstKey: firstKey.(string),
171 | LastKey: lastKey.(string),
172 | CountTotal: b.t.Len(),
173 | }
174 | }
175 |
// ID yields the journal's identifier.
func (b *btreeJournal) ID() ID {
	return b.id
}
179 |
// List collects every entry into a FileMetaList in ascending key order;
// nil is returned when the journal is closed or empty.
func (b *btreeJournal) List() FileMetaList {
	b.mux.Lock()
	if b.closed {
		b.mux.Unlock()
		return nil
	}
	iter, err := b.t.SeekFirst()
	b.mux.Unlock()
	if err == nil {
		defer iter.Close()
	}

	var list FileMetaList
	var v interface{}
	for err == nil {
		// lock each step so concurrent Set/Delete cannot race the iterator
		b.mux.Lock()
		_, v, err = iter.Next()
		b.mux.Unlock()
		if err == nil {
			list = append(list, v.(*FileMeta))
		}
	}
	return list
}
204 |
// Diff compares this (prev) journal against next: added holds entries
// present in next but missing from prev, deleted holds entries present in
// prev but missing from next. Both sides are walked in ascending key
// order with a classic sorted-merge; the btree side is locked around each
// iterator step only, so a concurrent writer may interleave.
func (prev *btreeJournal) Diff(next Journal) (added FileMetaList, deleted FileMetaList) {
	switch next := next.(type) {
	case *btreeJournal:
		prev.mux.Lock()
		if prev.closed {
			prev.mux.Unlock()
			return nil, nil
		}
		prevIter, prevErr := prev.t.SeekFirst()
		prev.mux.Unlock()
		if prevErr == nil {
			defer prevIter.Close()
		}
		next.mux.Lock()
		if next.closed {
			next.mux.Unlock()
			return nil, nil
		}
		nextIter, nextErr := next.t.SeekFirst()
		next.mux.Unlock()
		if nextErr == nil {
			defer nextIter.Close()
		}

		// degenerate cases: one or both journals are empty (SeekFirst
		// reports io.EOF for an empty tree)
		switch {
		case prevErr == io.EOF && nextErr == io.EOF:
			return nil, nil
		case prevErr == io.EOF:
			// all added
			return next.List(), nil
		case nextErr == io.EOF:
			// all deleted
			return nil, prev.List()
		default:
			prev.mux.Lock()
			prevK, prevV, prevErr := prevIter.Next()
			prev.mux.Unlock()
			next.mux.Lock()
			nextK, nextV, nextErr := nextIter.Next()
			next.mux.Unlock()

			// sorted-merge walk over both iterators
			for {
				switch {
				case prevErr == io.EOF:
					if nextErr == io.EOF {
						// done
						return
					}
					added = append(added, nextV.(*FileMeta))
					// move next iterator
					next.mux.Lock()
					nextK, nextV, nextErr = nextIter.Next()
					next.mux.Unlock()
				case nextErr == io.EOF:
					if prevErr == io.EOF {
						// done
						return
					}
					deleted = append(deleted, prevV.(*FileMeta))
					// move prev iterator
					prev.mux.Lock()
					prevK, prevV, prevErr = prevIter.Next()
					prev.mux.Unlock()
				default:
					prevCmp := strings.Compare(prevK.(string), nextK.(string))
					switch {
					case prevCmp < 0: // nextK > prevK
						// prevK has been deleted
						deleted = append(deleted, prevV.(*FileMeta))
						// advance prev iter
						prev.mux.Lock()
						prevK, prevV, prevErr = prevIter.Next()
						prev.mux.Unlock()
					case prevCmp > 0: // nextK < prevK
						// nextK has been inserted
						added = append(added, nextV.(*FileMeta))
						// advance next iter
						next.mux.Lock()
						nextK, nextV, nextErr = nextIter.Next()
						next.mux.Unlock()
					default:
						// same key -> advance iterators
						prev.mux.Lock()
						prevK, prevV, prevErr = prevIter.Next()
						prev.mux.Unlock()
						next.mux.Lock()
						nextK, nextV, nextErr = nextIter.Next()
						next.mux.Unlock()
					}
				}
			}
		}
	case *kvJournal:
		prev.mux.Lock()
		if prev.closed {
			prev.mux.Unlock()
			return nil, nil
		}
		prevIter, prevErr := prev.t.SeekFirst()
		prev.mux.Unlock()
		if prevErr == nil {
			defer prevIter.Close()
		}
		nextIter := next.b.Cursor()

		switch {
		case prevErr == io.EOF && nextIter == nil:
			return nil, nil
		case prevErr == io.EOF:
			// all added
			return next.List(), nil
		case nextIter == nil:
			// all deleted
			return nil, prev.List()
		default:
			prev.mux.Lock()
			prevK, prevV, prevErr := prevIter.Next()
			prev.mux.Unlock()
			nextK, nextV := nextIter.Next()

			// merge walk: btree values are *FileMeta, bolt values are
			// msgp-encoded bytes that must be unmarshaled; a nil bolt
			// value denotes a nested bucket and is skipped
			for {
				switch {
				case prevErr == io.EOF:
					if nextK == nil {
						// done
						return
					}
					if nextV != nil {
						meta := new(FileMeta)
						meta.UnmarshalMsg(nextV)
						added = append(added, meta)
					}
					// move next iterator
					nextK, nextV = nextIter.Next()
				case nextK == nil:
					if prevErr == io.EOF {
						// done
						return
					}
					deleted = append(deleted, prevV.(*FileMeta))
					// move prev iterator
					prev.mux.Lock()
					prevK, prevV, prevErr = prevIter.Next()
					prev.mux.Unlock()
				default:
					prevCmp := strings.Compare(prevK.(string), string(nextK))
					switch {
					case prevCmp < 0: // nextK > prevK
						// prevK has been deleted
						deleted = append(deleted, prevV.(*FileMeta))
						// advance prev iter
						prev.mux.Lock()
						prevK, prevV, prevErr = prevIter.Next()
						prev.mux.Unlock()
					case prevCmp > 0: // nextK < prevK
						// nextK has been inserted
						if nextV != nil {
							meta := new(FileMeta)
							meta.UnmarshalMsg(nextV)
							added = append(added, meta)
						}
						// advance next iter
						nextK, nextV = nextIter.Next()
					default:
						// same key -> advance iterators
						prev.mux.Lock()
						prevK, prevV, prevErr = prevIter.Next()
						prev.mux.Unlock()
						nextK, nextV = nextIter.Next()
					}
				}
			}
		}
	default:
		panic("indifferentiable types")
	}
}
382 |
383 | func (prev *kvJournal) Diff(next Journal) (added FileMetaList, deleted FileMetaList) {
384 | switch next := next.(type) {
385 | case *kvJournal:
386 | prevIter := prev.b.Cursor()
387 | nextIter := next.b.Cursor()
388 |
389 | switch {
390 | case prevIter == nil && nextIter == nil:
391 | return nil, nil
392 | case prevIter == nil:
393 | // all added
394 | return next.List(), nil
395 | case nextIter == nil:
396 | // all deleted
397 | return nil, prev.List()
398 | default:
399 | prevK, prevV := prevIter.Next()
400 | nextK, nextV := nextIter.Next()
401 |
402 | for {
403 | switch {
404 | case prevK == nil:
405 | if nextK == nil {
406 | // done
407 | return
408 | }
409 | if nextV != nil {
410 | meta := new(FileMeta)
411 | meta.UnmarshalMsg(nextV)
412 | added = append(added, meta)
413 | }
414 | // move next iterator
415 | nextK, nextV = nextIter.Next()
416 | case nextK == nil:
417 | if prevK == nil {
418 | // done
419 | return
420 | }
421 | if prevV != nil {
422 | meta := new(FileMeta)
423 | meta.UnmarshalMsg(prevV)
424 | deleted = append(deleted, meta)
425 | }
426 | // move prev iterator
427 | prevK, prevV = prevIter.Next()
428 | default:
429 | prevCmp := strings.Compare(string(prevK), string(nextK))
430 | switch {
431 | case prevCmp < 0: // nextK > prevK
432 | // prevK has been deleted
433 | if prevV != nil {
434 | meta := new(FileMeta)
435 | meta.UnmarshalMsg(prevV)
436 | deleted = append(deleted, meta)
437 | }
438 | // advance prev iter
439 | prevK, prevV = prevIter.Next()
440 | case prevCmp > 0: // nextK < prevK
441 | // nextK has been insterted
442 | if nextV != nil {
443 | meta := new(FileMeta)
444 | meta.UnmarshalMsg(nextV)
445 | added = append(added, meta)
446 | }
447 | // advance next iter
448 | nextK, nextV = nextIter.Next()
449 | default:
450 | // same key -> advance iterators
451 | prevK, prevV = prevIter.Next()
452 | nextK, nextV = nextIter.Next()
453 | }
454 | }
455 | }
456 | }
457 | case *btreeJournal:
458 | next.mux.Lock()
459 | if next.closed {
460 | next.mux.Unlock()
461 | return nil, nil
462 | }
463 | nextIter, nextErr := next.t.SeekFirst()
464 | next.mux.Unlock()
465 | if nextErr == nil {
466 | defer nextIter.Close()
467 | }
468 | prevIter := prev.b.Cursor()
469 |
470 | switch {
471 | case nextErr == io.EOF && prevIter == nil:
472 | return nil, nil
473 | case nextErr == io.EOF:
474 | // all added
475 | return prev.List(), nil
476 | case prevIter == nil:
477 | // all deleted
478 | return nil, next.List()
479 | default:
480 | next.mux.Lock()
481 | nextK, nextV, nextErr := nextIter.Next()
482 | next.mux.Unlock()
483 | prevK, prevV := prevIter.Next()
484 |
485 | for {
486 | switch {
487 | case nextErr == io.EOF:
488 | if prevK == nil {
489 | // done
490 | return
491 | }
492 | if prevV != nil {
493 | meta := new(FileMeta)
494 | meta.UnmarshalMsg(prevV)
495 | added = append(added, meta)
496 | }
497 | // move prev iterator
498 | prevK, prevV = prevIter.Next()
499 | case prevK == nil:
500 | if nextErr == io.EOF {
501 | // done
502 | return
503 | }
504 | deleted = append(deleted, nextV.(*FileMeta))
505 | // move next iterator
506 | next.mux.Lock()
507 | nextK, nextV, nextErr = nextIter.Next()
508 | next.mux.Unlock()
509 | default:
510 | nextCmp := strings.Compare(nextK.(string), string(prevK))
511 | switch {
512 | case nextCmp < 0: // prevK > nextK
513 | // nextK has been deleted
514 | deleted = append(deleted, nextV.(*FileMeta))
515 | // advance next iter
516 | next.mux.Lock()
517 | nextK, nextV, nextErr = nextIter.Next()
518 | next.mux.Unlock()
519 | case nextCmp > 0: // prevK < nextK
520 | // prevK has been insterted
521 | if prevV != nil {
522 | meta := new(FileMeta)
523 | meta.UnmarshalMsg(prevV)
524 | added = append(added, meta)
525 | }
526 | // advance prev iter
527 | prevK, prevV = prevIter.Next()
528 | default:
529 | // same key -> advance iterators
530 | next.mux.Lock()
531 | nextK, nextV, nextErr = nextIter.Next()
532 | next.mux.Unlock()
533 | prevK, prevV = prevIter.Next()
534 | }
535 | }
536 | }
537 | }
538 | default:
539 | panic("journal: indifferentiable types")
540 | }
541 | }
542 |
543 | // Join appends the current journal to the target one, reassigning atomically the mapping.
544 | func (j *kvJournal) Join(target Journal, mapping Mapping) error {
545 | kvTarget, ok := target.(*kvJournal)
546 | if !ok {
547 | return errors.New("journal: unjoinable journals")
548 | }
549 | meta := mapping.Get(j.id)
550 | if meta == nil {
551 | // somehow mapping not available in the current Tx
552 | meta = j.Meta()
553 | } else if meta.ID != j.id {
554 | // ID mismatch -> already joined?
555 | err := fmt.Errorf("journal: already joined %s -> %s", j.id, meta.ID)
556 | return err
557 | }
558 | cur := j.b.Cursor()
559 | k, v := cur.First()
560 | var copied int
561 | for k != nil {
562 | if v == nil {
563 | continue
564 | }
565 | copied++
566 | if err := kvTarget.b.Put(k, v); err != nil {
567 | return err
568 | }
569 | k, v = cur.Next()
570 | }
571 |
572 | meta.JoinedAt = time.Now().UnixNano()
573 | meta.ID = target.ID() // relocate mapping
574 | mapping.Set(j.id, meta)
575 | return nil
576 | }
577 |
578 | func (j *kvJournal) Range(start string, limit int, fn func(k string, v *FileMeta) error) (string, error) {
579 | cur := j.b.Cursor()
580 | k, v := cur.Seek([]byte(start))
581 | var processed int
582 | for k != nil {
583 | var meta *FileMeta
584 | if v != nil {
585 | meta = new(FileMeta)
586 | meta.UnmarshalMsg(v)
587 | }
588 | if err := fn(string(k), meta); err == ErrRangeStop {
589 | return string(k), nil
590 | } else if err != nil {
591 | return string(k), err
592 | }
593 | k, v = cur.Next()
594 | processed++
595 | if limit > 0 && processed >= limit {
596 | return string(k), nil
597 | }
598 | }
599 | return "", nil
600 | }
601 |
602 | func (j *kvJournal) Get(k string) *FileMeta {
603 | data := j.b.Get([]byte(k))
604 | if data == nil {
605 | return nil
606 | }
607 | meta := new(FileMeta)
608 | meta.UnmarshalMsg(data)
609 | return meta
610 | }
611 |
// Exists reports whether key k has a value in the backing bucket.
func (j *kvJournal) Exists(k string) bool {
	return j.b.Get([]byte(k)) != nil
}
615 |
616 | func (j *kvJournal) Set(k string, m *FileMeta) error {
617 | v, err := m.MarshalMsg(nil)
618 | if err != nil {
619 | return err
620 | }
621 | return j.b.Put([]byte(k), v)
622 | }
623 |
// Delete removes the entry under key k from the backing bucket.
func (j *kvJournal) Delete(k string) error {
	return j.b.Delete([]byte(k))
}
627 |
628 | func (j *kvJournal) List() FileMetaList {
629 | cur := j.b.Cursor()
630 | k, v := cur.First()
631 | var list FileMetaList
632 | for k != nil {
633 | if v != nil {
634 | meta := new(FileMeta)
635 | meta.UnmarshalMsg(v)
636 | list = append(list, meta)
637 | }
638 | k, v = cur.Next()
639 | }
640 | return list
641 | }
642 |
// ID yields the journal's identifier.
func (j *kvJournal) ID() ID {
	return j.id
}
646 |
647 | func (j *kvJournal) Meta() *JournalMeta {
648 | cur := j.b.Cursor()
649 | firstKey, _ := cur.First()
650 | lastKey, _ := cur.Last()
651 | return &JournalMeta{
652 | ID: j.id,
653 | FirstKey: string(firstKey),
654 | LastKey: string(lastKey),
655 | CountTotal: j.b.Stats().KeyN,
656 | }
657 | }
658 |
// ErrRangeStop may be returned by a Range callback to halt the iteration
// early without reporting an error.
var ErrRangeStop = errors.New("range stop")
660 |
661 | // NewJournal creates a new journal backed by a BoltDB bucket,
662 | // in the context of a transaction.
663 | func NewJournal(id ID, tx *bolt.Tx, bucket *bolt.Bucket) Journal {
664 | return &kvJournal{
665 | id: id,
666 | tx: tx,
667 | b: bucket,
668 | }
669 | }
670 |
671 | // MakeJournal allows to represent a serialized list of events
672 | // as an in-memory journal compatible with journals backed by a real KV store.
673 | func MakeJournal(id ID, events FileMetaList) Journal {
674 | j := &btreeJournal{
675 | id: id,
676 | mux: new(sync.Mutex),
677 | t: b.TreeNew(func(a interface{}, b interface{}) int {
678 | return strings.Compare(a.(string), b.(string))
679 | }),
680 | }
681 | for i := range events {
682 | j.t.Set(string(events[i].ID), events[i])
683 | }
684 | return j
685 | }
686 |
--------------------------------------------------------------------------------
/journal/meta_gen.go:
--------------------------------------------------------------------------------
1 | package journal
2 |
3 | // NOTE: THIS FILE WAS PRODUCED BY THE
4 | // MSGP CODE GENERATION TOOL (github.com/tinylib/msgp)
5 | // DO NOT EDIT
6 |
7 | import "github.com/tinylib/msgp/msgp"
8 |
// DecodeMsg implements msgp.Decodable
// ConsistencyLevel is wire-encoded as a plain msgpack int.
func (z *ConsistencyLevel) DecodeMsg(dc *msgp.Reader) (err error) {
	{
		var zxvk int
		zxvk, err = dc.ReadInt()
		(*z) = ConsistencyLevel(zxvk)
	}
	if err != nil {
		return
	}
	return
}

// EncodeMsg implements msgp.Encodable
func (z ConsistencyLevel) EncodeMsg(en *msgp.Writer) (err error) {
	err = en.WriteInt(int(z))
	if err != nil {
		return
	}
	return
}

// MarshalMsg implements msgp.Marshaler
func (z ConsistencyLevel) MarshalMsg(b []byte) (o []byte, err error) {
	o = msgp.Require(b, z.Msgsize())
	o = msgp.AppendInt(o, int(z))
	return
}

// UnmarshalMsg implements msgp.Unmarshaler
func (z *ConsistencyLevel) UnmarshalMsg(bts []byte) (o []byte, err error) {
	{
		var zbzg int
		zbzg, bts, err = msgp.ReadIntBytes(bts)
		(*z) = ConsistencyLevel(zbzg)
	}
	if err != nil {
		return
	}
	o = bts
	return
}

// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z ConsistencyLevel) Msgsize() (s int) {
	s = msgp.IntSize
	return
}
57 |
// DecodeMsg implements msgp.Decodable
// FileMeta is wire-encoded as a msgpack map keyed by field name; unknown
// fields are skipped for forward compatibility. Generated code — to
// change it, edit meta.go and re-run `make msgp` (see journal/Makefile).
func (z *FileMeta) DecodeMsg(dc *msgp.Reader) (err error) {
	var field []byte
	_ = field
	var zajw uint32
	zajw, err = dc.ReadMapHeader()
	if err != nil {
		return
	}
	for zajw > 0 {
		zajw--
		field, err = dc.ReadMapKeyPtr()
		if err != nil {
			return
		}
		switch msgp.UnsafeString(field) {
		case "ID":
			z.ID, err = dc.ReadString()
			if err != nil {
				return
			}
		case "Name":
			z.Name, err = dc.ReadString()
			if err != nil {
				return
			}
		case "Size":
			z.Size, err = dc.ReadInt64()
			if err != nil {
				return
			}
		case "Timestamp":
			z.Timestamp, err = dc.ReadInt64()
			if err != nil {
				return
			}
		case "UserMeta":
			var zwht uint32
			zwht, err = dc.ReadMapHeader()
			if err != nil {
				return
			}
			// reuse an existing map when possible, clearing stale entries
			if z.UserMeta == nil && zwht > 0 {
				z.UserMeta = make(map[string]string, zwht)
			} else if len(z.UserMeta) > 0 {
				for key := range z.UserMeta {
					delete(z.UserMeta, key)
				}
			}
			for zwht > 0 {
				zwht--
				var zbai string
				var zcmr string
				zbai, err = dc.ReadString()
				if err != nil {
					return
				}
				zcmr, err = dc.ReadString()
				if err != nil {
					return
				}
				z.UserMeta[zbai] = zcmr
			}
		case "IsSymlink":
			z.IsSymlink, err = dc.ReadBool()
			if err != nil {
				return
			}
		case "Consistency":
			{
				var zhct int
				zhct, err = dc.ReadInt()
				z.Consistency = ConsistencyLevel(zhct)
			}
			if err != nil {
				return
			}
		case "IsDeleted":
			z.IsDeleted, err = dc.ReadBool()
			if err != nil {
				return
			}
		case "IsFetched":
			z.IsFetched, err = dc.ReadBool()
			if err != nil {
				return
			}
		default:
			err = dc.Skip()
			if err != nil {
				return
			}
		}
	}
	return
}
154 |
// EncodeMsg implements msgp.Encodable
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
// The Append byte runs are pre-encoded msgpack map-header/key literals.
func (z *FileMeta) EncodeMsg(en *msgp.Writer) (err error) {
	// map header, size 9
	// write "ID"
	err = en.Append(0x89, 0xa2, 0x49, 0x44)
	if err != nil {
		return err
	}
	err = en.WriteString(z.ID)
	if err != nil {
		return
	}
	// write "Name"
	err = en.Append(0xa4, 0x4e, 0x61, 0x6d, 0x65)
	if err != nil {
		return err
	}
	err = en.WriteString(z.Name)
	if err != nil {
		return
	}
	// write "Size"
	err = en.Append(0xa4, 0x53, 0x69, 0x7a, 0x65)
	if err != nil {
		return err
	}
	err = en.WriteInt64(z.Size)
	if err != nil {
		return
	}
	// write "Timestamp"
	err = en.Append(0xa9, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70)
	if err != nil {
		return err
	}
	err = en.WriteInt64(z.Timestamp)
	if err != nil {
		return
	}
	// write "UserMeta"
	err = en.Append(0xa8, 0x55, 0x73, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x61)
	if err != nil {
		return err
	}
	err = en.WriteMapHeader(uint32(len(z.UserMeta)))
	if err != nil {
		return
	}
	for zbai, zcmr := range z.UserMeta {
		err = en.WriteString(zbai)
		if err != nil {
			return
		}
		err = en.WriteString(zcmr)
		if err != nil {
			return
		}
	}
	// write "IsSymlink"
	err = en.Append(0xa9, 0x49, 0x73, 0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b)
	if err != nil {
		return err
	}
	err = en.WriteBool(z.IsSymlink)
	if err != nil {
		return
	}
	// write "Consistency"
	err = en.Append(0xab, 0x43, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x63, 0x79)
	if err != nil {
		return err
	}
	err = en.WriteInt(int(z.Consistency))
	if err != nil {
		return
	}
	// write "IsDeleted"
	err = en.Append(0xa9, 0x49, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x64)
	if err != nil {
		return err
	}
	err = en.WriteBool(z.IsDeleted)
	if err != nil {
		return
	}
	// write "IsFetched"
	err = en.Append(0xa9, 0x49, 0x73, 0x46, 0x65, 0x74, 0x63, 0x68, 0x65, 0x64)
	if err != nil {
		return err
	}
	err = en.WriteBool(z.IsFetched)
	if err != nil {
		return
	}
	return
}
251 |
// MarshalMsg implements msgp.Marshaler
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z *FileMeta) MarshalMsg(b []byte) (o []byte, err error) {
	// Grow b once up-front using the Msgsize upper bound.
	o = msgp.Require(b, z.Msgsize())
	// map header, size 9
	// string "ID"
	o = append(o, 0x89, 0xa2, 0x49, 0x44)
	o = msgp.AppendString(o, z.ID)
	// string "Name"
	o = append(o, 0xa4, 0x4e, 0x61, 0x6d, 0x65)
	o = msgp.AppendString(o, z.Name)
	// string "Size"
	o = append(o, 0xa4, 0x53, 0x69, 0x7a, 0x65)
	o = msgp.AppendInt64(o, z.Size)
	// string "Timestamp"
	o = append(o, 0xa9, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70)
	o = msgp.AppendInt64(o, z.Timestamp)
	// string "UserMeta"
	o = append(o, 0xa8, 0x55, 0x73, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x61)
	o = msgp.AppendMapHeader(o, uint32(len(z.UserMeta)))
	for zbai, zcmr := range z.UserMeta {
		o = msgp.AppendString(o, zbai)
		o = msgp.AppendString(o, zcmr)
	}
	// string "IsSymlink"
	o = append(o, 0xa9, 0x49, 0x73, 0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b)
	o = msgp.AppendBool(o, z.IsSymlink)
	// string "Consistency"
	o = append(o, 0xab, 0x43, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x63, 0x79)
	o = msgp.AppendInt(o, int(z.Consistency))
	// string "IsDeleted"
	o = append(o, 0xa9, 0x49, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x64)
	o = msgp.AppendBool(o, z.IsDeleted)
	// string "IsFetched"
	o = append(o, 0xa9, 0x49, 0x73, 0x46, 0x65, 0x74, 0x63, 0x68, 0x65, 0x64)
	o = msgp.AppendBool(o, z.IsFetched)
	return
}
289 |
// UnmarshalMsg implements msgp.Unmarshaler
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z *FileMeta) UnmarshalMsg(bts []byte) (o []byte, err error) {
	var field []byte
	_ = field
	var zcua uint32
	zcua, bts, err = msgp.ReadMapHeaderBytes(bts)
	if err != nil {
		return
	}
	for zcua > 0 {
		zcua--
		field, bts, err = msgp.ReadMapKeyZC(bts)
		if err != nil {
			return
		}
		switch msgp.UnsafeString(field) {
		case "ID":
			z.ID, bts, err = msgp.ReadStringBytes(bts)
			if err != nil {
				return
			}
		case "Name":
			z.Name, bts, err = msgp.ReadStringBytes(bts)
			if err != nil {
				return
			}
		case "Size":
			z.Size, bts, err = msgp.ReadInt64Bytes(bts)
			if err != nil {
				return
			}
		case "Timestamp":
			z.Timestamp, bts, err = msgp.ReadInt64Bytes(bts)
			if err != nil {
				return
			}
		case "UserMeta":
			var zxhx uint32
			zxhx, bts, err = msgp.ReadMapHeaderBytes(bts)
			if err != nil {
				return
			}
			// Reuse an existing map where possible, clearing stale keys first.
			if z.UserMeta == nil && zxhx > 0 {
				z.UserMeta = make(map[string]string, zxhx)
			} else if len(z.UserMeta) > 0 {
				for key := range z.UserMeta {
					delete(z.UserMeta, key)
				}
			}
			for zxhx > 0 {
				var zbai string
				var zcmr string
				zxhx--
				zbai, bts, err = msgp.ReadStringBytes(bts)
				if err != nil {
					return
				}
				zcmr, bts, err = msgp.ReadStringBytes(bts)
				if err != nil {
					return
				}
				z.UserMeta[zbai] = zcmr
			}
		case "IsSymlink":
			z.IsSymlink, bts, err = msgp.ReadBoolBytes(bts)
			if err != nil {
				return
			}
		case "Consistency":
			{
				var zlqf int
				zlqf, bts, err = msgp.ReadIntBytes(bts)
				z.Consistency = ConsistencyLevel(zlqf)
			}
			if err != nil {
				return
			}
		case "IsDeleted":
			z.IsDeleted, bts, err = msgp.ReadBoolBytes(bts)
			if err != nil {
				return
			}
		case "IsFetched":
			z.IsFetched, bts, err = msgp.ReadBoolBytes(bts)
			if err != nil {
				return
			}
		default:
			// Unknown field: skip its value to stay forward-compatible.
			bts, err = msgp.Skip(bts)
			if err != nil {
				return
			}
		}
	}
	o = bts
	return
}
387 |
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z *FileMeta) Msgsize() (s int) {
	// Fixed costs: header + each key length + each value size bound.
	s = 1 + 3 + msgp.StringPrefixSize + len(z.ID) + 5 + msgp.StringPrefixSize + len(z.Name) + 5 + msgp.Int64Size + 10 + msgp.Int64Size + 9 + msgp.MapHeaderSize
	if z.UserMeta != nil {
		for zbai, zcmr := range z.UserMeta {
			_ = zcmr
			s += msgp.StringPrefixSize + len(zbai) + msgp.StringPrefixSize + len(zcmr)
		}
	}
	s += 10 + msgp.BoolSize + 12 + msgp.IntSize + 10 + msgp.BoolSize + 10 + msgp.BoolSize
	return
}
400 |
// DecodeMsg implements msgp.Decodable
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z *FileMetaList) DecodeMsg(dc *msgp.Reader) (err error) {
	var zjfb uint32
	zjfb, err = dc.ReadArrayHeader()
	if err != nil {
		return
	}
	// Reuse the existing backing array when it is large enough.
	if cap((*z)) >= int(zjfb) {
		(*z) = (*z)[:zjfb]
	} else {
		(*z) = make(FileMetaList, zjfb)
	}
	for zpks := range *z {
		if dc.IsNil() {
			err = dc.ReadNil()
			if err != nil {
				return
			}
			(*z)[zpks] = nil
		} else {
			if (*z)[zpks] == nil {
				(*z)[zpks] = new(FileMeta)
			}
			err = (*z)[zpks].DecodeMsg(dc)
			if err != nil {
				return
			}
		}
	}
	return
}
432 |
// EncodeMsg implements msgp.Encodable
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z FileMetaList) EncodeMsg(en *msgp.Writer) (err error) {
	err = en.WriteArrayHeader(uint32(len(z)))
	if err != nil {
		return
	}
	for zcxo := range z {
		// nil entries are preserved on the wire as msgpack nil.
		if z[zcxo] == nil {
			err = en.WriteNil()
			if err != nil {
				return
			}
		} else {
			err = z[zcxo].EncodeMsg(en)
			if err != nil {
				return
			}
		}
	}
	return
}
454 |
// MarshalMsg implements msgp.Marshaler
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z FileMetaList) MarshalMsg(b []byte) (o []byte, err error) {
	o = msgp.Require(b, z.Msgsize())
	o = msgp.AppendArrayHeader(o, uint32(len(z)))
	for zcxo := range z {
		// nil entries are preserved on the wire as msgpack nil.
		if z[zcxo] == nil {
			o = msgp.AppendNil(o)
		} else {
			o, err = z[zcxo].MarshalMsg(o)
			if err != nil {
				return
			}
		}
	}
	return
}
471 |
// UnmarshalMsg implements msgp.Unmarshaler
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z *FileMetaList) UnmarshalMsg(bts []byte) (o []byte, err error) {
	var zrsw uint32
	zrsw, bts, err = msgp.ReadArrayHeaderBytes(bts)
	if err != nil {
		return
	}
	// Reuse the existing backing array when it is large enough.
	if cap((*z)) >= int(zrsw) {
		(*z) = (*z)[:zrsw]
	} else {
		(*z) = make(FileMetaList, zrsw)
	}
	for zeff := range *z {
		if msgp.IsNil(bts) {
			bts, err = msgp.ReadNilBytes(bts)
			if err != nil {
				return
			}
			(*z)[zeff] = nil
		} else {
			if (*z)[zeff] == nil {
				(*z)[zeff] = new(FileMeta)
			}
			bts, err = (*z)[zeff].UnmarshalMsg(bts)
			if err != nil {
				return
			}
		}
	}
	o = bts
	return
}
504 |
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z FileMetaList) Msgsize() (s int) {
	s = msgp.ArrayHeaderSize
	for zxpk := range z {
		if z[zxpk] == nil {
			s += msgp.NilSize
		} else {
			s += z[zxpk].Msgsize()
		}
	}
	return
}
517 |
// DecodeMsg implements msgp.Decodable
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z *ID) DecodeMsg(dc *msgp.Reader) (err error) {
	{
		// ID is serialized as a plain msgpack string.
		var zdnj string
		zdnj, err = dc.ReadString()
		(*z) = ID(zdnj)
	}
	if err != nil {
		return
	}
	return
}
530 |
// EncodeMsg implements msgp.Encodable
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z ID) EncodeMsg(en *msgp.Writer) (err error) {
	err = en.WriteString(string(z))
	if err != nil {
		return
	}
	return
}
539 |
// MarshalMsg implements msgp.Marshaler
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z ID) MarshalMsg(b []byte) (o []byte, err error) {
	o = msgp.Require(b, z.Msgsize())
	o = msgp.AppendString(o, string(z))
	return
}
546 |
// UnmarshalMsg implements msgp.Unmarshaler
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z *ID) UnmarshalMsg(bts []byte) (o []byte, err error) {
	{
		// ID is serialized as a plain msgpack string.
		var zobc string
		zobc, bts, err = msgp.ReadStringBytes(bts)
		(*z) = ID(zobc)
	}
	if err != nil {
		return
	}
	o = bts
	return
}
560 |
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z ID) Msgsize() (s int) {
	s = msgp.StringPrefixSize + len(string(z))
	return
}
566 |
// DecodeMsg implements msgp.Decodable
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z *JournalMeta) DecodeMsg(dc *msgp.Reader) (err error) {
	var field []byte
	_ = field
	var zsnv uint32
	zsnv, err = dc.ReadMapHeader()
	if err != nil {
		return
	}
	for zsnv > 0 {
		zsnv--
		field, err = dc.ReadMapKeyPtr()
		if err != nil {
			return
		}
		switch msgp.UnsafeString(field) {
		case "ID":
			{
				var zkgt string
				zkgt, err = dc.ReadString()
				z.ID = ID(zkgt)
			}
			if err != nil {
				return
			}
		case "CreatedAt":
			z.CreatedAt, err = dc.ReadInt64()
			if err != nil {
				return
			}
		case "JoinedAt":
			z.JoinedAt, err = dc.ReadInt64()
			if err != nil {
				return
			}
		case "FirstKey":
			z.FirstKey, err = dc.ReadString()
			if err != nil {
				return
			}
		case "LastKey":
			z.LastKey, err = dc.ReadString()
			if err != nil {
				return
			}
		case "CountTotal":
			z.CountTotal, err = dc.ReadInt()
			if err != nil {
				return
			}
		default:
			// Unknown field: skip its value to stay forward-compatible.
			err = dc.Skip()
			if err != nil {
				return
			}
		}
	}
	return
}
626 |
// EncodeMsg implements msgp.Encodable
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
// The Append byte runs are pre-encoded msgpack map-header/key literals.
func (z *JournalMeta) EncodeMsg(en *msgp.Writer) (err error) {
	// map header, size 6
	// write "ID"
	err = en.Append(0x86, 0xa2, 0x49, 0x44)
	if err != nil {
		return err
	}
	err = en.WriteString(string(z.ID))
	if err != nil {
		return
	}
	// write "CreatedAt"
	err = en.Append(0xa9, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74)
	if err != nil {
		return err
	}
	err = en.WriteInt64(z.CreatedAt)
	if err != nil {
		return
	}
	// write "JoinedAt"
	err = en.Append(0xa8, 0x4a, 0x6f, 0x69, 0x6e, 0x65, 0x64, 0x41, 0x74)
	if err != nil {
		return err
	}
	err = en.WriteInt64(z.JoinedAt)
	if err != nil {
		return
	}
	// write "FirstKey"
	err = en.Append(0xa8, 0x46, 0x69, 0x72, 0x73, 0x74, 0x4b, 0x65, 0x79)
	if err != nil {
		return err
	}
	err = en.WriteString(z.FirstKey)
	if err != nil {
		return
	}
	// write "LastKey"
	err = en.Append(0xa7, 0x4c, 0x61, 0x73, 0x74, 0x4b, 0x65, 0x79)
	if err != nil {
		return err
	}
	err = en.WriteString(z.LastKey)
	if err != nil {
		return
	}
	// write "CountTotal"
	err = en.Append(0xaa, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x54, 0x6f, 0x74, 0x61, 0x6c)
	if err != nil {
		return err
	}
	err = en.WriteInt(z.CountTotal)
	if err != nil {
		return
	}
	return
}
686 |
// MarshalMsg implements msgp.Marshaler
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z *JournalMeta) MarshalMsg(b []byte) (o []byte, err error) {
	// Grow b once up-front using the Msgsize upper bound.
	o = msgp.Require(b, z.Msgsize())
	// map header, size 6
	// string "ID"
	o = append(o, 0x86, 0xa2, 0x49, 0x44)
	o = msgp.AppendString(o, string(z.ID))
	// string "CreatedAt"
	o = append(o, 0xa9, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74)
	o = msgp.AppendInt64(o, z.CreatedAt)
	// string "JoinedAt"
	o = append(o, 0xa8, 0x4a, 0x6f, 0x69, 0x6e, 0x65, 0x64, 0x41, 0x74)
	o = msgp.AppendInt64(o, z.JoinedAt)
	// string "FirstKey"
	o = append(o, 0xa8, 0x46, 0x69, 0x72, 0x73, 0x74, 0x4b, 0x65, 0x79)
	o = msgp.AppendString(o, z.FirstKey)
	// string "LastKey"
	o = append(o, 0xa7, 0x4c, 0x61, 0x73, 0x74, 0x4b, 0x65, 0x79)
	o = msgp.AppendString(o, z.LastKey)
	// string "CountTotal"
	o = append(o, 0xaa, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x54, 0x6f, 0x74, 0x61, 0x6c)
	o = msgp.AppendInt(o, z.CountTotal)
	return
}
711 |
// UnmarshalMsg implements msgp.Unmarshaler
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z *JournalMeta) UnmarshalMsg(bts []byte) (o []byte, err error) {
	var field []byte
	_ = field
	var zema uint32
	zema, bts, err = msgp.ReadMapHeaderBytes(bts)
	if err != nil {
		return
	}
	for zema > 0 {
		zema--
		field, bts, err = msgp.ReadMapKeyZC(bts)
		if err != nil {
			return
		}
		switch msgp.UnsafeString(field) {
		case "ID":
			{
				var zpez string
				zpez, bts, err = msgp.ReadStringBytes(bts)
				z.ID = ID(zpez)
			}
			if err != nil {
				return
			}
		case "CreatedAt":
			z.CreatedAt, bts, err = msgp.ReadInt64Bytes(bts)
			if err != nil {
				return
			}
		case "JoinedAt":
			z.JoinedAt, bts, err = msgp.ReadInt64Bytes(bts)
			if err != nil {
				return
			}
		case "FirstKey":
			z.FirstKey, bts, err = msgp.ReadStringBytes(bts)
			if err != nil {
				return
			}
		case "LastKey":
			z.LastKey, bts, err = msgp.ReadStringBytes(bts)
			if err != nil {
				return
			}
		case "CountTotal":
			z.CountTotal, bts, err = msgp.ReadIntBytes(bts)
			if err != nil {
				return
			}
		default:
			// Unknown field: skip its value to stay forward-compatible.
			bts, err = msgp.Skip(bts)
			if err != nil {
				return
			}
		}
	}
	o = bts
	return
}
772 |
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// NOTE: machine-generated by msgp (see journal/Makefile); do not edit by hand.
func (z *JournalMeta) Msgsize() (s int) {
	s = 1 + 3 + msgp.StringPrefixSize + len(string(z.ID)) + 10 + msgp.Int64Size + 9 + msgp.Int64Size + 9 + msgp.StringPrefixSize + len(z.FirstKey) + 8 + msgp.StringPrefixSize + len(z.LastKey) + 11 + msgp.IntSize
	return
}
778 |
--------------------------------------------------------------------------------
/objstore.go:
--------------------------------------------------------------------------------
1 | package objstore
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "fmt"
7 | "io"
8 | "log"
9 | "sync"
10 | "time"
11 |
12 | "github.com/oklog/ulid"
13 | "github.com/xlab/closer"
14 |
15 | "sphere.software/objstore/cluster"
16 | "sphere.software/objstore/journal"
17 | "sphere.software/objstore/storage"
18 | )
19 |
// Store is the public contract of a single objstore node: object
// access (local, cluster-wide, and remote) plus cluster event plumbing.
type Store interface {
	// NodeID returns this node's ID (a ULID string).
	NodeID() string
	// IsReady reports whether the store is active and ready to serve.
	IsReady() bool
	// SetDebug toggles verbose logging.
	SetDebug(v bool)
	// WaitOutbound blocks until outbound event workers stop, or timeout elapses.
	WaitOutbound(timeout time.Duration)
	// WaitInbound blocks until inbound event workers stop, or timeout elapses.
	WaitInbound(timeout time.Duration)
	// ReceiveEventAnnounce enqueues an event received from another node.
	ReceiveEventAnnounce(event *EventAnnounce)
	// EmitEventAnnounce enqueues an event for broadcast to the other nodes.
	EmitEventAnnounce(event *EventAnnounce)
	// DiskStats returns statistics of the local storage.
	DiskStats() (*DiskStats, error)
	// Close shuts the store down.
	Close() error

	// HeadObject gets object's meta data from the local journal.
	HeadObject(id string) (*FileMeta, error)
	// GetObject gets an object from the local storage of the node.
	// Used for private API, when other nodes ask for an object.
	GetObject(id string) (io.ReadCloser, *FileMeta, error)
	// FindObject gets an object from any node, if not found then tries to acquire from
	// the remote storage, e.g. Amazon S3.
	FindObject(ctx context.Context, id string, fetch bool) (io.ReadCloser, *FileMeta, error)
	// FetchObject retrieves an object from the remote storage, e.g. Amazon S3.
	// This should be called only on a total cache miss, when file is not found
	// on any node of the cluster. If supplied ID is not a valid ULID, resulting meta will have a new ID.
	FetchObject(ctx context.Context, id string) (io.ReadCloser, *FileMeta, error)
	// PutObject writes object to the local storage, emits cluster announcements, optionally
	// writes object to remote storage, e.g. Amazon S3. Returns amount of bytes written.
	PutObject(r io.ReadCloser, meta *FileMeta) (int64, error)
	// DeleteObject marks object as deleted in journals and deletes it from the local storage.
	// This operation does not delete object from remote storage.
	DeleteObject(id string) (*FileMeta, error)
	// Diff finds the difference between serialized external journal represented as list,
	// and journals currently available on this local node.
	Diff(list FileMetaList) (added, deleted FileMetaList, err error)
}
53 |
// ErrNotFound is returned when an object cannot be located on the local
// node, on any cluster peer, or in the remote storage.
var ErrNotFound = errors.New("not found")

// DiskStats re-exports storage.DiskStats for the public API.
type DiskStats storage.DiskStats

// EventAnnounce re-exports cluster.EventAnnounce for the public API.
type EventAnnounce cluster.EventAnnounce

// ConsistencyLevel re-exports journal.ConsistencyLevel for the public API.
type ConsistencyLevel journal.ConsistencyLevel
61 |
62 | func (c ConsistencyLevel) Check() (journal.ConsistencyLevel, error) {
63 | level := (journal.ConsistencyLevel)(c)
64 | switch level {
65 | case journal.ConsistencyLocal, journal.ConsistencyS3, journal.ConsistencyFull:
66 | return level, nil
67 | default:
68 | return 0, errors.New("objstore: invalid consistency level")
69 | }
70 | }
71 |
// Event types re-exported from the cluster package.
const (
	EventOpaqueData cluster.EventType = cluster.EventOpaqueData
)
75 |
// storeState tracks the lifecycle phase of an objStore node.
type storeState int

const (
	// storeInactiveState: the node has not completed its initial sync.
	storeInactiveState storeState = iota
	// storeSyncState: intermediate sync phase (value kept for compatibility).
	storeSyncState
	// storeActiveState: the node is fully synced and serving requests.
	storeActiveState
)
83 |
// objStore is the default Store implementation backed by a local disk
// storage, a remote storage (e.g. S3), a journal manager and a cluster
// manager.
type objStore struct {
	nodeID string // ULID of this node, validated in NewStore
	debug  bool   // verbose logging toggle

	stateMux *sync.RWMutex // guards state
	state    storeState

	localStorage  storage.LocalStorage
	remoteStorage storage.RemoteStorage
	journals      journal.JournalManager
	cluster       cluster.ClusterManager

	// Outbound events: pump buffers announces destined for other nodes.
	outboundWg        *sync.WaitGroup
	outboundPump      chan *EventAnnounce
	outboundAnnounces chan *EventAnnounce

	// Inbound events: pump buffers announces received from other nodes.
	inboundWg        *sync.WaitGroup
	inboundPump      chan *EventAnnounce
	inboundAnnounces chan *EventAnnounce
}
104 |
// NewStore validates all dependencies and constructs a Store bound to the
// given node ID. It verifies access to both storages, creates this node's
// journal, starts inbound/outbound event workers, and launches two
// background goroutines: one retries the initial cluster sync until it
// succeeds, the other periodically consolidates journals (every 24h).
func NewStore(nodeID string,
	localStorage storage.LocalStorage,
	remoteStorage storage.RemoteStorage,
	journals journal.JournalManager,
	cluster cluster.ClusterManager,
) (Store, error) {
	if !CheckID(nodeID) {
		return nil, errors.New("objstore: invalid node ID")
	}
	if localStorage == nil {
		return nil, errors.New("objstore: local storage not provided")
	}
	if remoteStorage == nil {
		return nil, errors.New("objstore: remote storage not provided")
	}
	if journals == nil {
		return nil, errors.New("objstore: journals manager not provided")
	}
	if cluster == nil {
		return nil, errors.New("objstore: cluster manager not provided")
	}
	// Fail fast if either storage is unreachable.
	if err := localStorage.CheckAccess(""); err != nil {
		err = fmt.Errorf("objstore: cannot access local storage: %v", err)
		return nil, err
	}
	if err := remoteStorage.CheckAccess(""); err != nil {
		err = fmt.Errorf("objstore: cannot access remote storage: %v", err)
		return nil, err
	}
	if err := journals.Create(journal.ID(nodeID)); err != nil {
		err = fmt.Errorf("objstore: unable to create new journal: %v", err)
		return nil, err
	}
	outboundAnnounces := make(chan *EventAnnounce, 1024)
	inboundAnnounces := make(chan *EventAnnounce, 1024)
	store := &objStore{
		nodeID:   nodeID,
		stateMux: new(sync.RWMutex),

		localStorage:  localStorage,
		remoteStorage: remoteStorage,
		journals:      journals,
		cluster:       cluster,

		outboundWg:        new(sync.WaitGroup),
		outboundPump:      pumpEventAnnounces(outboundAnnounces),
		outboundAnnounces: outboundAnnounces,

		inboundWg:        new(sync.WaitGroup),
		inboundPump:      pumpEventAnnounces(inboundAnnounces),
		inboundAnnounces: inboundAnnounces,
	}
	store.processInbound(4, 10*time.Minute)
	store.processOutbound(4, 10*time.Minute)
	go func() {
		// Retry the initial cluster sync every 2s until it succeeds.
		time.Sleep(2 * time.Second)
		var synced bool
		for !synced {
			synced = store.sync(10 * time.Minute)
			time.Sleep(2 * time.Second)
		}
		// NOTE(review): store.debug is read here without synchronization
		// against SetDebug — benign in practice, but racy. TODO confirm.
		if store.debug {
			log.Println("[INFO] sync done")
		}
	}()
	go func() {
		listJournals := func() {
			list, err := store.journals.ListAll()
			if err != nil {
				log.Println("[WARN] error listing journals", err)
				return
			}
			log.Println("[INFO] node journals:")
			log.Println(list)
		}
		// Consolidate journals once per day, waiting for readiness first.
		for {
			for !store.IsReady() {
				time.Sleep(2 * time.Second)
			}
			if store.debug {
				listJournals()
			}
			ts := time.Now()
			_, err := store.journals.JoinAll(journal.ID(nodeID))
			if err != nil {
				log.Println("[WARN] journal consolidation failed:", err)
			} else if store.debug {
				log.Println("[INFO] consolidation done in", time.Since(ts))
				listJournals()
			}
			time.Sleep(24 * time.Hour)
		}
	}()
	return store, nil
}
200 |
201 | func (o *objStore) sync(timeout time.Duration) bool {
202 | nodes, err := o.cluster.ListNodes()
203 | if err != nil {
204 | closer.Fatalln("[WARN] list nodes failed, sync cancelled:", err)
205 | } else if len(nodes) < 2 {
206 | o.stateMux.Lock()
207 | o.state = storeActiveState
208 | o.stateMux.Unlock()
209 | return false
210 | }
211 | o.stateMux.Lock()
212 | o.state = storeInactiveState
213 | o.stateMux.Unlock()
214 |
215 | list, err := o.journals.ExportAll()
216 | if err != nil {
217 | closer.Fatalln("[WARN] list journals failed, sync cancelled:", err)
218 | }
219 |
220 | wg := new(sync.WaitGroup)
221 | ctx, cancelFn := context.WithTimeout(context.Background(), timeout)
222 |
223 | var listAdded journal.FileMetaList
224 | var listDeleted journal.FileMetaList
225 |
226 | for _, node := range nodes {
227 | if node.ID == o.nodeID {
228 | continue
229 | }
230 | wg.Add(1)
231 | go func(node *cluster.NodeInfo) {
232 | defer wg.Done()
233 |
234 | added, deleted, err := o.cluster.Sync(ctx, node.ID, list)
235 | if err != nil {
236 | log.Println("[WARN] sync error:", err)
237 | } else {
238 | listAdded = append(listAdded, added...)
239 | listDeleted = append(listDeleted, deleted...)
240 | }
241 | }(node)
242 | }
243 | wg.Wait()
244 | cancelFn()
245 |
246 | setAdded := make(map[string]*journal.FileMeta, len(listAdded))
247 | setDeleted := make(map[string]*journal.FileMeta, len(listDeleted))
248 |
249 | for _, meta := range listAdded {
250 | if m, ok := setAdded[meta.ID]; ok {
251 | if meta.Timestamp > m.Timestamp {
252 | setAdded[meta.ID] = meta
253 | continue
254 | }
255 | }
256 | setAdded[meta.ID] = meta
257 | }
258 | for _, meta := range listDeleted {
259 | if mAdd, ok := setAdded[meta.ID]; ok {
260 | // added already, check priority by age
261 | if mAdd.Timestamp > meta.Timestamp {
262 | continue // skip this delete event
263 | } else {
264 | delete(setAdded, meta.ID)
265 | }
266 | }
267 | if m, ok := setDeleted[meta.ID]; ok {
268 | if meta.Timestamp > m.Timestamp {
269 | setDeleted[meta.ID] = meta
270 | continue
271 | }
272 | }
273 | setDeleted[meta.ID] = meta
274 | }
275 |
276 | err = o.journals.Update(journal.ID(o.nodeID),
277 | func(j journal.Journal, _ *journal.JournalMeta) error {
278 | for _, meta := range setAdded {
279 | if meta.IsDeleted {
280 | // missing in our records, but marked as deleted elsewere
281 | o.localStorage.Delete(meta.ID)
282 | meta.IsSymlink = true
283 | if err := j.Set(meta.ID, meta); err != nil {
284 | log.Println("[WARN] journal set:", err)
285 | }
286 | continue
287 | }
288 | switch meta.Consistency {
289 | case journal.ConsistencyLocal, journal.ConsistencyS3:
290 | // stored elsewere
291 | meta.IsSymlink = true
292 | if err := j.Set(meta.ID, meta); err != nil {
293 | log.Println("[WARN] journal set:", err)
294 | }
295 | case journal.ConsistencyFull:
296 | // must replicate, i.e. handle the missing announce
297 | meta.IsSymlink = true // temporarily, will be overridden once replicated
298 | o.ReceiveEventAnnounce(&EventAnnounce{
299 | Type: cluster.EventFileAdded,
300 | FileMeta: meta,
301 | })
302 | if err := j.Set(meta.ID, meta); err != nil {
303 | log.Println("[WARN] journal set:", err)
304 | }
305 | }
306 | }
307 | return nil
308 | })
309 | if err != nil {
310 | closer.Fatalln("[WARN] failed to sync journal:", err)
311 | }
312 |
313 | o.stateMux.Lock()
314 | o.state = storeActiveState
315 | o.stateMux.Unlock()
316 |
317 | for _, meta := range setDeleted {
318 | if meta.IsDeleted {
319 | // some nodes missing info we have on deleted object
320 | o.EmitEventAnnounce(&EventAnnounce{
321 | Type: cluster.EventFileDeleted,
322 | FileMeta: meta,
323 | })
324 | continue
325 | }
326 | // some nodes are missing our file
327 | o.EmitEventAnnounce(&EventAnnounce{
328 | Type: cluster.EventFileAdded,
329 | FileMeta: meta,
330 | })
331 | }
332 |
333 | return true
334 | }
335 |
336 | func (o *objStore) processOutbound(workers int, emitTimeout time.Duration) {
337 | for i := 0; i < workers; i++ {
338 | o.outboundWg.Add(1)
339 | go func() {
340 | defer o.outboundWg.Done()
341 |
342 | for !o.IsReady() {
343 | time.Sleep(100 * time.Millisecond)
344 | }
345 | for ev := range o.outboundAnnounces {
346 | if err := o.emitEvent(ev, emitTimeout); err != nil {
347 | log.Println("[WARN] emitting event:", err)
348 | }
349 | }
350 | }()
351 | }
352 | }
353 |
354 | func (o *objStore) processInbound(workers int, timeout time.Duration) {
355 | for i := 0; i < workers; i++ {
356 | o.inboundWg.Add(1)
357 | go func() {
358 | defer o.inboundWg.Done()
359 |
360 | for !o.IsReady() {
361 | time.Sleep(100 * time.Millisecond)
362 | }
363 | for ev := range o.inboundAnnounces {
364 | if err := o.handleEvent(ev, timeout); err != nil {
365 | log.Println("[WARN] handling event:", err)
366 | }
367 | }
368 | }()
369 | }
370 | }
371 |
372 | func (o *objStore) IsReady() bool {
373 | o.stateMux.RLock()
374 | ready := o.state == storeActiveState
375 | o.stateMux.RUnlock()
376 | return ready
377 | }
378 |
379 | func (o *objStore) Close() error {
380 | o.inboundPump <- &EventAnnounce{
381 | Type: cluster.EventStopAnnounce,
382 | }
383 | o.outboundPump <- &EventAnnounce{
384 | Type: cluster.EventStopAnnounce,
385 | }
386 | return nil
387 | }
388 |
// WaitOutbound blocks until all outbound workers exit, or timeout elapses.
func (o *objStore) WaitOutbound(timeout time.Duration) {
	waitWG(o.outboundWg, timeout)
}
392 |
// WaitInbound blocks until all inbound workers exit, or timeout elapses.
func (o *objStore) WaitInbound(timeout time.Duration) {
	waitWG(o.inboundWg, timeout)
}
396 |
397 | func waitWG(wg *sync.WaitGroup, timeout time.Duration) {
398 | done := make(chan struct{})
399 | go func() {
400 | wg.Wait()
401 | select {
402 | case <-done:
403 | default:
404 | close(done)
405 | }
406 | }()
407 | select {
408 | case <-time.Tick(timeout):
409 | case <-done:
410 | }
411 | }
412 |
413 | // ReceiveEventAnnounce never blocks. Internal workers will eventually handle the received events.
414 | func (o *objStore) ReceiveEventAnnounce(event *EventAnnounce) {
415 | if event.Type == cluster.EventStopAnnounce {
416 | return
417 | }
418 | o.inboundPump <- event
419 | }
420 |
421 | // EmitEventAnnounce never blocks. Internal workers will eventually handle the events to emit.
422 | func (o *objStore) EmitEventAnnounce(event *EventAnnounce) {
423 | if event.Type == cluster.EventStopAnnounce {
424 | return
425 | }
426 | o.outboundPump <- event
427 | }
428 |
429 | func (s *objStore) NodeID() string {
430 | return s.nodeID
431 | }
432 |
// GenerateID returns a fresh ULID string suitable for node and object IDs.
func GenerateID() string {
	return journal.GetULID()
}
436 |
437 | func CheckID(str string) bool {
438 | id, err := ulid.Parse(str)
439 | if err != nil {
440 | log.Printf("[WARN] ULID is invalid: %s: %v", str, err)
441 | return false
442 | }
443 | ts := time.Unix(int64(id.Time()/1000), 0)
444 | if ts.Before(time.Date(2010, 0, 0, 0, 0, 0, 0, time.UTC)) ||
445 | ts.After(time.Date(2100, 0, 0, 0, 0, 0, 0, time.UTC)) {
446 | log.Println("[WARN] ULID has timestamp:", ts, "which is not current")
447 | return false
448 | }
449 | return true
450 | }
451 |
452 | func (o *objStore) emitEvent(ev *EventAnnounce, timeout time.Duration) error {
453 | ctx, cancelFn := context.WithTimeout(context.Background(), timeout)
454 | defer cancelFn()
455 | wg := new(sync.WaitGroup)
456 | defer wg.Wait()
457 | nodes, err := o.cluster.ListNodes()
458 | if err != nil {
459 | return err
460 | }
461 | for _, node := range nodes {
462 | if node.ID == o.nodeID {
463 | continue
464 | }
465 | wg.Add(1)
466 | go func(node *cluster.NodeInfo) {
467 | defer wg.Done()
468 | if err := o.cluster.Announce(ctx, node.ID, (*cluster.EventAnnounce)(ev)); err != nil {
469 | log.Println("[WARN] announce error:", err)
470 | }
471 | }(node)
472 | }
473 | return nil
474 | }
475 |
// findOnCluster asks every other node for the object with the given id and
// returns the first reader that arrives. It returns ErrNotFound when this
// node has no peers or when no peer delivers the object.
func (o *objStore) findOnCluster(ctx context.Context, id string) (io.ReadCloser, error) {
	nodes, err := o.cluster.ListNodes()
	if err != nil {
		err = fmt.Errorf("objstore: cannot discover nodes: %v", err)
		return nil, err
	} else if len(nodes) < 2 {
		// no other nodes except us..
		return nil, ErrNotFound
	}
	// Buffered to the node count so a worker never blocks on send, even
	// when nobody reads more than the first result.
	found := make(chan io.ReadCloser, len(nodes))
	wg := new(sync.WaitGroup)
	for _, node := range nodes {
		if node.ID == o.nodeID {
			continue
		}
		wg.Add(1)
		go func(node *cluster.NodeInfo) {
			defer wg.Done()
			if r, err := o.cluster.GetObject(ctx, node.ID, id); err == nil {
				found <- r
			} else if err != cluster.ErrNotFound {
				log.Println("[WARN] cluster error:", err)
			}
		}(node)
	}

	go func() {
		wg.Wait()
		close(found)
	}()
	// found will be closed if all workers done,
	// or we get at least 1 result from the channel.
	if r, ok := <-found; ok {
		// NOTE(review): if several peers respond, the extra readers stay
		// in the buffered channel and are never Closed — potential
		// resource leak to confirm with the cluster client implementation.
		return r, nil
	}
	return nil, ErrNotFound
}
513 |
514 | func (o *objStore) handleEvent(ev *EventAnnounce, timeout time.Duration) error {
515 | switch ev.Type {
516 | case cluster.EventFileAdded:
517 | if ev.FileMeta == nil {
518 | log.Println("[WARN] skipping added event with no meta")
519 | return nil
520 | }
521 | id := ev.FileMeta.ID
522 | meta := (*FileMeta)(ev.FileMeta)
523 | if meta.Consistency == journal.ConsistencyFull {
524 | // need to replicate the file locally
525 | ctx, cancelFn := context.WithTimeout(context.Background(), timeout)
526 | r, err := o.findOnCluster(ctx, id)
527 | cancelFn()
528 | if err == ErrNotFound {
529 | if o.debug {
530 | log.Println("[INFO] file not found on cluster:", ev.FileMeta)
531 | }
532 | // object not found on cluster, fetch from remote store
533 | r, meta, err = o.FetchObject(ctx, id)
534 | if err == ErrNotFound {
535 | // we simply bail out if the file is expected with full consistency but not
536 | // found on the cluster and the remote storage.
537 | log.Println("[WARN] unable to find object for:", ev.FileMeta)
538 | return nil
539 | }
540 | meta.Consistency = journal.ConsistencyFull
541 | id = meta.ID // id is the same or new
542 | }
543 | meta.IsSymlink = false
544 | if _, err := o.storeLocal(r, meta); err != nil {
545 | r.Close()
546 | log.Println("[WARN] failed to fetch and store object:", err)
547 | return nil
548 | }
549 | r.Close()
550 | } else {
551 | meta.IsSymlink = true
552 | }
553 | if err := o.journals.ForEachUpdate(func(j journal.Journal, _ *journal.JournalMeta) error {
554 | if j.ID() == journal.ID(o.nodeID) {
555 | return j.Set(id, (*journal.FileMeta)(meta))
556 | }
557 | return j.Delete(id)
558 | }); err != nil {
559 | return err
560 | }
561 | case cluster.EventFileDeleted:
562 | if ev.FileMeta == nil {
563 | log.Println("[WARN] skipping deleted event with no meta")
564 | return nil
565 | }
566 | var found bool
567 | id := ev.FileMeta.ID
568 | err := o.journals.ForEachUpdate(func(j journal.Journal, _ *journal.JournalMeta) error {
569 | if m := j.Get(id); m != nil {
570 | found = true
571 | m.IsDeleted = true
572 | m.Timestamp = time.Now().UnixNano()
573 | if err := j.Set(id, m); err != nil {
574 | return err
575 | }
576 | return journal.ForEachStop
577 | }
578 | return nil
579 | })
580 | if err != nil {
581 | err = fmt.Errorf("objstore: journal update failed: %v", err)
582 | return err
583 | } else if found {
584 | if err := o.localStorage.Delete(id); err != nil {
585 | log.Println("[WARN] failed to delete local file:", err)
586 | }
587 | }
588 | case cluster.EventOpaqueData:
589 | log.Println("[INFO] cluster message:", string(ev.OpaqueData))
590 | default:
591 | log.Println("[WARN] skipping illegal cluster event type", ev.Type)
592 | }
593 | return nil
594 | }
595 |
596 | func (o *objStore) DiskStats() (*DiskStats, error) {
597 | ds, err := o.localStorage.DiskStats()
598 | if err != nil {
599 | return nil, err
600 | }
601 | return (*DiskStats)(ds), nil
602 | }
603 |
// FileMeta is a package-local alias of journal.FileMeta used in the public API.
type FileMeta journal.FileMeta

// FileMetaList is a package-local alias of journal.FileMetaList used in the public API.
type FileMetaList journal.FileMetaList
606 |
607 | func (o *objStore) HeadObject(id string) (*FileMeta, error) {
608 | var meta *FileMeta
609 | err := o.journals.ForEach(func(j journal.Journal, _ *journal.JournalMeta) error {
610 | if m := j.Get(id); m != nil {
611 | meta = (*FileMeta)(m)
612 | return journal.ForEachStop
613 | }
614 | return nil
615 | })
616 | if err != nil {
617 | return nil, err
618 | } else if meta == nil {
619 | return nil, ErrNotFound
620 | }
621 | return meta, nil
622 | }
623 |
624 | func (o *objStore) GetObject(id string) (io.ReadCloser, *FileMeta, error) {
625 | var meta *FileMeta
626 | err := o.journals.ForEach(func(j journal.Journal, _ *journal.JournalMeta) error {
627 | if m := j.Get(id); m != nil {
628 | meta = (*FileMeta)(m)
629 | return journal.ForEachStop
630 | }
631 | return nil
632 | })
633 | if err != nil {
634 | return nil, nil, err
635 | } else if meta == nil {
636 | return nil, nil, ErrNotFound
637 | }
638 | if meta.IsSymlink {
639 | // file should be located somewhere else, we don't have that file
640 | return nil, meta, ErrNotFound
641 | } else if meta.IsDeleted {
642 | return nil, meta, ErrNotFound
643 | }
644 | f, err := o.localStorage.Read(id)
645 | if err != nil {
646 | log.Println("[WARN] file not found on disk:", (*journal.FileMeta)(meta).String())
647 | return nil, meta, ErrNotFound
648 | }
649 | return f, meta, nil
650 | }
651 |
652 | func (o *objStore) FindObject(ctx context.Context,
653 | id string, fetch bool) (io.ReadCloser, *FileMeta, error) {
654 | r, meta, err := o.GetObject(id)
655 | if err == nil {
656 | // found locally
657 | return r, meta, nil
658 | } else if err != ErrNotFound {
659 | log.Println("[WARN]", err)
660 | }
661 | if meta == nil && !fetch {
662 | // completely not found -> file has been removed
663 | return nil, nil, ErrNotFound
664 | } else if meta != nil {
665 | r, err = o.findOnCluster(ctx, id)
666 | if err == nil {
667 | return r, meta, err
668 | } else if err != ErrNotFound {
669 | log.Println("[WARN] error when finding object:", err)
670 | }
671 | if o.debug {
672 | log.Println("[INFO] file not found on cluster:", id)
673 | }
674 | }
675 | // fetch from remote store
676 | r, meta, err = o.FetchObject(ctx, id)
677 | if err == ErrNotFound {
678 | return nil, nil, ErrNotFound
679 | } else if err != nil {
680 | log.Println("[WARN] unknown error:", err)
681 | return nil, nil, err
682 | }
683 | // id is the same or new
684 | id = meta.ID
685 | // store it locally
686 | meta.IsSymlink = false
687 | if (meta.Consistency) == 0 {
688 | meta.Consistency = journal.ConsistencyS3
689 | }
690 | meta.Timestamp = time.Now().UnixNano()
691 | if _, err := o.storeLocal(r, meta); err != nil {
692 | r.Close()
693 | log.Println("[WARN] failed to fetch and store object:", err)
694 | return nil, meta, err
695 | }
696 | r.Close()
697 | // update journals
698 | if err := o.journals.ForEachUpdate(func(j journal.Journal, _ *journal.JournalMeta) error {
699 | if j.ID() == journal.ID(o.nodeID) {
700 | return j.Set(id, (*journal.FileMeta)(meta))
701 | }
702 | return j.Delete(id)
703 | }); err != nil {
704 | return nil, meta, err
705 | }
706 | o.EmitEventAnnounce(&EventAnnounce{
707 | Type: cluster.EventFileAdded,
708 | FileMeta: (*journal.FileMeta)(meta),
709 | })
710 | // serve from local storage
711 | f, err := o.localStorage.Read(id)
712 | if err != nil {
713 | log.Println("[WARN] file not found on disk:", meta)
714 | return nil, meta, ErrNotFound
715 | }
716 | copyMeta := *meta
717 | copyMeta.IsFetched = true
718 | return f, ©Meta, nil
719 | }
720 |
721 | func (o *objStore) FetchObject(ctx context.Context, id string) (io.ReadCloser, *FileMeta, error) {
722 | spec, err := o.remoteStorage.GetObject(id)
723 | if err == storage.ErrNotFound {
724 | return nil, nil, ErrNotFound
725 | } else if err != nil {
726 | return nil, nil, err
727 | }
728 | meta := new(journal.FileMeta)
729 | meta.Unmap(spec.Meta)
730 | meta.ID = id
731 | if !CheckID(id) {
732 | // generate a new ID for the file to store in the journals
733 | meta.ID = GenerateID()
734 | }
735 | if spec.Size > 0 {
736 | meta.Size = spec.Size
737 | }
738 | return spec.Body, (*FileMeta)(meta), nil
739 | }
740 |
741 | func (o *objStore) storeLocal(r io.Reader, meta *FileMeta) (written int64, err error) {
742 | written, err = o.localStorage.Write(meta.ID, r)
743 | if err != nil {
744 | return
745 | }
746 | journalID := journal.ID(o.nodeID)
747 | var journalOk bool
748 | if err = o.journals.ForEachUpdate(
749 | func(j journal.Journal, _ *journal.JournalMeta) error {
750 | if journalID == j.ID() {
751 | journalOk = true
752 | return j.Set(meta.ID, (*journal.FileMeta)(meta))
753 | }
754 | return j.Delete(meta.ID)
755 | }); err != nil {
756 | return
757 | }
758 | if !journalOk {
759 | err = fmt.Errorf("objstore: journal not found: %v", journalID)
760 | return
761 | }
762 | return
763 | }
764 |
765 | func (o *objStore) PutObject(r io.ReadCloser, meta *FileMeta) (int64, error) {
766 | switch meta.Consistency {
767 | case journal.ConsistencyLocal:
768 | written, err := o.storeLocal(r, meta)
769 | if err != nil {
770 | r.Close()
771 | err = fmt.Errorf("objstore: local store failed: %v", err)
772 | return written, err
773 | }
774 | r.Close()
775 | o.EmitEventAnnounce(&EventAnnounce{
776 | Type: cluster.EventFileAdded,
777 | FileMeta: (*journal.FileMeta)(meta),
778 | })
779 | case journal.ConsistencyS3, journal.ConsistencyFull:
780 | written, err := o.storeLocal(r, meta)
781 | if err != nil {
782 | r.Close()
783 | err = fmt.Errorf("objstore: local store failed: %v", err)
784 | return written, err
785 | }
786 | r.Close()
787 | o.EmitEventAnnounce(&EventAnnounce{
788 | Type: cluster.EventFileAdded,
789 | FileMeta: (*journal.FileMeta)(meta),
790 | })
791 | // for optimal S3 uploads we should provide io.ReadSeeker,
792 | // this is why we store object as local file first, then upload to S3.
793 | f, err := o.localStorage.Read(meta.ID)
794 | if err != nil {
795 | err = fmt.Errorf("objstore: local store missing file: %v", err)
796 | return written, err
797 | }
798 | defer f.Close()
799 |
800 | if _, err = o.remoteStorage.PutObject(meta.ID, f, (*journal.FileMeta)(meta).Map()); err != nil {
801 | err = fmt.Errorf("objstore: remote store failed: %v", err)
802 | return written, err
803 | }
804 | return written, nil
805 | default:
806 | return 0, fmt.Errorf("objstore: unknown consistency %v", meta.Consistency)
807 | }
808 | return 0, nil
809 | }
810 |
811 | func (o *objStore) DeleteObject(id string) (*FileMeta, error) {
812 | var meta *FileMeta
813 | err := o.journals.ForEachUpdate(func(j journal.Journal, _ *journal.JournalMeta) error {
814 | if m := j.Get(id); m != nil {
815 | m.IsDeleted = true
816 | m.Timestamp = time.Now().UnixNano()
817 | if err := j.Set(id, m); err != nil {
818 | return err
819 | }
820 | meta = (*FileMeta)(m)
821 | return journal.ForEachStop
822 | }
823 | return nil
824 | })
825 | if err != nil {
826 | return nil, err
827 | } else if meta == nil {
828 | return nil, ErrNotFound
829 | }
830 | o.EmitEventAnnounce(&EventAnnounce{
831 | Type: cluster.EventFileDeleted,
832 | FileMeta: (*journal.FileMeta)(meta),
833 | })
834 | if err := o.localStorage.Delete(id); err != nil {
835 | log.Println("[WARN] failed to delete local file:", err)
836 | }
837 | return meta, nil
838 | }
839 |
840 | func (o *objStore) Diff(list FileMetaList) (added, deleted FileMetaList, err error) {
841 | internal, err := o.journals.ExportAll()
842 | if err != nil {
843 | err := fmt.Errorf("objstore: failed to collect journals: %v", err)
844 | return nil, nil, err
845 | }
846 | internalJournal := journal.MakeJournal("", internal)
847 | externalJournal := journal.MakeJournal("", (journal.FileMetaList)(list))
848 | add, del := externalJournal.Diff(internalJournal)
849 | return (FileMetaList)(add), (FileMetaList)(del), nil
850 | }
851 |
852 | func (o *objStore) SetDebug(v bool) {
853 | o.debug = v
854 | }
855 |
--------------------------------------------------------------------------------