├── .gitignore ├── journal ├── Makefile ├── journal_test.go ├── helpers.go ├── mapping.go ├── meta.go ├── manager.go ├── journal.go └── meta_gen.go ├── docs ├── logo.png ├── cluster-view-0.png └── cluster-view-1.png ├── deploy ├── build.sh ├── Dockerfile └── Makefile ├── cluster ├── events.go ├── client.go └── manager.go ├── glide.yaml ├── storage ├── helpers.go ├── local.go └── remote.go ├── LICENSE ├── helpers.go ├── glide.lock ├── api ├── public.go └── private.go ├── cmd └── objstore │ └── main.go ├── README.md └── objstore.go /.gitignore: -------------------------------------------------------------------------------- 1 | var/ 2 | .DS_Store 3 | vendor/ 4 | -------------------------------------------------------------------------------- /journal/Makefile: -------------------------------------------------------------------------------- 1 | msgp: 2 | msgp -file meta.go -tests=false 3 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SphereSoftware/objstore/HEAD/docs/logo.png -------------------------------------------------------------------------------- /docs/cluster-view-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SphereSoftware/objstore/HEAD/docs/cluster-view-0.png -------------------------------------------------------------------------------- /docs/cluster-view-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SphereSoftware/objstore/HEAD/docs/cluster-view-1.png -------------------------------------------------------------------------------- /deploy/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | go get github.com/Masterminds/glide 4 | cd $GOPATH/src/sphere.software/objstore 5 | 
$GOPATH/bin/glide install 6 | go build -o /out/objstore sphere.software/objstore/cmd/objstore 7 | -------------------------------------------------------------------------------- /deploy/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gliderlabs/alpine 2 | 3 | RUN apk add --no-cache ca-certificates 4 | COPY out/objstore /objstore/bin/objstore 5 | 6 | ENV APP_DEBUG_LEVEL=1 7 | ENV APP_CLUSTER_TAGNAME=default 8 | 9 | VOLUME /objstore/data 10 | WORKDIR /objstore/data 11 | 12 | EXPOSE 10999 10080 13 | 14 | CMD ["/objstore/bin/objstore"] 15 | -------------------------------------------------------------------------------- /cluster/events.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import "sphere.software/objstore/journal" 4 | 5 | type EventType int 6 | 7 | const ( 8 | EventUnknown EventType = 0 9 | EventFileAdded EventType = 1 10 | EventFileDeleted EventType = 2 11 | EventOpaqueData EventType = 3 12 | EventStopAnnounce EventType = 999 13 | ) 14 | 15 | type EventAnnounce struct { 16 | Type EventType `json:"type"` 17 | 18 | FileMeta *journal.FileMeta `json:"meta"` 19 | OpaqueData []byte `json:"data"` 20 | } 21 | -------------------------------------------------------------------------------- /journal/journal_test.go: -------------------------------------------------------------------------------- 1 | package journal 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | var noID ID 10 | 11 | func TestBtreeDiffBtree(t *testing.T) { 12 | assert := assert.New(t) 13 | 14 | j1 := MakeJournal(noID, []*Meta{ 15 | {ID: "000"}, {ID: "001"}, {ID: "002"}, {ID: "003"}, {ID: "005"}, 16 | }) 17 | j2 := MakeJournal(noID, []*Meta{ 18 | {ID: "000"}, {ID: "002"}, {ID: "003"}, {ID: "004"}, {ID: "005"}, 19 | }) 20 | 21 | added, deleted := j1.Diff(j2) 22 | assert.Equal([]*Meta{{ID: "004"}}, added) 23 | assert.Equal([]*Meta{{ID: "001"}}, 
deleted) 24 | 25 | added, deleted = j1.Diff(j1) 26 | assert.Empty(added) 27 | assert.Empty(deleted) 28 | } 29 | -------------------------------------------------------------------------------- /deploy/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | 3 | build: 4 | go get github.com/Masterminds/glide 5 | cd $(GOPATH)/src/sphere.software/objstore 6 | glide install 7 | go build 8 | docker build -t spheresoftware/objstore . 9 | 10 | OUT ?= $(shell pwd)/out 11 | PROJECT ?= $(GOPATH)/src/sphere.software/objstore 12 | 13 | local: 14 | mkdir -p $(OUT) 15 | docker run -a stdout -a stderr --rm \ 16 | -v $(OUT):/out -v $(PROJECT):/go/src/sphere.software/objstore \ 17 | -e GOPATH=/go \ 18 | golang:alpine /go/src/sphere.software/objstore/deploy/build.sh 19 | docker build -t spheresoftware/objstore . 20 | rm $(OUT)/objstore 21 | rmdir $(OUT) 22 | 23 | docker: 24 | make local 25 | docker save -o objstore.tar spheresoftware/objstore 26 | zip objstore.tar.zip objstore.tar 27 | -------------------------------------------------------------------------------- /glide.yaml: -------------------------------------------------------------------------------- 1 | package: sphere.software/objstore 2 | import: 3 | - package: github.com/astranet/astranet 4 | subpackages: 5 | - addr 6 | - package: github.com/aws/aws-sdk-go 7 | version: ^1.10.3 8 | subpackages: 9 | - aws 10 | - aws/session 11 | - service/s3 12 | - package: github.com/oklog/ulid 13 | - package: github.com/boltdb/bolt 14 | version: ^1.3.0 15 | - package: github.com/cznic/b 16 | - package: github.com/gin-gonic/gin 17 | version: ^1.1.4 18 | - package: github.com/jawher/mow.cli 19 | - package: github.com/tinylib/msgp 20 | version: ^1.0.1 21 | subpackages: 22 | - msgp 23 | - package: github.com/xlab/closer 24 | testImport: 25 | - package: github.com/stretchr/testify 26 | version: ^1.1.4 27 | subpackages: 28 | - assert 29 | 
-------------------------------------------------------------------------------- /journal/helpers.go: -------------------------------------------------------------------------------- 1 | package journal 2 | 3 | import ( 4 | "math/rand" 5 | "sync" 6 | "time" 7 | 8 | "github.com/oklog/ulid" 9 | ) 10 | 11 | var globalRand = rand.New(&lockedSource{ 12 | src: rand.NewSource(time.Now().UnixNano()), 13 | }) 14 | 15 | // GetULID constucts an Universally Unique Lexicographically Sortable Identifier. 16 | // See https://github.com/oklog/ulid 17 | func GetULID() string { 18 | return ulid.MustNew(ulid.Timestamp(time.Now()), globalRand).String() 19 | } 20 | 21 | type lockedSource struct { 22 | lk sync.Mutex 23 | src rand.Source 24 | } 25 | 26 | func (r *lockedSource) Int63() (n int64) { 27 | r.lk.Lock() 28 | n = r.src.Int63() 29 | r.lk.Unlock() 30 | return 31 | } 32 | 33 | func (r *lockedSource) Seed(seed int64) { 34 | r.lk.Lock() 35 | r.src.Seed(seed) 36 | r.lk.Unlock() 37 | } 38 | -------------------------------------------------------------------------------- /journal/mapping.go: -------------------------------------------------------------------------------- 1 | package journal 2 | 3 | import "github.com/boltdb/bolt" 4 | 5 | type Mapping interface { 6 | Get(id ID) *JournalMeta 7 | Set(id ID, meta *JournalMeta) error 8 | SetBytes(k, v []byte) error 9 | } 10 | 11 | type mapping struct { 12 | tx *bolt.Tx 13 | b *bolt.Bucket 14 | } 15 | 16 | func NewMapping(tx *bolt.Tx) (Mapping, error) { 17 | b, err := tx.CreateBucketIfNotExists(mappingBucket) 18 | if err != nil { 19 | return nil, err 20 | } 21 | m := &mapping{ 22 | tx: tx, 23 | b: b, 24 | } 25 | return m, nil 26 | } 27 | 28 | func (m *mapping) Get(id ID) *JournalMeta { 29 | data := m.b.Get([]byte(id)) 30 | if data == nil { 31 | return nil 32 | } 33 | meta := new(JournalMeta) 34 | meta.UnmarshalMsg(data) 35 | return meta 36 | } 37 | 38 | func (m *mapping) Set(id ID, meta *JournalMeta) error { 39 | v, err := 
meta.MarshalMsg(nil) 40 | if err != nil { 41 | return err 42 | } 43 | return m.b.Put([]byte(id), v) 44 | } 45 | 46 | func (m *mapping) SetBytes(k, v []byte) error { 47 | return m.b.Put(k, v) 48 | } 49 | -------------------------------------------------------------------------------- /storage/helpers.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | ) 7 | 8 | type readSeeker struct { 9 | io.Reader 10 | 11 | buf []byte 12 | offset int 13 | } 14 | 15 | func newReadSeeker(buf []byte) io.ReadSeeker { 16 | return &readSeeker{ 17 | Reader: bytes.NewReader(buf), 18 | buf: buf, 19 | } 20 | } 21 | 22 | func (r *readSeeker) Seek(off int64, whence int) (int64, error) { 23 | offset := int(off) 24 | switch whence { 25 | case io.SeekStart: 26 | if offset < 0 || offset > len(r.buf) { 27 | return 0, io.EOF 28 | } 29 | r.offset = offset 30 | r.Reader = bytes.NewReader(r.buf[offset:]) 31 | case io.SeekEnd: 32 | if offset < 0 || offset > len(r.buf) { 33 | return 0, io.EOF 34 | } 35 | r.offset = len(r.buf) - offset 36 | r.Reader = bytes.NewReader(r.buf[len(r.buf)-offset:]) 37 | case io.SeekCurrent: 38 | if offset+r.offset > len(r.buf) || 39 | offset+r.offset < 0 { 40 | return 0, io.EOF 41 | } 42 | r.offset = r.offset + offset 43 | r.Reader = bytes.NewReader(r.buf[r.offset:]) 44 | default: 45 | panic("wrong whence arg") 46 | } 47 | return int64(r.offset), nil 48 | } 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Sphere Software 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /helpers.go: -------------------------------------------------------------------------------- 1 | package objstore 2 | 3 | import "sphere.software/objstore/cluster" 4 | 5 | // PUMP CODE — a circular buffer 6 | // Copyright 2014 The Go Authors 7 | // 8 | // pumpEventAnnounces returns a channel src such that sending on src will eventually send on 9 | // dst, in order, but that src will always be ready to send/receive soon, even 10 | // if dst currently isn't. It is effectively an infinitely buffered channel. 11 | // 12 | // In particular, goroutine A sending on src will not deadlock even if goroutine 13 | // B that's responsible for receiving on dst is currently blocked trying to 14 | // send to A on a separate channel. 15 | // 16 | // Send a EventStopAnnounce event on the src channel to close the dst channel after all queued 17 | // events are sent on dst. After that, other goroutines can still send to src, 18 | // so that such sends won't block forever, but such events will be ignored. 
19 | func pumpEventAnnounces(dst chan *EventAnnounce) (src chan *EventAnnounce) { 20 | src = make(chan *EventAnnounce) 21 | go func() { 22 | // initialSize is the initial size of the circular buffer. It must be a 23 | // power of 2. 24 | const initialSize = 16 25 | i, j, buf, mask := 0, 0, make([]*EventAnnounce, initialSize), initialSize-1 26 | 27 | maybeSrc := src 28 | for { 29 | maybeDst := dst 30 | if i == j { 31 | maybeDst = nil 32 | } 33 | if maybeDst == nil && maybeSrc == nil { 34 | break 35 | } 36 | 37 | select { 38 | case maybeDst <- buf[i&mask]: 39 | buf[i&mask] = nil 40 | i++ 41 | 42 | case e := <-maybeSrc: 43 | if e.Type == cluster.EventStopAnnounce { 44 | maybeSrc = nil 45 | continue 46 | } 47 | 48 | // Allocate a bigger buffer if necessary. 49 | if i+len(buf) == j { 50 | b := make([]*EventAnnounce, 2*len(buf)) 51 | n := copy(b, buf[j&mask:]) 52 | copy(b[n:], buf[:j&mask]) 53 | i, j = 0, len(buf) 54 | buf, mask = b, len(b)-1 55 | } 56 | 57 | buf[j&mask] = e 58 | j++ 59 | } 60 | } 61 | 62 | close(dst) 63 | // Block forever. 64 | for range src { 65 | } 66 | }() 67 | return src 68 | } 69 | -------------------------------------------------------------------------------- /storage/local.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "os" 7 | "path/filepath" 8 | "syscall" 9 | "time" 10 | ) 11 | 12 | // LocalStorage provides access to the local filesystem. 
13 | type LocalStorage interface { 14 | Prefix() string 15 | Read(key string) (*os.File, error) 16 | Stat(key string) (os.FileInfo, error) 17 | Delete(key string) error 18 | Write(key string, body io.Reader) (int64, error) 19 | ListFiles(prefix string) ([]os.FileInfo, error) 20 | CheckAccess(prefix string) error 21 | DiskStats() (*DiskStats, error) 22 | } 23 | 24 | type localStorage struct { 25 | prefix string 26 | } 27 | 28 | func NewLocalStorage(prefix string) LocalStorage { 29 | return &localStorage{ 30 | prefix: prefix, 31 | } 32 | } 33 | 34 | func (l *localStorage) Prefix() string { 35 | return l.prefix 36 | } 37 | 38 | func (l *localStorage) Read(key string) (*os.File, error) { 39 | return os.OpenFile(filepath.Join(l.prefix, key), os.O_RDONLY, 0600) 40 | } 41 | 42 | func (l *localStorage) Stat(key string) (os.FileInfo, error) { 43 | return os.Stat(filepath.Join(l.prefix, key)) 44 | } 45 | 46 | func (l *localStorage) Delete(key string) error { 47 | return os.Remove(filepath.Join(l.prefix, key)) 48 | } 49 | 50 | func (l *localStorage) Write(key string, body io.Reader) (int64, error) { 51 | f, err := os.OpenFile(filepath.Join(l.prefix, key), os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0600) 52 | if err != nil { 53 | return 0, err 54 | } 55 | defer f.Close() 56 | return io.Copy(f, body) 57 | } 58 | 59 | func (l *localStorage) ListFiles(path string) ([]os.FileInfo, error) { 60 | var infos []os.FileInfo 61 | path = filepath.Join(l.prefix, path) 62 | err := filepath.Walk(path, func(name string, info os.FileInfo, err error) error { 63 | if err != nil { 64 | return err 65 | } else if info.IsDir() { 66 | if path == name { 67 | return nil 68 | } 69 | return filepath.SkipDir 70 | } 71 | infos = append(infos, info) 72 | return nil 73 | }) 74 | if err != nil { 75 | return nil, err 76 | } 77 | return infos, nil 78 | } 79 | 80 | func (l *localStorage) CheckAccess(path string) error { 81 | body := []byte(time.Now().UTC().String()) 82 | key := filepath.Join(path, "_objstore_touch") 83 
| _, err := l.Write(key, bytes.NewReader(body)) 84 | return err 85 | } 86 | 87 | type DiskStats struct { 88 | BytesAll uint64 `json:"bytes_all"` 89 | BytesUsed uint64 `json:"bytes_used"` 90 | BytesFree uint64 `json:"bytes_free"` 91 | } 92 | 93 | func (l *localStorage) DiskStats() (*DiskStats, error) { 94 | var fs syscall.Statfs_t 95 | if err := syscall.Statfs(l.prefix, &fs); err != nil { 96 | return nil, err 97 | } 98 | ds := &DiskStats{ 99 | BytesAll: fs.Blocks * uint64(fs.Bsize), 100 | BytesFree: fs.Bfree * uint64(fs.Bsize), 101 | } 102 | ds.BytesUsed = ds.BytesAll - ds.BytesFree 103 | return ds, nil 104 | } 105 | -------------------------------------------------------------------------------- /cluster/client.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "net" 9 | "net/http" 10 | "strings" 11 | 12 | "github.com/astranet/astranet" 13 | "github.com/astranet/astranet/addr" 14 | ) 15 | 16 | type PrivateClient struct { 17 | router astranet.AstraNet 18 | cli *http.Client 19 | } 20 | 21 | // NewPrivateClient initializes a new client for the virtual network. 22 | // Obtain router handle from an initialized private API server. 
23 | func NewPrivateClient(router astranet.AstraNet) *PrivateClient { 24 | return &PrivateClient{ 25 | router: router, 26 | cli: &http.Client{ 27 | Transport: newHTTPTransport(router), 28 | }, 29 | } 30 | } 31 | 32 | func newHTTPTransport(router astranet.AstraNet) *http.Transport { 33 | return &http.Transport{ 34 | DisableKeepAlives: true, 35 | Dial: func(network, addr string) (net.Conn, error) { 36 | host, _, err := net.SplitHostPort(addr) 37 | if err != nil { 38 | return nil, err 39 | } 40 | return router.Dial(network, host) 41 | }, 42 | } 43 | } 44 | 45 | func nodeURI(id string) string { 46 | return "http://objstore-" + id 47 | } 48 | 49 | type NodeIter func(id, addr, vaddr string) error 50 | 51 | func (p *PrivateClient) ForEachNode(iterFunc NodeIter) error { 52 | return forEachNode(p.router, iterFunc) 53 | } 54 | 55 | func forEachNode(router astranet.AstraNet, iterFunc NodeIter) error { 56 | services := router.Services() 57 | seen := make(map[string]bool) 58 | for _, info := range services { 59 | if !strings.HasPrefix(info.Service, "objstore-") { 60 | continue 61 | } 62 | if info.Upstream == nil { 63 | continue 64 | } 65 | nodeID := strings.TrimPrefix(strings.Split(info.Service, ".")[0], "objstore-") 66 | host, _, _ := net.SplitHostPort(info.Upstream.RAddr().String()) 67 | if seen[nodeID+host] { 68 | continue 69 | } else { 70 | seen[nodeID+host] = true 71 | } 72 | vaddr := getAddr(info.Host, info.Port) 73 | if err := iterFunc(nodeID, host, vaddr); err == RangeStop { 74 | return nil 75 | } else if err != nil { 76 | return err 77 | } 78 | } 79 | return nil 80 | } 81 | 82 | func getAddr(host uint64, port uint32) string { 83 | return fmt.Sprintf("%s:%d", addr.Uint2Host(host), port) 84 | } 85 | 86 | var ( 87 | RangeStop = errors.New("stop") 88 | ForEachStop = RangeStop 89 | ) 90 | 91 | func (p *PrivateClient) GET(ctx context.Context, nodeID, path string, body io.Reader) (*http.Response, error) { 92 | req, err := http.NewRequest("GET", nodeURI(nodeID)+path, nil) 93 | 
req = req.WithContext(ctx) 94 | if err != nil { 95 | return nil, err 96 | } 97 | return p.cli.Do(req) 98 | } 99 | 100 | func (p *PrivateClient) POST(ctx context.Context, nodeID, path string, body io.Reader) (*http.Response, error) { 101 | req, err := http.NewRequest("POST", nodeURI(nodeID)+path, body) 102 | req = req.WithContext(ctx) 103 | if err != nil { 104 | return nil, err 105 | } 106 | return p.cli.Do(req) 107 | } 108 | -------------------------------------------------------------------------------- /journal/meta.go: -------------------------------------------------------------------------------- 1 | package journal 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | "time" 8 | ) 9 | 10 | type FileMeta struct { 11 | ID string `msgp:"0" json:"id"` 12 | Name string `msgp:"1" json:"name"` 13 | Size int64 `msgp:"2" json:"size"` 14 | Timestamp int64 `msgp:"3" json:"timestamp"` 15 | UserMeta map[string]string `msgp:"4" json:"user_meta"` 16 | IsSymlink bool `msgp:"5" json:"is_symlink"` 17 | Consistency ConsistencyLevel `msgp:"6" json:"consistency"` 18 | IsDeleted bool `msgp:"7" json:"is_deleted"` 19 | IsFetched bool `msgp:"8" json:"is_fetched"` 20 | } 21 | 22 | func (f *FileMeta) Map() map[string]string { 23 | m := map[string]string{ 24 | "id": f.ID, 25 | "name": f.Name, 26 | "size": strconv.FormatInt(f.Size, 10), 27 | "timestamp": strconv.FormatInt(f.Timestamp, 10), 28 | "consistency": strconv.Itoa(int(f.Consistency)), 29 | } 30 | for k, v := range f.UserMeta { 31 | m["usermeta-"+k] = v 32 | } 33 | return m 34 | } 35 | 36 | func (f *FileMeta) Unmap(m map[string]string) { 37 | userMeta := make(map[string]string, len(m)) 38 | for k, v := range m { 39 | switch k = strings.ToLower(k); k { 40 | case "id": 41 | f.ID = v 42 | case "name": 43 | f.Name = v 44 | case "size": 45 | f.Size, _ = strconv.ParseInt(v, 10, 64) 46 | case "timestamp": 47 | f.Timestamp, _ = strconv.ParseInt(v, 10, 64) 48 | case "consistency": 49 | level, _ := strconv.Atoi(v) 50 | if level == 0 { 
51 | // at least 52 | f.Consistency = ConsistencyS3 53 | } else { 54 | f.Consistency = (ConsistencyLevel)(level) 55 | } 56 | default: 57 | if !strings.HasPrefix(k, "usermeta-") { 58 | continue 59 | } 60 | k = strings.TrimPrefix(k, "usermeta-") 61 | userMeta[k] = v 62 | } 63 | } 64 | f.UserMeta = userMeta 65 | } 66 | 67 | type FileMetaList []*FileMeta 68 | 69 | func (m FileMeta) String() string { 70 | if m.IsDeleted { 71 | return fmt.Sprintf("%s: %s (deleted)", m.ID, m.Name) 72 | } 73 | return fmt.Sprintf("%s: %s (%db->%v)", m.ID, m.Name, m.Size, m.IsSymlink) 74 | } 75 | 76 | type ConsistencyLevel int 77 | 78 | const ( 79 | // ConsistencyLocal flags file for local persistence only, implying 80 | // that the file body will be stored on a single node. Default. 81 | ConsistencyLocal ConsistencyLevel = 0 82 | // ConsistencyS3 flags file for local+S3 persistence, implying that the file 83 | // body will be stored on a single node and Amazon S3. 84 | ConsistencyS3 ConsistencyLevel = 1 85 | // ConsistencyFull flags file to be replicated across all existing nodes in cluster and S3. 86 | ConsistencyFull ConsistencyLevel = 2 87 | ) 88 | 89 | type ID string 90 | 91 | type JournalMeta struct { 92 | ID ID `msgp:"0" json:"journal_id"` 93 | CreatedAt int64 `msgp:"1" json:"created_at"` 94 | JoinedAt int64 `msgp:"2" json:"joined_at"` 95 | FirstKey string `msgp:"3" json:"first_key"` 96 | LastKey string `msgp:"4" json:"last_key"` 97 | CountTotal int `msgp:"5" json:"count_total"` 98 | } 99 | 100 | func (j JournalMeta) String() string { 101 | if len(j.FirstKey) == 0 { 102 | j.FirstKey = "?" 103 | } 104 | if len(j.LastKey) == 0 { 105 | j.LastKey = "?" 
106 | } 107 | ts := time.Unix(0, j.CreatedAt).UTC().Format(time.StampMilli) 108 | return fmt.Sprintf("%s (%s): %s-%s (count: %d) joined: %v", 109 | j.ID, ts, j.FirstKey, j.LastKey, j.CountTotal, j.JoinedAt > 0) 110 | } 111 | -------------------------------------------------------------------------------- /cluster/manager.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "errors" 8 | "io" 9 | "io/ioutil" 10 | 11 | "sphere.software/objstore/journal" 12 | ) 13 | 14 | type ClusterManager interface { 15 | ListNodes() ([]*NodeInfo, error) 16 | Announce(ctx context.Context, nodeID string, event *EventAnnounce) error 17 | GetObject(ctx context.Context, nodeID string, id string) (io.ReadCloser, error) 18 | Sync(ctx context.Context, nodeID string, 19 | list journal.FileMetaList) (added, deleted journal.FileMetaList, err error) 20 | } 21 | 22 | type NodeInfo struct { 23 | ID string `json:"id"` 24 | Addr string `json:"addr"` 25 | VAddr string `json:"vaddr"` 26 | } 27 | 28 | func NewClusterManager(cli *PrivateClient, nodeID string) ClusterManager { 29 | return &clusterManager{ 30 | cli: cli, 31 | nodeID: nodeID, 32 | } 33 | } 34 | 35 | type clusterManager struct { 36 | nodeID string 37 | cli *PrivateClient 38 | } 39 | 40 | func (c *clusterManager) ListNodes() ([]*NodeInfo, error) { 41 | var nodes []*NodeInfo 42 | if err := c.cli.ForEachNode(func(id, addr, vaddr string) error { 43 | nodes = append(nodes, &NodeInfo{ 44 | ID: id, 45 | Addr: addr, 46 | VAddr: vaddr, 47 | }) 48 | return nil 49 | }); err != nil { 50 | return nil, err 51 | } 52 | return nodes, nil 53 | } 54 | 55 | func (c *clusterManager) Announce(ctx context.Context, nodeID string, event *EventAnnounce) error { 56 | body, _ := json.Marshal(event) 57 | resp, err := c.cli.POST(ctx, nodeID, "/private/v1/announce", bytes.NewReader(body)) 58 | if err != nil { 59 | return err 60 | } 61 | respBody, _ := 
ioutil.ReadAll(resp.Body) 62 | resp.Body.Close() 63 | if resp.StatusCode != 200 { 64 | if len(respBody) > 0 { 65 | err := errors.New(string(respBody)) 66 | return err 67 | } 68 | return errors.New(resp.Status) 69 | } 70 | return nil 71 | } 72 | 73 | var ErrNotFound = errors.New("not found") 74 | 75 | func (c *clusterManager) GetObject(ctx context.Context, nodeID string, id string) (io.ReadCloser, error) { 76 | resp, err := c.cli.GET(ctx, nodeID, "/private/v1/get/"+id, nil) 77 | if err != nil { 78 | return nil, err 79 | } 80 | if resp.StatusCode == 404 { 81 | resp.Body.Close() 82 | return nil, ErrNotFound 83 | } else if resp.StatusCode != 200 { 84 | respBody, _ := ioutil.ReadAll(resp.Body) 85 | resp.Body.Close() 86 | if len(respBody) > 0 { 87 | err := errors.New(string(respBody)) 88 | return nil, err 89 | } 90 | err := errors.New(resp.Status) 91 | return nil, err 92 | } 93 | return resp.Body, nil 94 | } 95 | 96 | type SyncResponse struct { 97 | Added journal.FileMetaList `json:"list_added"` 98 | Deleted journal.FileMetaList `json:"list_deleted"` 99 | } 100 | 101 | func (c *clusterManager) Sync(ctx context.Context, nodeID string, 102 | list journal.FileMetaList) (added, deleted journal.FileMetaList, err error) { 103 | 104 | body, _ := json.Marshal(list) 105 | resp, err := c.cli.POST(ctx, nodeID, "/private/v1/sync", bytes.NewReader(body)) 106 | if err != nil { 107 | return nil, nil, err 108 | } 109 | respBody, _ := ioutil.ReadAll(resp.Body) 110 | resp.Body.Close() 111 | if resp.StatusCode != 200 { 112 | if len(respBody) > 0 { 113 | err := errors.New(string(respBody)) 114 | return nil, nil, err 115 | } 116 | return nil, nil, errors.New(resp.Status) 117 | } 118 | var syncResp SyncResponse 119 | if err := json.Unmarshal(respBody, &syncResp); err != nil { 120 | return nil, nil, err 121 | } 122 | return syncResp.Added, syncResp.Deleted, nil 123 | } 124 | -------------------------------------------------------------------------------- /glide.lock: 
-------------------------------------------------------------------------------- 1 | hash: dac827ec9b936d98e9d57a9cbb2a4b436a4ed0a8a20f331a805cf0d9a66c041b 2 | updated: 2017-08-29T01:39:08.521273101+03:00 3 | imports: 4 | - name: github.com/astranet/astranet 5 | version: ededf87e9f24d6482bf900c46bcf57e52cb73cee 6 | subpackages: 7 | - addr 8 | - glog 9 | - listener 10 | - protocol 11 | - route 12 | - service 13 | - skykiss 14 | - socket 15 | - transport 16 | - name: github.com/astranet/btree-2d 17 | version: 4b00686449f2c5e8cea67a72e64db87b5244b1af 18 | subpackages: 19 | - lockie 20 | - util 21 | - uuid 22 | - name: github.com/aws/aws-sdk-go 23 | version: e63027ac6e05f6d4ae9f97ce0294d7468ca652da 24 | subpackages: 25 | - aws 26 | - aws/awserr 27 | - aws/awsutil 28 | - aws/client 29 | - aws/client/metadata 30 | - aws/corehandlers 31 | - aws/credentials 32 | - aws/credentials/ec2rolecreds 33 | - aws/credentials/endpointcreds 34 | - aws/credentials/stscreds 35 | - aws/defaults 36 | - aws/ec2metadata 37 | - aws/endpoints 38 | - aws/request 39 | - aws/session 40 | - aws/signer/v4 41 | - internal/shareddefaults 42 | - private/protocol 43 | - private/protocol/query 44 | - private/protocol/query/queryutil 45 | - private/protocol/rest 46 | - private/protocol/restxml 47 | - private/protocol/xml/xmlutil 48 | - service/s3 49 | - service/sts 50 | - name: github.com/boltdb/bolt 51 | version: 2f1ce7a837dcb8da3ec595b1dac9d0632f0f99e8 52 | - name: github.com/cenk/backoff 53 | version: cdf48bbc1eb78d1349cbda326a4a037f7ba565c6 54 | - name: github.com/cznic/b 55 | version: 6955404bf550e1eae1bf83121739078b027f4547 56 | - name: github.com/gin-contrib/sse 57 | version: 22d885f9ecc78bf4ee5d72b937e4bbcdc58e8cae 58 | - name: github.com/gin-gonic/gin 59 | version: d459835d2b077e44f7c9b453505ee29881d5d12d 60 | subpackages: 61 | - binding 62 | - render 63 | - name: github.com/go-ini/ini 64 | version: c787282c39ac1fc618827141a1f762240def08a3 65 | - name: github.com/golang/protobuf 66 | version: 
2402d76f3d41f928c7902a765dfc872356dd3aad 67 | subpackages: 68 | - proto 69 | - name: github.com/jawher/mow.cli 70 | version: a459d5906bb7a9c5eda7c4d02eec7c541120226e 71 | - name: github.com/jmespath/go-jmespath 72 | version: bd40a432e4c76585ef6b72d3fd96fb9b6dc7b68d 73 | - name: github.com/mattn/go-isatty 74 | version: dda3de49cbfcec471bd7a70e6cc01fcc3ff90109 75 | - name: github.com/oklog/ulid 76 | version: 66bb6560562feca7045b23db1ae85b01260f87c5 77 | - name: github.com/philhofer/fwd 78 | version: 1612a298117663d7bc9a760ae20d383413859798 79 | - name: github.com/serialx/hashring 80 | version: 75d57fa264ad17fd929304dfdb02c8e278c5c01c 81 | - name: github.com/tinylib/msgp 82 | version: b2b6a672cf1e5b90748f79b8b81fc8c5cf0571a1 83 | subpackages: 84 | - msgp 85 | - name: github.com/ugorji/go 86 | version: c88ee250d0221a57af388746f5cf03768c21d6e2 87 | subpackages: 88 | - codec 89 | - name: github.com/xlab/closer 90 | version: 89cd22812c4fd5188746092cd10992caeffc75ab 91 | - name: golang.org/x/sys 92 | version: 075e574b89e4c2d22f2286a7e2b919519c6f3547 93 | subpackages: 94 | - unix 95 | - name: gopkg.in/go-playground/validator.v8 96 | version: c193cecd124b5cc722d7ee5538e945bdb3348435 97 | - name: gopkg.in/yaml.v2 98 | version: a3f3340b5840cee44f372bddb5880fcbc419b46a 99 | testImports: 100 | - name: github.com/davecgh/go-spew 101 | version: 6d212800a42e8ab5c146b8ace3490ee17e5225f9 102 | subpackages: 103 | - spew 104 | - name: github.com/pmezard/go-difflib 105 | version: d8ed2627bdf02c080bf22230dbb337003b7aba2d 106 | subpackages: 107 | - difflib 108 | - name: github.com/stretchr/testify 109 | version: 69483b4bd14f5845b5a1e55bca19e954e827f1d0 110 | subpackages: 111 | - assert 112 | -------------------------------------------------------------------------------- /api/public.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/gin-gonic/gin" 7 | 8 | "sphere.software/objstore" 9 | ) 
10 | 11 | type PublicServer struct { 12 | nodeID string 13 | 14 | mux *gin.Engine 15 | } 16 | 17 | func NewPublicServer(nodeID string) *PublicServer { 18 | return &PublicServer{ 19 | nodeID: nodeID, 20 | } 21 | } 22 | 23 | func (p *PublicServer) ListenAndServe(addr string) error { 24 | return p.mux.Run(addr) 25 | } 26 | 27 | func (p *PublicServer) RouteAPI(store objstore.Store) { 28 | r := gin.Default() 29 | r.GET("/api/v1/get/:id", p.GetHandler(store)) 30 | r.GET("/api/v1/meta/:id", p.MetaHandler(store)) 31 | r.POST("/api/v1/put", p.PutHandler(store)) 32 | r.POST("/api/v1/delete/:id", p.DeleteHandler(store)) 33 | r.GET("/api/v1/id", p.IDHandler()) 34 | r.GET("/api/v1/version", p.VersionHandler()) 35 | r.GET("/api/v1/ping", p.PingHandler()) 36 | r.GET("/api/v1/stats", p.StatsHandler(store)) 37 | p.mux = r 38 | } 39 | 40 | func (p *PublicServer) PingHandler() gin.HandlerFunc { 41 | return func(c *gin.Context) { 42 | c.String(200, p.nodeID) 43 | } 44 | } 45 | 46 | func (p *PublicServer) IDHandler() gin.HandlerFunc { 47 | return func(c *gin.Context) { 48 | c.String(200, objstore.GenerateID()) 49 | } 50 | } 51 | 52 | func (p *PublicServer) VersionHandler() gin.HandlerFunc { 53 | return func(c *gin.Context) { 54 | // TODO: version generation from commit ID 55 | c.String(200, "dev") 56 | } 57 | } 58 | 59 | const ( 60 | KB = 1024 61 | MB = 1024 * KB 62 | GB = 1024 * MB 63 | ) 64 | 65 | type DiskStats struct { 66 | *objstore.DiskStats 67 | 68 | KBytesAll float64 `json:"kb_all"` 69 | KBytesUsed float64 `json:"kb_used"` 70 | KBytesFree float64 `json:"kb_free"` 71 | 72 | MBytesAll float64 `json:"mb_all"` 73 | MBytesUsed float64 `json:"mb_used"` 74 | MBytesFree float64 `json:"mb_free"` 75 | 76 | GBytesAll float64 `json:"gb_all"` 77 | GBytesUsed float64 `json:"gb_used"` 78 | GBytesFree float64 `json:"gb_free"` 79 | } 80 | 81 | type Stats struct { 82 | DiskStats *DiskStats `json:"disk_stats"` 83 | // TODO: other stats 84 | } 85 | 86 | func (p *PublicServer) StatsHandler(store 
objstore.Store) gin.HandlerFunc { 87 | return func(c *gin.Context) { 88 | var stats Stats 89 | if ds, err := store.DiskStats(); err == nil { 90 | stats.DiskStats = &DiskStats{ 91 | DiskStats: ds, 92 | } 93 | stats.DiskStats.KBytesAll = float64(ds.BytesAll) / KB 94 | stats.DiskStats.KBytesUsed = float64(ds.BytesUsed) / KB 95 | stats.DiskStats.KBytesFree = float64(ds.BytesFree) / KB 96 | stats.DiskStats.MBytesAll = float64(ds.BytesAll) / MB 97 | stats.DiskStats.MBytesUsed = float64(ds.BytesUsed) / MB 98 | stats.DiskStats.MBytesFree = float64(ds.BytesFree) / MB 99 | stats.DiskStats.GBytesAll = float64(ds.BytesAll) / GB 100 | stats.DiskStats.GBytesUsed = float64(ds.BytesUsed) / GB 101 | stats.DiskStats.GBytesFree = float64(ds.BytesFree) / GB 102 | } 103 | c.JSON(200, stats) 104 | } 105 | } 106 | 107 | func (p *PublicServer) GetHandler(store objstore.Store) gin.HandlerFunc { 108 | return func(c *gin.Context) { 109 | var fetch bool 110 | fetchOption := c.Request.Header.Get("X-Meta-Fetch") 111 | if strings.ToLower(fetchOption) == "true" || fetchOption == "1" { 112 | fetch = true 113 | } 114 | r, meta, err := store.FindObject(c, c.Param("id"), fetch) 115 | if err == objstore.ErrNotFound { 116 | if meta != nil { 117 | serveMeta(c, meta) 118 | } 119 | c.Status(404) 120 | return 121 | } else if err != nil { 122 | c.String(500, "error: %v", err) 123 | return 124 | } 125 | serveObject(c, r, meta) 126 | } 127 | } 128 | 129 | func (p *PublicServer) MetaHandler(store objstore.Store) gin.HandlerFunc { 130 | return func(c *gin.Context) { 131 | meta, err := store.HeadObject(c.Param("id")) 132 | if err == objstore.ErrNotFound { 133 | if meta != nil { 134 | serveMeta(c, meta) 135 | } 136 | c.Status(404) 137 | return 138 | } else if err != nil { 139 | c.String(500, "error: %v", err) 140 | return 141 | } 142 | c.JSON(200, meta) 143 | } 144 | } 145 | 146 | func (p *PublicServer) PutHandler(store objstore.Store) gin.HandlerFunc { 147 | return func(c *gin.Context) { 148 | putObject(c, 
// NewS3Storage returns a RemoteStorage implementation backed by the
// given S3 bucket in the given AWS region.
//
// NOTE(review): session.New is deprecated in aws-sdk-go in favour of
// session.NewSession, which surfaces configuration errors instead of
// deferring them — consider migrating.
func NewS3Storage(region, bucket string) RemoteStorage {
	cli := s3.New(session.New(&aws.Config{
		Region: aws.String(region),
	}))
	return &s3Storage{
		bucket: bucket,
		cli:    cli,
	}
}
| Bucket: aws.String(s.bucket), 65 | VersionId: awsStringMaybe(version), 66 | }) 67 | if err != nil { 68 | if strings.HasPrefix(err.Error(), "NoSuchKey") { 69 | return nil, ErrNotFound 70 | } 71 | return nil, err 72 | } 73 | spec := &Spec{ 74 | Path: fullPath(s.bucket, key), 75 | Key: key, 76 | Body: obj.Body, 77 | ETag: aws.StringValue(obj.ETag), 78 | Version: aws.StringValue(obj.VersionId), 79 | UpdatedAt: aws.TimeValue(obj.LastModified), 80 | Size: aws.Int64Value(obj.ContentLength), 81 | Meta: aws.StringValueMap(obj.Metadata), 82 | } 83 | return spec, nil 84 | } 85 | 86 | func (s *s3Storage) HeadObject(key string, version ...string) (*Spec, error) { 87 | obj, err := s.cli.HeadObject(&s3.HeadObjectInput{ 88 | Key: aws.String(key), 89 | Bucket: aws.String(s.bucket), 90 | VersionId: awsStringMaybe(version), 91 | }) 92 | if err != nil { 93 | return nil, err 94 | } 95 | spec := &Spec{ 96 | Path: fullPath(s.bucket, key), 97 | Key: key, 98 | ETag: aws.StringValue(obj.ETag), 99 | Version: aws.StringValue(obj.VersionId), 100 | UpdatedAt: aws.TimeValue(obj.LastModified), 101 | Size: aws.Int64Value(obj.ContentLength), 102 | } 103 | return spec, nil 104 | } 105 | 106 | func (s *s3Storage) ListObjects(prefix string, startAfter ...string) ([]*Spec, error) { 107 | var token *string 108 | var specs []*Spec 109 | for { 110 | list, err := s.cli.ListObjectsV2(&s3.ListObjectsV2Input{ 111 | Bucket: aws.String(s.bucket), 112 | Prefix: aws.String(prefix), 113 | StartAfter: awsStringMaybe(startAfter), 114 | // pagination controls 115 | MaxKeys: aws.Int64(100), 116 | ContinuationToken: token, 117 | }) 118 | if err != nil { 119 | return nil, err 120 | } 121 | for _, obj := range list.Contents { 122 | key := aws.StringValue(obj.Key) 123 | specs = append(specs, &Spec{ 124 | Path: fullPath(s.bucket, key), 125 | Key: key, 126 | ETag: aws.StringValue(obj.ETag), 127 | UpdatedAt: aws.TimeValue(obj.LastModified), 128 | Size: aws.Int64Value(obj.Size), 129 | }) 130 | } 131 | token = 
list.ContinuationToken 132 | if *list.IsTruncated == false { 133 | return specs, nil 134 | } else if token == nil { 135 | return specs, nil 136 | } 137 | } 138 | } 139 | 140 | func (s *s3Storage) CheckAccess(prefix string) error { 141 | body := []byte(time.Now().UTC().String()) 142 | _, err := s.cli.PutObject(&s3.PutObjectInput{ 143 | Body: newReadSeeker(body), 144 | Bucket: aws.String(s.bucket), 145 | ContentType: aws.String("text/plain"), 146 | Key: aws.String(path.Join(prefix, "_objstore_touch")), 147 | }) 148 | return err 149 | } 150 | 151 | func (s *s3Storage) PutObject(key string, r io.ReadSeeker, meta map[string]string) (*Spec, error) { 152 | var ctype string 153 | if len(meta["name"]) > 0 { 154 | ctype = mime.TypeByExtension(filepath.Ext(meta["name"])) 155 | } 156 | obj, err := s.cli.PutObject(&s3.PutObjectInput{ 157 | Body: r, 158 | Bucket: aws.String(s.bucket), 159 | Key: aws.String(key), 160 | ContentType: aws.String(ctype), 161 | Metadata: aws.StringMap(meta), 162 | }) 163 | if err != nil { 164 | return nil, err 165 | } 166 | spec := &Spec{ 167 | Path: fullPath(s.bucket, key), 168 | Key: key, 169 | ETag: aws.StringValue(obj.ETag), 170 | Version: aws.StringValue(obj.VersionId), 171 | Meta: meta, 172 | } 173 | return spec, err 174 | } 175 | 176 | func fullPath(bucket, key string) string { 177 | return fmt.Sprintf("s3://%s/%s", bucket, key) 178 | } 179 | 180 | func awsStringMaybe(v []string) *string { 181 | if len(v) > 0 { 182 | return aws.String(v[0]) 183 | } 184 | return nil 185 | } 186 | -------------------------------------------------------------------------------- /cmd/objstore/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "path/filepath" 7 | "runtime" 8 | "sync" 9 | "time" 10 | 11 | "github.com/boltdb/bolt" 12 | "github.com/gin-gonic/gin" 13 | "github.com/jawher/mow.cli" 14 | "github.com/xlab/closer" 15 | 16 | "sphere.software/objstore" 17 | 
"sphere.software/objstore/api" 18 | "sphere.software/objstore/cluster" 19 | "sphere.software/objstore/journal" 20 | "sphere.software/objstore/storage" 21 | ) 22 | 23 | var app = cli.App("objstore", "A Multi-Master Distributed Caching Layer for Amazon S3.\nVersion 0.1\thttp://github.com/SphereSoftware/objstore") 24 | 25 | var ( 26 | debugEnabled bool 27 | debugLevel = app.Int(cli.IntOpt{ 28 | Name: "d debug", 29 | Desc: "Debug level to use, currently 0/1 suppported.", 30 | EnvVar: "APP_DEBUG_LEVEL", 31 | Value: 0, 32 | HideValue: true, 33 | }) 34 | clusterNodes = app.Strings(cli.StringsOpt{ 35 | Name: "N nodes", 36 | Desc: "A list of cluster nodes to join for discovery and journal updates", 37 | EnvVar: "APP_CLUSTER_NODES", 38 | Value: []string{}, 39 | HideValue: true, 40 | }) 41 | clusterName = app.String(cli.StringOpt{ 42 | Name: "T tag", 43 | Desc: "Cluster tag name", 44 | EnvVar: "APP_CLUSTER_TAGNAME", 45 | Value: "default", 46 | }) 47 | privateAddr = app.String(cli.StringOpt{ 48 | Name: "private-addr", 49 | Desc: "Listen address for cluster discovery and private API", 50 | EnvVar: "NET_PRIVATE_ADDR", 51 | Value: "0.0.0.0:11999", 52 | }) 53 | debugAddr = app.String(cli.StringOpt{ 54 | Name: "debug-addr", 55 | Desc: "Listen address for private API debugging using external tools", 56 | EnvVar: "NET_DEBUG_ADDR", 57 | Value: "", 58 | }) 59 | publicAddr = app.String(cli.StringOpt{ 60 | Name: "public-addr", 61 | Desc: "Listen address for external access and public HTTP API", 62 | EnvVar: "NET_PUBLIC_ADDR", 63 | Value: "0.0.0.0:10999", 64 | }) 65 | statePrefix = app.String(cli.StringOpt{ 66 | Name: "state-dir", 67 | Desc: "Directory where to keep local state and journals.", 68 | EnvVar: "APP_STATE_DIR", 69 | Value: "state/", 70 | }) 71 | localPrefix = app.String(cli.StringOpt{ 72 | Name: "files-dir", 73 | Desc: "Directory where to keep local files.", 74 | EnvVar: "APP_FILES_DIR", 75 | Value: "files/", 76 | }) 77 | s3Region = app.String(cli.StringOpt{ 78 | Name: "R 
// main wires up exit handling via closer, configures gin's mode from
// the parsed debug flag, and runs the CLI app; appMain does the work.
func main() {
	// Run all closer-bound callbacks (including those bound in appMain)
	// when the process shuts down.
	defer closer.Close()

	closer.Bind(func() {
		runtime.GC()
		log.Println("bye!")
	})

	app.Action = appMain
	// Before runs after flag parsing but before the action: translate
	// the numeric debug level into debugEnabled and gin's run mode.
	app.Before = func() {
		if *debugLevel > 0 {
			debugEnabled = true
		}
		if debugEnabled {
			gin.SetMode(gin.DebugMode)
		} else {
			gin.SetMode(gin.ReleaseMode)
		}
	}
	if err := app.Run(os.Args); err != nil {
		closer.Fatalln(err)
	}
}
privateServer.RouteAPI(store) 153 | if err := privateServer.ListenAndServe(*privateAddr); err != nil { 154 | closer.Fatalln(err) 155 | } 156 | 157 | closer.Bind(func() { 158 | if err := store.Close(); err != nil { 159 | log.Println("[WARN]", err) 160 | } 161 | if debugEnabled { 162 | log.Println("[INFO] waiting for queues") 163 | } 164 | wg := new(sync.WaitGroup) 165 | wg.Add(2) 166 | go func() { 167 | defer wg.Done() 168 | store.WaitInbound(2 * time.Minute) 169 | }() 170 | go func() { 171 | defer wg.Done() 172 | store.WaitOutbound(2 * time.Minute) 173 | }() 174 | wg.Wait() 175 | }) 176 | 177 | if len(*clusterNodes) == 0 { 178 | log.Println("[WARN] no additional cluster nodes specified, current node starts solo") 179 | } else { 180 | if debugEnabled { 181 | log.Println("[INFO] joining to cluster", *clusterNodes) 182 | } 183 | if err := privateServer.JoinCluster(*clusterNodes); err != nil { 184 | log.Println("[WARN]", err) 185 | } 186 | } 187 | // expose private API to HTTP clients, so objstore cluster nodes can be debugged 188 | // using browser and external tools. 
// openStateDB opens (creating the directory if needed) the BoltDB
// state file "state.db" under prefix, used for journals and local state.
func openStateDB(prefix string) (*bolt.DB, error) {
	if err := os.MkdirAll(prefix, 0700); err != nil {
		return nil, err
	}
	// NOTE(review): the 4 GiB InitialMmapSize constant overflows int on
	// 32-bit targets (compile error there) — confirm 64-bit-only builds.
	return bolt.Open(filepath.Join(prefix, "state.db"), 0600, &bolt.Options{
		Timeout:         30 * time.Second,        // wait while trying to open state file
		InitialMmapSize: 4 * 1024 * 1024 * 1024, // preallocated space to avoid writers block
	})
}
// View runs fn read-only against the journal identified by id, inside
// a single BoltDB view transaction. The journal's stored meta is
// decoded from the mapping bucket and passed to fn alongside the
// journal. Returns an error when either the mapping entry or the
// journal bucket is missing.
//
// NOTE(review): mapping/journals can be nil on a fresh DB where Create
// never ran, which would panic on Get — confirm Create always runs
// first. UnmarshalMsg errors are silently ignored here.
func (kv *kvJournalManager) View(id ID, fn JournalIter) error {
	return kv.db.View(func(tx *bolt.Tx) error {
		journalID := []byte(id)
		mapping := tx.Bucket(mappingBucket)
		journals := tx.Bucket(journalsBucket)
		data := mapping.Get(journalID)
		var meta *JournalMeta
		if data == nil {
			return errors.New("kvJournal: journal mapping not exists")
		} else {
			meta = new(JournalMeta)
			meta.UnmarshalMsg(data)
		}
		b := journals.Bucket(journalID)
		if b == nil {
			return errors.New("kvJournal: journal not exists")
		}
		journal := NewJournal(id, tx, b)
		return fn(journal, meta)
	})
}
return errors.New("kvJournal: journal not exists") 107 | } 108 | journal := NewJournal(id, tx, b) 109 | return fn(journal, meta) 110 | }) 111 | } 112 | 113 | func (kv *kvJournalManager) ListAll() (metaList []*JournalMeta, err error) { 114 | err = kv.db.View(func(tx *bolt.Tx) error { 115 | mapping := tx.Bucket(mappingBucket) 116 | journals := tx.Bucket(journalsBucket) 117 | metaList = make([]*JournalMeta, 0, journals.Stats().KeyN) 118 | cur := journals.Cursor() 119 | 120 | id, _ := cur.First() 121 | for id != nil { 122 | journal := NewJournal(ID(id), tx, journals.Bucket(id)) 123 | meta := journal.Meta() 124 | if data := mapping.Get(id); data != nil { 125 | extraMeta := new(JournalMeta) 126 | extraMeta.UnmarshalMsg(data) 127 | meta.CreatedAt = extraMeta.CreatedAt 128 | meta.JoinedAt = extraMeta.JoinedAt 129 | } 130 | metaList = append(metaList, meta) 131 | id, _ = cur.Next() 132 | } 133 | return nil 134 | }) 135 | return 136 | } 137 | 138 | func (kv *kvJournalManager) JoinAll(target ID) (*JournalMeta, error) { 139 | kv.Create(target) // for safety reasons ensure that journal exists 140 | 141 | var targetMeta *JournalMeta 142 | err := kv.db.Update(func(tx *bolt.Tx) error { 143 | mapping := tx.Bucket(mappingBucket) 144 | journals := tx.Bucket(journalsBucket) 145 | cur := journals.Cursor() 146 | 147 | journalID := []byte(target) 148 | targetJournal := NewJournal(target, tx, journals.Bucket(journalID)) 149 | 150 | id, _ := cur.First() 151 | for id != nil { 152 | if bytes.Equal(id, journalID) { 153 | id, _ = cur.Next() 154 | continue 155 | } 156 | journal := NewJournal(ID(id), tx, journals.Bucket(id)) 157 | if _, err := journal.Range("", 0, func(k string, v *FileMeta) error { 158 | if targetJournal.Exists(k) { 159 | // disallow override upon consolidation from older journals 160 | return nil 161 | } 162 | return targetJournal.Set(k, v) 163 | }); err != nil { 164 | return err 165 | } 166 | 167 | meta := journal.Meta() 168 | meta.JoinedAt = time.Now().UnixNano() 169 | 
// ForEach iterates over all journals read-only, invoking fn once per
// journal bucket with its decoded mapping meta (nil when no mapping
// entry exists). Iteration stops early without error when fn returns
// RangeStop, and aborts with the error otherwise.
func (kv *kvJournalManager) ForEach(fn JournalIter) error {
	return kv.db.View(func(tx *bolt.Tx) error {
		mapping := tx.Bucket(mappingBucket)
		journals := tx.Bucket(journalsBucket)
		cur := journals.Cursor()

		id, _ := cur.First()
		for id != nil {
			var meta *JournalMeta
			if data := mapping.Get(id); data != nil {
				meta = new(JournalMeta)
				// decode errors silently ignored, matching file style
				meta.UnmarshalMsg(data)
			}
			journal := NewJournal(ID(id), tx, journals.Bucket(id))
			if err := fn(journal, meta); err == RangeStop {
				return nil
			} else if err != nil {
				return err
			}
			id, _ = cur.Next()
		}
		return nil
	})
}
| if err := fn(journal, meta); err == RangeStop { 237 | return nil 238 | } else if err != nil { 239 | return err 240 | } 241 | id, _ = cur.Next() 242 | } 243 | return nil 244 | }) 245 | } 246 | 247 | func (kv *kvJournalManager) ExportAll() (FileMetaList, error) { 248 | var list FileMetaList 249 | err := kv.db.View(func(tx *bolt.Tx) error { 250 | journals := tx.Bucket(journalsBucket) 251 | cur := journals.Cursor() 252 | id, _ := cur.First() 253 | for id != nil { 254 | journal := NewJournal(ID(id), tx, journals.Bucket(id)) 255 | list = append(list, journal.List()...) 256 | id, _ = cur.Next() 257 | } 258 | return nil 259 | }) 260 | return list, err 261 | } 262 | 263 | func (kv *kvJournalManager) Close() error { 264 | return kv.db.Close() 265 | } 266 | 267 | var ( 268 | RangeStop = errors.New("stop") 269 | ForEachStop = RangeStop 270 | ) 271 | 272 | var ( 273 | mappingBucket = []byte("mapping") 274 | journalsBucket = []byte("journals") 275 | ) 276 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | objstore near cache layer 3 |

4 | 5 | # objstore [![Go Report Card](https://goreportcard.com/badge/sphere.software/objstore)](https://goreportcard.com/report/sphere.software/objstore) ![Status Badge](https://img.shields.io/badge/status-open--beta-orange.svg) 6 | _A Multi-Master Distributed Caching Layer for Amazon S3_ 7 | 8 | This project aims to provide an easy to use, self-organising multi-master caching layer for various cloud storage backends, e.g. S3. It combines functionality of a simple object storage with added robustness of cross-node journal synchronisation, object replication and cluster auto-discovery. 9 | 10 | We know that Amazon S3 has proven to be fast and reliable, a PaaS solution that acts like a 11 | backbone for many business applications. But the cost of service may become too high 12 | depending on your usage patterns, for example, if your application runs in your own datacenter, then 13 | the file transfer costs will skyrocket. Also request frequency has its limits. 14 | 15 | **Objstore Cluster** aims to mitigate this problem, it's supposed to be running in your datacenter, implementing a near-cache for all files. Its API allows to upload, head, read and delete files by key, like any other object. All related meta-data may be preserved with files as well. This caching layer will upload the file to S3 and store a copy locally, with optional replication among other nodes. Next time you'd access the file, it will be served from a local machine, or its near nodes, in case of a cache miss, it will get the file from S3 directly. 16 | 17 |

18 | objstore cluster overview 19 |

20 | 21 | The cluster must be robust, although it's not required to reach the same levels as traditional DBs or other stores that are required to be highly consistent, a certain amount of fault resilience is important because a dropped cache implies a huge (and unplanned) spike in latency and CoS, which may hurt infrastructure and your wallet. And caches may recover very slowly. 22 | 23 | Objstore leverages a P2P discovery mechanism, so once some nodes are started already, another one might join knowing only one physical IP address. The cluster sets up a logical network over persistent TCP connections between nodes and uses an internal HTTP API to share events and data between nodes, eliminating the single point of failure. Everything involves zero configuration, except the HTTP load balancer which may be any of your choice. 24 | 25 |

26 | objstore cluster zoom 27 |

28 | 29 | Node disk sizes are required to be identical, the overall limit of the cluster is limited by size of the smallest disk used for data replication. If you want to expand the size linearly, setup another Object Store cluster and tweak your HTTP load balancer. 30 | 31 | ## Installation 32 | 33 | ``` 34 | go get -u sphere.software/objstore/cmd/objstore 35 | ``` 36 | 37 | For local Docker builds: 38 | 39 | ``` 40 | cd deploy && make local 41 | ``` 42 | 43 | For remote / CI Docker builds under Linux: 44 | 45 | ``` 46 | cd deploy && make build 47 | ``` 48 | 49 | ## Server usage 50 | 51 | ``` 52 | $ objstore -h 53 | 54 | Usage: objstore [OPTIONS] 55 | 56 | A Multi-Master Distributed Caching Layer for Amazon S3. 57 | Version 0.1 http://github.com/SphereSoftware/objstore 58 | 59 | Options: 60 | -d, --debug Debug level to use, currently 0/1 suppported. ($APP_DEBUG_LEVEL) 61 | -N, --nodes A list of cluster nodes to join for discovery and journal updates ($APP_CLUSTER_NODES) 62 | -T, --tag="default" Cluster tag name ($APP_CLUSTER_TAGNAME) 63 | --private-addr="0.0.0.0:11999" Listen address for cluster discovery and private API ($NET_PRIVATE_ADDR) 64 | --debug-addr="" Listen address for private API debugging using external tools ($NET_DEBUG_ADDR) 65 | --public-addr="0.0.0.0:10999" Listen address for external access and public HTTP API ($NET_PUBLIC_ADDR) 66 | --state-dir="state/" Directory where to keep local state and journals. ($APP_STATE_DIR) 67 | --files-dir="files/" Directory where to keep local files. 
($APP_FILES_DIR) 68 | -R, --region="us-east-1" Amazon S3 region name ($S3_REGION_NAME) 69 | -B, --bucket="00-objstore-test" Amazon S3 bucket name ($S3_BUCKET_NAME) 70 | ``` 71 | 72 | Example use, single node: 73 | 74 | ```bash 75 | $ objstore -d 1 # with debug 76 | 77 | [INFO] node ID: 01BRNEKEZGKFSPAT10KZM5A141 78 | [WARN] no additional cluster nodes specified, current node starts solo 79 | [GIN-debug] Listening and serving HTTP on 0.0.0.0:10999 80 | ``` 81 | 82 | You can start another nodes, on the same machine or another. If starting on the same machine, make sure that data directories are not colliding and the private/public API ports are different. To start a node that will join to the cluster with first one: 83 | 84 | ```bash 85 | $ objstore -d 1 -N localhost:11999 \ 86 | --private-addr="0.0.0.0:11997" --public-addr="0.0.0.0:10997" 87 | 88 | [INFO] node ID: 01BRNKZ01MFSJJDN98F6M0640K 89 | [GIN-debug] Listening and serving HTTP on 0.0.0.0:10997 90 | [INFO] joining to cluster [localhost:11999] 91 | [INFO] sync done 92 | ``` 93 | 94 | By checking both nodes logs, you can see that `/private/v1/sync` has been called from each other. After that journals are in sync. More about journal synchronisation and node failure scenarios will be written soon in a standalone document. 95 | 96 | ## Client usage 97 | 98 | At this moment both nodes are listening on the public HTTP API addresses: 99 | 100 | * `localhost:10999` 101 | * `localhost:10997` 102 | 103 | You don't need to use a load balancer to start utilising the cluster, the requests may be directed to any active node in the cluster. Load balancer would allow to split the workload equally, also it helps to avoid calling unresponsive nodes. 
104 | 105 | ### Public API endpoints 106 | 107 | ``` 108 | GET /api/v1/get/:id 109 | GET /api/v1/meta/:id 110 | POST /api/v1/put 111 | POST /api/v1/delete/:id 112 | GET /api/v1/id 113 | GET /api/v1/version 114 | GET /api/v1/ping 115 | GET /api/v1/stats 116 | ``` 117 | 118 | ### How to upload files 119 | 120 | 1. **Generate a new ID.** All files are associated with IDs of [ULID](https://github.com/oklog/ulid) format, so you must generate your own or just ask any node for new ID. 121 | ```bash 122 | $ curl localhost:10999/api/v1/id 123 | 124 | 01BRNMMS1DK3CBD4ZZM2TQ8C5B 125 | ``` 126 | 127 | 2. **Choose consistency level** Three levels are available: 128 | ```go 129 | // ConsistencyLocal flags file for local persistence only, implying 130 | // that the file body will be stored on a single node. Default. 131 | ConsistencyLocal ConsistencyLevel = 0 132 | // ConsistencyS3 flags file for local+S3 persistence, implying that the file 133 | // body will be stored on a single node and Amazon S3. 134 | ConsistencyS3 ConsistencyLevel = 1 135 | // ConsistencyFull flags file to be replicated across all existing nodes in cluster and S3. 136 | ConsistencyFull ConsistencyLevel = 2 137 | ``` 138 | 139 | 3. **Specify headers** The following headers are available: 140 | * `X-Meta-ID` is a previously generated or retrieved [ULID](https://github.com/oklog/ulid); 141 | * `X-Meta-Name` is the file name, used with extension to serve the content with proper type; 142 | * `X-Meta-ConsistencyLevel` specifies the consistency level for the file, it may be upgraded later; 143 | * `X-Meta-UserMeta` specifies any meta data for the file as JSON map, stored in S3 tags. 144 | 145 | 4. **POST** Example, let's upload `test.txt` with replication across cluster and S3. 
146 | 147 | ``` 148 | $ curl -d @test.txt -H "X-Meta-ConsistencyLevel: 2" -H "X-Meta-Name: test.txt" \ 149 | -H "X-Meta-ID: 01BRNMMS1DK3CBD4ZZM2TQ8C5B" localhost:10999/api/v1/put 150 | ``` 151 | 152 | ### How to read files 153 | 154 | Accessing a file is straightforward: 155 | 156 | ``` 157 | $ curl localhost:10999/api/v1/get/01BRNMMS1DK3CBD4ZZM2TQ8C5B 158 | 159 | It works! 160 | ``` 161 | 162 | More is going on under the covers. Apparently the file exists on both nodes and S3, but in case when file is stored only on a single node (with level=1), then the node would fetch this file from another one and serve, this fact can be checked by inspecting the headers: 163 | 164 | ``` 165 | $ curl -v localhost:10999/api/v1/get/01BRNMMS1DK3CBD4ZZM2TQ8C5B 166 | 167 | < HTTP/1.1 200 OK 168 | < Accept-Ranges: bytes 169 | < Content-Length: 9 170 | < Content-Type: text/plain; charset=utf-8 171 | < Last-Modified: Tue, 29 Aug 2017 00:11:35 GMT 172 | < X-Meta-Consistencylevel: 2 173 | < X-Meta-Id: 01BRNMMS1DK3CBD4ZZM2TQ8C5B 174 | < X-Meta-Name: test.txt 175 | < Date: Tue, 29 Aug 2017 00:19:45 GMT 176 | ``` 177 | 178 | In case when file is fetched from another node, `X-Meta-Symlink: true` will appear. If file is known but has been deleted, a 404 with `X-Meta-Deleted: true` will be served. And if file has been missing locally and fetched from S3, a header flag `X-Meta-Fetched: true` will appear. 179 | 180 | By default, fetching from S3 in case of cache misses is disabled, to get a file with possibility of fetching it from the cloud storage backend, use `X-Meta-Fetch`: 181 | 182 | ```bash 183 | # to check we delete local state and restarted nodes (!) 
184 | 185 | $ curl -v localhost:10999/api/v1/get/01BRNMMS1DK3CBD4ZZM2TQ8C5B 186 | 187 | < HTTP/1.1 404 Not Found 188 | 189 | $ curl -v -H "X-Meta-Fetch: 1" localhost:10999/api/v1/get/01BRNMMS1DK3CBD4ZZM2TQ8C5B 190 | 191 | < HTTP/1.1 200 OK 192 | < Accept-Ranges: bytes 193 | < Content-Length: 9 194 | < Content-Type: text/plain; charset=utf-8 195 | < Last-Modified: Tue, 29 Aug 2017 00:49:17 GMT 196 | < X-Meta-Consistencylevel: 2 197 | < X-Meta-Fetched: true 198 | < X-Meta-Id: 01BRNMMS1DK3CBD4ZZM2TQ8C5B 199 | < X-Meta-Name: test.txt 200 | < Date: Tue, 29 Aug 2017 00:49:17 GMT 201 | 202 | It works! 203 | ``` 204 | 205 | Notice that file has been fetched with `X-Meta-Fetched: true`, it also has all properties saved such as name, content type and the consistency level. The latter means it was also replicated again across the nodes. 206 | 207 | ## Acknowledgements 208 | 209 | The project is in Open Beta stage, please test it before using in something serious. 210 | 211 | ## TODO / Roadmap 212 | 213 | * Implement eviction policy (LRU / ARC / ...) 
214 | * Document the internal design 215 | * Improve deployment scripts 216 | * Test coverage 217 | * Implement a sample client 218 | * Benchmarks 219 | 220 | ## License 221 | 222 | MIT 223 | -------------------------------------------------------------------------------- /api/private.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "log" 10 | "mime" 11 | "net" 12 | "net/http" 13 | "net/http/httputil" 14 | "path/filepath" 15 | "strconv" 16 | "strings" 17 | "time" 18 | 19 | "github.com/astranet/astranet" 20 | "github.com/astranet/astranet/addr" 21 | "github.com/gin-gonic/gin" 22 | 23 | "sphere.software/objstore" 24 | ) 25 | 26 | type PrivateServer struct { 27 | router astranet.AstraNet 28 | mux http.Handler 29 | 30 | nodeID string 31 | debug bool 32 | tags []string 33 | } 34 | 35 | func NewPrivateServer(nodeID string, tags ...string) *PrivateServer { 36 | return &PrivateServer{ 37 | nodeID: nodeID, 38 | tags: tags, 39 | 40 | // initializes server+client+router for private net 41 | router: astranet.New().Router().WithEnv(tags...), 42 | } 43 | } 44 | 45 | func (p *PrivateServer) SetDebug(enabled bool) { 46 | p.debug = enabled 47 | } 48 | 49 | func (p *PrivateServer) Env() []string { 50 | return p.tags 51 | } 52 | 53 | func (p *PrivateServer) Router() astranet.AstraNet { 54 | return p.router 55 | } 56 | 57 | // ListenAndServe initializes a HTTP listener for private services, starts 58 | // listening on a TCP address for virtual network transport. 
59 | func (p *PrivateServer) ListenAndServe(addr string) error { 60 | listener, err := p.router.Bind("", "objstore-"+p.nodeID) 61 | if err != nil { 62 | return err 63 | } 64 | if p.debug { 65 | log.Println("ListenAndServe on", addr, "with service", "objstore-"+p.nodeID) 66 | log.Println(p.router.Services()) 67 | } 68 | // start a HTTP server using node's private listener 69 | go http.Serve(listener, p.mux) 70 | 71 | if err = p.router.ListenAndServe("tcp4", addr); err == nil { 72 | p.router.Join("tcp4", addr) 73 | } 74 | return err 75 | } 76 | 77 | const defaultPort = "11999" 78 | 79 | // JoinCluster connects to another machines via TCP to join the virtual network. 80 | func (p *PrivateServer) JoinCluster(nodes []string) error { 81 | var failed []string 82 | for _, nodeAddr := range nodes { 83 | if _, _, err := net.SplitHostPort(nodeAddr); err != nil { 84 | nodeAddr = nodeAddr + ":" + defaultPort 85 | } 86 | if err := p.router.Join("tcp4", nodeAddr); err != nil { 87 | failed = append(failed, nodeAddr) 88 | } 89 | } 90 | if len(failed) > 0 { 91 | return fmt.Errorf("failed to join nodes: %v", failed) 92 | } 93 | p.router.Services() 94 | return nil 95 | } 96 | 97 | func newHTTPTransport(router astranet.AstraNet) *http.Transport { 98 | return &http.Transport{ 99 | DisableKeepAlives: true, 100 | Dial: func(network, addr string) (net.Conn, error) { 101 | host, _, err := net.SplitHostPort(addr) 102 | if err != nil { 103 | return nil, err 104 | } 105 | return router.Dial(network, host) 106 | }, 107 | } 108 | } 109 | 110 | // ExposeAPI initiates HTTP routing to the private API via loopback. 
111 | func (p *PrivateServer) ExposeAPI(addr string) error { 112 | privateProxy := &httputil.ReverseProxy{ 113 | Transport: newHTTPTransport(p.router), 114 | FlushInterval: time.Millisecond * 10, 115 | Director: func(req *http.Request) { 116 | req.URL.Scheme = "http" 117 | req.URL.Host = "objstore-" + p.nodeID 118 | }, 119 | } 120 | return http.ListenAndServe(addr, privateProxy) 121 | } 122 | 123 | type NodeInfo struct { 124 | ID string `json:"id"` 125 | Addr string `json:"addr"` 126 | VAddr string `json:"vaddr"` 127 | } 128 | 129 | type NodeIter func(id, addr, vaddr string) error 130 | 131 | var ( 132 | RangeStop = errors.New("stop") 133 | ForEachStop = RangeStop 134 | ) 135 | 136 | func forEachNode(router astranet.AstraNet, iterFunc NodeIter) error { 137 | services := router.Services() 138 | seen := make(map[string]bool) 139 | for _, info := range services { 140 | if !strings.HasPrefix(info.Service, "objstore-") { 141 | continue 142 | } 143 | if info.Upstream == nil { 144 | continue 145 | } 146 | nodeID := strings.TrimPrefix(strings.Split(info.Service, ".")[0], "objstore-") 147 | host, _, _ := net.SplitHostPort(info.Upstream.RAddr().String()) 148 | if seen[nodeID+host] { 149 | continue 150 | } else { 151 | seen[nodeID+host] = true 152 | } 153 | vaddr := getAddr(info.Host, info.Port) 154 | if err := iterFunc(nodeID, host, vaddr); err == RangeStop { 155 | return nil 156 | } else if err != nil { 157 | return err 158 | } 159 | } 160 | return nil 161 | } 162 | 163 | func getAddr(host uint64, port uint32) string { 164 | return fmt.Sprintf("%s:%d", addr.Uint2Host(host), port) 165 | } 166 | 167 | func (p *PrivateServer) RouteAPI(store objstore.Store) { 168 | r := gin.Default() 169 | r.GET("/private/v1/ping", p.PingHandler()) 170 | r.GET("/private/v1/nodes", p.ListNodesHandler()) 171 | r.POST("/private/v1/announce", p.AnnounceHandler(store)) 172 | r.GET("/private/v1/get/:id", p.GetHandler(store)) 173 | r.POST("/private/v1/message", p.MessageHandler(store)) 174 | 
r.POST("/private/v1/put", p.PutHandler(store)) 175 | r.POST("/private/v1/sync", p.SyncHandler(store)) 176 | r.POST("/private/v1/delete/:id", p.DeleteHandler(store)) 177 | p.mux = r 178 | } 179 | 180 | func (p *PrivateServer) PingHandler() gin.HandlerFunc { 181 | return func(c *gin.Context) { 182 | c.String(200, p.nodeID) 183 | } 184 | } 185 | 186 | func (p *PrivateServer) ListNodesHandler() gin.HandlerFunc { 187 | return func(c *gin.Context) { 188 | var nodes []NodeInfo 189 | if err := forEachNode(p.router, func(id, addr, vaddr string) error { 190 | nodes = append(nodes, NodeInfo{ 191 | ID: id, 192 | Addr: addr, 193 | VAddr: vaddr, 194 | }) 195 | return nil 196 | }); err != nil { 197 | c.String(500, "error: %v", err) 198 | return 199 | } 200 | c.JSON(200, nodes) 201 | } 202 | } 203 | 204 | func (p *PrivateServer) AnnounceHandler(store objstore.Store) gin.HandlerFunc { 205 | return func(c *gin.Context) { 206 | var event *objstore.EventAnnounce 207 | if err := c.BindJSON(&event); err != nil { 208 | return 209 | } 210 | store.ReceiveEventAnnounce(event) 211 | c.Status(200) 212 | } 213 | } 214 | 215 | func (p *PrivateServer) MessageHandler(store objstore.Store) gin.HandlerFunc { 216 | return func(c *gin.Context) { 217 | r := io.LimitReader(c.Request.Body, 8*1024) // 8kB limit 218 | data, _ := ioutil.ReadAll(r) 219 | c.Request.Body.Close() 220 | store.EmitEventAnnounce(&objstore.EventAnnounce{ 221 | Type: objstore.EventOpaqueData, 222 | OpaqueData: data, 223 | }) 224 | c.Status(200) 225 | } 226 | } 227 | 228 | func (p *PrivateServer) GetHandler(store objstore.Store) gin.HandlerFunc { 229 | return func(c *gin.Context) { 230 | r, meta, err := store.GetObject(c.Param("id")) 231 | if err == objstore.ErrNotFound { 232 | c.Status(404) 233 | return 234 | } else if err != nil { 235 | c.String(500, "error: %v", err) 236 | return 237 | } 238 | serveObject(c, r, meta) 239 | } 240 | } 241 | 242 | func serveMeta(c *gin.Context, meta *objstore.FileMeta) { 243 | c.Header("X-Meta-ID", 
meta.ID) 244 | if len(meta.Name) > 0 { 245 | c.Header("X-Meta-Name", meta.Name) 246 | } 247 | if len(meta.UserMeta) > 0 { 248 | user, _ := json.Marshal(meta.UserMeta) 249 | c.Header("X-Meta-UserMeta", string(user)) 250 | } 251 | c.Header("X-Meta-ConsistencyLevel", strconv.Itoa(int(meta.Consistency))) 252 | if meta.IsSymlink { 253 | c.Header("X-Meta-Symlink", "true") 254 | } 255 | if meta.IsFetched { 256 | c.Header("X-Meta-Fetched", "true") 257 | } 258 | if meta.IsDeleted { 259 | c.Header("X-Meta-Deleted", "true") 260 | } 261 | } 262 | 263 | func serveObject(c *gin.Context, r io.ReadCloser, meta *objstore.FileMeta) { 264 | serveMeta(c, meta) 265 | ts := time.Unix(0, meta.Timestamp) 266 | if seekable, ok := r.(io.ReadSeeker); ok { 267 | http.ServeContent(c.Writer, c.Request, meta.Name, ts, seekable) 268 | return 269 | } 270 | // actually do all the work http.ServeContent does, but without support 271 | // of ranges and partial reads due to lack of io.Seeker interface. 272 | if !ts.IsZero() { 273 | c.Header("Last-Modified", ts.UTC().Format(http.TimeFormat)) 274 | } 275 | ctype := mime.TypeByExtension(filepath.Ext(meta.Name)) 276 | c.Header("Content-Type", ctype) 277 | c.Header("Content-Length", strconv.FormatInt(meta.Size, 10)) 278 | io.CopyN(c.Writer, r, meta.Size) 279 | } 280 | 281 | func (p *PrivateServer) PutHandler(store objstore.Store) gin.HandlerFunc { 282 | return func(c *gin.Context) { 283 | putObject(c, store) 284 | } 285 | } 286 | 287 | func putObject(c *gin.Context, store objstore.Store) { 288 | userMeta := func(data string) map[string]string { 289 | if len(data) == 0 { 290 | return nil 291 | } 292 | var v map[string]string 293 | json.Unmarshal([]byte(data), &v) 294 | return v 295 | } 296 | size, _ := strconv.ParseInt(c.Request.Header.Get("Content-Length"), 10, 64) 297 | meta := &objstore.FileMeta{ 298 | ID: c.Request.Header.Get("X-Meta-ID"), 299 | Name: c.Request.Header.Get("X-Meta-Name"), 300 | UserMeta: userMeta(c.Request.Header.Get("X-Meta-UserMeta")), 
301 | Timestamp: time.Now().UnixNano(), 302 | Size: size, 303 | } 304 | if len(meta.ID) == 0 { 305 | c.String(400, "error: ID not specified, use /id to get one") 306 | return 307 | } else if !objstore.CheckID(meta.ID) { 308 | err := fmt.Errorf("objstore: not a valid ULID: %s", meta.ID) 309 | c.String(400, "error: %v", err) 310 | return 311 | } 312 | levelData := c.Request.Header.Get("X-Meta-ConsistencyLevel") 313 | if len(levelData) == 0 { 314 | level, _ := (objstore.ConsistencyLevel)(0).Check() 315 | meta.Consistency = level 316 | } else { 317 | n, _ := strconv.Atoi(levelData) 318 | level, err := (objstore.ConsistencyLevel)(n).Check() 319 | if err != nil { 320 | c.String(400, "error: %v", err) 321 | return 322 | } 323 | meta.Consistency = level 324 | } 325 | if _, err := store.PutObject(c.Request.Body, meta); err != nil { 326 | c.String(400, "error: %v", err) 327 | return 328 | } 329 | c.Status(200) 330 | } 331 | 332 | type SyncResponse struct { 333 | Added objstore.FileMetaList `json:"list_added"` 334 | Deleted objstore.FileMetaList `json:"list_deleted"` 335 | } 336 | 337 | func (p *PrivateServer) SyncHandler(store objstore.Store) gin.HandlerFunc { 338 | return func(c *gin.Context) { 339 | var list objstore.FileMetaList 340 | if err := c.BindJSON(&list); err != nil { 341 | return 342 | } 343 | added, deleted, err := store.Diff(list) 344 | if err != nil { 345 | c.String(400, "error: %v", err) 346 | return 347 | } 348 | c.JSON(200, SyncResponse{ 349 | Added: added, 350 | Deleted: deleted, 351 | }) 352 | } 353 | } 354 | 355 | func deleteObject(c *gin.Context, store objstore.Store) { 356 | meta, err := store.DeleteObject(c.Param("id")) 357 | if err == objstore.ErrNotFound { 358 | c.Status(404) 359 | return 360 | } else if err != nil { 361 | c.String(500, "error: %v", err) 362 | return 363 | } 364 | if meta != nil { 365 | serveMeta(c, meta) 366 | } 367 | c.Status(200) 368 | } 369 | 370 | func (p *PrivateServer) DeleteHandler(store objstore.Store) gin.HandlerFunc { 371 
| return func(c *gin.Context) { 372 | deleteObject(c, store) 373 | } 374 | } 375 | -------------------------------------------------------------------------------- /journal/journal.go: -------------------------------------------------------------------------------- 1 | // Package journal is responsible for maintaining the inner state of the OBJSTORE, 2 | // journals represent managed event logs that can be diffed, joined and stored as 3 | // in-memory B-tree or in a BoltDB bucket. All operations on BoltDB are performed 4 | // in the context of a transaction, so journals are ACID-compatible. 5 | package journal 6 | 7 | import ( 8 | "errors" 9 | "fmt" 10 | "io" 11 | "strings" 12 | "sync" 13 | "time" 14 | 15 | "github.com/boltdb/bolt" 16 | "github.com/cznic/b" 17 | ) 18 | 19 | type Journal interface { 20 | ID() ID 21 | Get(k string) *FileMeta 22 | Exists(k string) bool 23 | Set(k string, m *FileMeta) error 24 | Delete(k string) error 25 | Diff(j Journal) (added FileMetaList, deleted FileMetaList) 26 | Range(start string, limit int, fn func(k string, v *FileMeta) error) (string, error) 27 | Join(target Journal, mapping Mapping) error 28 | List() FileMetaList 29 | Close() error 30 | Meta() *JournalMeta 31 | } 32 | 33 | type kvJournal struct { 34 | id ID 35 | 36 | b *bolt.Bucket 37 | tx *bolt.Tx 38 | } 39 | 40 | type btreeJournal struct { 41 | id ID 42 | 43 | t *b.Tree 44 | mux *sync.Mutex 45 | closed bool 46 | } 47 | 48 | func (b *btreeJournal) Close() error { 49 | b.mux.Lock() 50 | defer b.mux.Unlock() 51 | b.t.Close() 52 | b.closed = true 53 | return nil 54 | } 55 | 56 | func (j *kvJournal) Close() error { 57 | // no-op as kvJournal is managed by BoltDB transaction 58 | return nil 59 | } 60 | 61 | func (b *btreeJournal) Get(k string) *FileMeta { 62 | b.mux.Lock() 63 | if b.closed { 64 | b.mux.Unlock() 65 | return nil 66 | } 67 | v, ok := b.t.Get(k) 68 | b.mux.Unlock() 69 | if ok { 70 | return v.(*FileMeta) 71 | } 72 | return nil 73 | } 74 | 75 | func (b *btreeJournal) 
Exists(k string) bool { 76 | b.mux.Lock() 77 | if b.closed { 78 | b.mux.Unlock() 79 | return false 80 | } 81 | _, ok := b.t.Get(k) 82 | b.mux.Unlock() 83 | return ok 84 | } 85 | 86 | func (b *btreeJournal) Set(k string, m *FileMeta) error { 87 | if m == nil { 88 | return errors.New("journal: nil entries not allowed") 89 | } 90 | if len(k) == 0 { 91 | return errors.New("journal: zero-length keys not allowed") 92 | } 93 | b.mux.Lock() 94 | if b.closed { 95 | b.mux.Unlock() 96 | return closedErr 97 | } 98 | b.t.Set(k, m) 99 | b.mux.Unlock() 100 | return nil 101 | } 102 | 103 | func (b *btreeJournal) Delete(k string) error { 104 | if len(k) == 0 { 105 | return errors.New("journal: zero-length keys not allowed") 106 | } 107 | b.mux.Lock() 108 | if b.closed { 109 | b.mux.Unlock() 110 | return closedErr 111 | } 112 | b.t.Delete(k) 113 | b.mux.Unlock() 114 | return nil 115 | } 116 | 117 | var closedErr = errors.New("journal: closed already") 118 | 119 | func (b *btreeJournal) Range(start string, limit int, fn func(k string, v *FileMeta) error) (string, error) { 120 | b.mux.Lock() 121 | if b.closed { 122 | b.mux.Unlock() 123 | return "", closedErr 124 | } 125 | iter, ok := b.t.Seek(start) 126 | b.mux.Unlock() 127 | if !ok { 128 | return "", nil 129 | } 130 | defer iter.Close() 131 | 132 | var processed int 133 | var lastK string 134 | for { 135 | b.mux.Lock() 136 | if b.closed { 137 | b.mux.Unlock() 138 | return "", closedErr 139 | } 140 | k, v, err := iter.Next() 141 | b.mux.Unlock() 142 | if err == nil { 143 | lastK = k.(string) 144 | if err := fn(k.(string), v.(*FileMeta)); err == ErrRangeStop { 145 | return lastK, nil 146 | } else if err != nil { 147 | return lastK, err 148 | } 149 | } else { 150 | return "", nil 151 | } 152 | processed++ 153 | if limit > 0 && processed >= limit { 154 | break 155 | } 156 | } 157 | return lastK, nil 158 | } 159 | 160 | func (b *btreeJournal) Join(target Journal, mapping Mapping) error { 161 | return errors.New("journal: unjoinable 
journals") 162 | } 163 | 164 | func (b *btreeJournal) Meta() *JournalMeta { 165 | firstKey, _ := b.t.First() 166 | lastKey, _ := b.t.Last() 167 | return &JournalMeta{ 168 | ID: b.id, 169 | 170 | FirstKey: firstKey.(string), 171 | LastKey: lastKey.(string), 172 | CountTotal: b.t.Len(), 173 | } 174 | } 175 | 176 | func (b *btreeJournal) ID() ID { 177 | return b.id 178 | } 179 | 180 | func (b *btreeJournal) List() FileMetaList { 181 | b.mux.Lock() 182 | if b.closed { 183 | b.mux.Unlock() 184 | return nil 185 | } 186 | iter, err := b.t.SeekFirst() 187 | b.mux.Unlock() 188 | if err == nil { 189 | defer iter.Close() 190 | } 191 | 192 | var list FileMetaList 193 | var v interface{} 194 | for err == nil { 195 | b.mux.Lock() 196 | _, v, err = iter.Next() 197 | b.mux.Unlock() 198 | if err == nil { 199 | list = append(list, v.(*FileMeta)) 200 | } 201 | } 202 | return list 203 | } 204 | 205 | func (prev *btreeJournal) Diff(next Journal) (added FileMetaList, deleted FileMetaList) { 206 | switch next := next.(type) { 207 | case *btreeJournal: 208 | prev.mux.Lock() 209 | if prev.closed { 210 | prev.mux.Unlock() 211 | return nil, nil 212 | } 213 | prevIter, prevErr := prev.t.SeekFirst() 214 | prev.mux.Unlock() 215 | if prevErr == nil { 216 | defer prevIter.Close() 217 | } 218 | next.mux.Lock() 219 | if next.closed { 220 | next.mux.Unlock() 221 | return nil, nil 222 | } 223 | nextIter, nextErr := next.t.SeekFirst() 224 | next.mux.Unlock() 225 | if nextErr == nil { 226 | defer nextIter.Close() 227 | } 228 | 229 | switch { 230 | case prevErr == io.EOF && nextErr == io.EOF: 231 | return nil, nil 232 | case prevErr == io.EOF: 233 | // all added 234 | return next.List(), nil 235 | case nextErr == io.EOF: 236 | // all deleted 237 | return nil, prev.List() 238 | default: 239 | prev.mux.Lock() 240 | prevK, prevV, prevErr := prevIter.Next() 241 | prev.mux.Unlock() 242 | next.mux.Lock() 243 | nextK, nextV, nextErr := nextIter.Next() 244 | next.mux.Unlock() 245 | 246 | for { 247 | switch { 
248 | case prevErr == io.EOF: 249 | if nextErr == io.EOF { 250 | // done 251 | return 252 | } 253 | added = append(added, nextV.(*FileMeta)) 254 | // move next iterator 255 | next.mux.Lock() 256 | nextK, nextV, nextErr = nextIter.Next() 257 | next.mux.Unlock() 258 | case nextErr == io.EOF: 259 | if prevErr == io.EOF { 260 | // done 261 | return 262 | } 263 | deleted = append(deleted, prevV.(*FileMeta)) 264 | // move prev iterator 265 | prev.mux.Lock() 266 | prevK, prevV, prevErr = prevIter.Next() 267 | prev.mux.Unlock() 268 | default: 269 | prevCmp := strings.Compare(prevK.(string), nextK.(string)) 270 | switch { 271 | case prevCmp < 0: // nextK > prevK 272 | // prevK has been deleted 273 | deleted = append(deleted, prevV.(*FileMeta)) 274 | // advance prev iter 275 | prev.mux.Lock() 276 | prevK, prevV, prevErr = prevIter.Next() 277 | prev.mux.Unlock() 278 | case prevCmp > 0: // nextK < prevK 279 | // nextK has been insterted 280 | added = append(added, nextV.(*FileMeta)) 281 | // advance next iter 282 | next.mux.Lock() 283 | nextK, nextV, nextErr = nextIter.Next() 284 | next.mux.Unlock() 285 | default: 286 | // same key -> advance iterators 287 | prev.mux.Lock() 288 | prevK, prevV, prevErr = prevIter.Next() 289 | prev.mux.Unlock() 290 | next.mux.Lock() 291 | nextK, nextV, nextErr = nextIter.Next() 292 | next.mux.Unlock() 293 | } 294 | } 295 | } 296 | } 297 | case *kvJournal: 298 | prev.mux.Lock() 299 | if prev.closed { 300 | prev.mux.Unlock() 301 | return nil, nil 302 | } 303 | prevIter, prevErr := prev.t.SeekFirst() 304 | prev.mux.Unlock() 305 | if prevErr == nil { 306 | defer prevIter.Close() 307 | } 308 | nextIter := next.b.Cursor() 309 | 310 | switch { 311 | case prevErr == io.EOF && nextIter == nil: 312 | return nil, nil 313 | case prevErr == io.EOF: 314 | // all added 315 | return next.List(), nil 316 | case nextIter == nil: 317 | // all deleted 318 | return nil, prev.List() 319 | default: 320 | prev.mux.Lock() 321 | prevK, prevV, prevErr := prevIter.Next() 
322 | prev.mux.Unlock() 323 | nextK, nextV := nextIter.Next() 324 | 325 | for { 326 | switch { 327 | case prevErr == io.EOF: 328 | if nextK == nil { 329 | // done 330 | return 331 | } 332 | if nextV != nil { 333 | meta := new(FileMeta) 334 | meta.UnmarshalMsg(nextV) 335 | added = append(added, meta) 336 | } 337 | // move next iterator 338 | nextK, nextV = nextIter.Next() 339 | case nextK == nil: 340 | if prevErr == io.EOF { 341 | // done 342 | return 343 | } 344 | deleted = append(deleted, prevV.(*FileMeta)) 345 | // move prev iterator 346 | prev.mux.Lock() 347 | prevK, prevV, prevErr = prevIter.Next() 348 | prev.mux.Unlock() 349 | default: 350 | prevCmp := strings.Compare(prevK.(string), string(nextK)) 351 | switch { 352 | case prevCmp < 0: // nextK > prevK 353 | // prevK has been deleted 354 | deleted = append(deleted, prevV.(*FileMeta)) 355 | // advance prev iter 356 | prev.mux.Lock() 357 | prevK, prevV, prevErr = prevIter.Next() 358 | prev.mux.Unlock() 359 | case prevCmp > 0: // nextK < prevK 360 | // nextK has been insterted 361 | if nextV != nil { 362 | meta := new(FileMeta) 363 | meta.UnmarshalMsg(nextV) 364 | added = append(added, meta) 365 | } 366 | // advance next iter 367 | nextK, nextV = nextIter.Next() 368 | default: 369 | // same key -> advance iterators 370 | prev.mux.Lock() 371 | prevK, prevV, prevErr = prevIter.Next() 372 | prev.mux.Unlock() 373 | nextK, nextV = nextIter.Next() 374 | } 375 | } 376 | } 377 | } 378 | default: 379 | panic("indifferentiable types") 380 | } 381 | } 382 | 383 | func (prev *kvJournal) Diff(next Journal) (added FileMetaList, deleted FileMetaList) { 384 | switch next := next.(type) { 385 | case *kvJournal: 386 | prevIter := prev.b.Cursor() 387 | nextIter := next.b.Cursor() 388 | 389 | switch { 390 | case prevIter == nil && nextIter == nil: 391 | return nil, nil 392 | case prevIter == nil: 393 | // all added 394 | return next.List(), nil 395 | case nextIter == nil: 396 | // all deleted 397 | return nil, prev.List() 398 | 
default: 399 | prevK, prevV := prevIter.Next() 400 | nextK, nextV := nextIter.Next() 401 | 402 | for { 403 | switch { 404 | case prevK == nil: 405 | if nextK == nil { 406 | // done 407 | return 408 | } 409 | if nextV != nil { 410 | meta := new(FileMeta) 411 | meta.UnmarshalMsg(nextV) 412 | added = append(added, meta) 413 | } 414 | // move next iterator 415 | nextK, nextV = nextIter.Next() 416 | case nextK == nil: 417 | if prevK == nil { 418 | // done 419 | return 420 | } 421 | if prevV != nil { 422 | meta := new(FileMeta) 423 | meta.UnmarshalMsg(prevV) 424 | deleted = append(deleted, meta) 425 | } 426 | // move prev iterator 427 | prevK, prevV = prevIter.Next() 428 | default: 429 | prevCmp := strings.Compare(string(prevK), string(nextK)) 430 | switch { 431 | case prevCmp < 0: // nextK > prevK 432 | // prevK has been deleted 433 | if prevV != nil { 434 | meta := new(FileMeta) 435 | meta.UnmarshalMsg(prevV) 436 | deleted = append(deleted, meta) 437 | } 438 | // advance prev iter 439 | prevK, prevV = prevIter.Next() 440 | case prevCmp > 0: // nextK < prevK 441 | // nextK has been insterted 442 | if nextV != nil { 443 | meta := new(FileMeta) 444 | meta.UnmarshalMsg(nextV) 445 | added = append(added, meta) 446 | } 447 | // advance next iter 448 | nextK, nextV = nextIter.Next() 449 | default: 450 | // same key -> advance iterators 451 | prevK, prevV = prevIter.Next() 452 | nextK, nextV = nextIter.Next() 453 | } 454 | } 455 | } 456 | } 457 | case *btreeJournal: 458 | next.mux.Lock() 459 | if next.closed { 460 | next.mux.Unlock() 461 | return nil, nil 462 | } 463 | nextIter, nextErr := next.t.SeekFirst() 464 | next.mux.Unlock() 465 | if nextErr == nil { 466 | defer nextIter.Close() 467 | } 468 | prevIter := prev.b.Cursor() 469 | 470 | switch { 471 | case nextErr == io.EOF && prevIter == nil: 472 | return nil, nil 473 | case nextErr == io.EOF: 474 | // all added 475 | return prev.List(), nil 476 | case prevIter == nil: 477 | // all deleted 478 | return nil, next.List() 479 
| default: 480 | next.mux.Lock() 481 | nextK, nextV, nextErr := nextIter.Next() 482 | next.mux.Unlock() 483 | prevK, prevV := prevIter.Next() 484 | 485 | for { 486 | switch { 487 | case nextErr == io.EOF: 488 | if prevK == nil { 489 | // done 490 | return 491 | } 492 | if prevV != nil { 493 | meta := new(FileMeta) 494 | meta.UnmarshalMsg(prevV) 495 | added = append(added, meta) 496 | } 497 | // move prev iterator 498 | prevK, prevV = prevIter.Next() 499 | case prevK == nil: 500 | if nextErr == io.EOF { 501 | // done 502 | return 503 | } 504 | deleted = append(deleted, nextV.(*FileMeta)) 505 | // move next iterator 506 | next.mux.Lock() 507 | nextK, nextV, nextErr = nextIter.Next() 508 | next.mux.Unlock() 509 | default: 510 | nextCmp := strings.Compare(nextK.(string), string(prevK)) 511 | switch { 512 | case nextCmp < 0: // prevK > nextK 513 | // nextK has been deleted 514 | deleted = append(deleted, nextV.(*FileMeta)) 515 | // advance next iter 516 | next.mux.Lock() 517 | nextK, nextV, nextErr = nextIter.Next() 518 | next.mux.Unlock() 519 | case nextCmp > 0: // prevK < nextK 520 | // prevK has been insterted 521 | if prevV != nil { 522 | meta := new(FileMeta) 523 | meta.UnmarshalMsg(prevV) 524 | added = append(added, meta) 525 | } 526 | // advance prev iter 527 | prevK, prevV = prevIter.Next() 528 | default: 529 | // same key -> advance iterators 530 | next.mux.Lock() 531 | nextK, nextV, nextErr = nextIter.Next() 532 | next.mux.Unlock() 533 | prevK, prevV = prevIter.Next() 534 | } 535 | } 536 | } 537 | } 538 | default: 539 | panic("journal: indifferentiable types") 540 | } 541 | } 542 | 543 | // Join appends the current journal to the target one, reassigning atomically the mapping. 
544 | func (j *kvJournal) Join(target Journal, mapping Mapping) error { 545 | kvTarget, ok := target.(*kvJournal) 546 | if !ok { 547 | return errors.New("journal: unjoinable journals") 548 | } 549 | meta := mapping.Get(j.id) 550 | if meta == nil { 551 | // somehow mapping not available in the current Tx 552 | meta = j.Meta() 553 | } else if meta.ID != j.id { 554 | // ID mismatch -> already joined? 555 | err := fmt.Errorf("journal: already joined %s -> %s", j.id, meta.ID) 556 | return err 557 | } 558 | cur := j.b.Cursor() 559 | k, v := cur.First() 560 | var copied int 561 | for k != nil { 562 | if v == nil { 563 | continue 564 | } 565 | copied++ 566 | if err := kvTarget.b.Put(k, v); err != nil { 567 | return err 568 | } 569 | k, v = cur.Next() 570 | } 571 | 572 | meta.JoinedAt = time.Now().UnixNano() 573 | meta.ID = target.ID() // relocate mapping 574 | mapping.Set(j.id, meta) 575 | return nil 576 | } 577 | 578 | func (j *kvJournal) Range(start string, limit int, fn func(k string, v *FileMeta) error) (string, error) { 579 | cur := j.b.Cursor() 580 | k, v := cur.Seek([]byte(start)) 581 | var processed int 582 | for k != nil { 583 | var meta *FileMeta 584 | if v != nil { 585 | meta = new(FileMeta) 586 | meta.UnmarshalMsg(v) 587 | } 588 | if err := fn(string(k), meta); err == ErrRangeStop { 589 | return string(k), nil 590 | } else if err != nil { 591 | return string(k), err 592 | } 593 | k, v = cur.Next() 594 | processed++ 595 | if limit > 0 && processed >= limit { 596 | return string(k), nil 597 | } 598 | } 599 | return "", nil 600 | } 601 | 602 | func (j *kvJournal) Get(k string) *FileMeta { 603 | data := j.b.Get([]byte(k)) 604 | if data == nil { 605 | return nil 606 | } 607 | meta := new(FileMeta) 608 | meta.UnmarshalMsg(data) 609 | return meta 610 | } 611 | 612 | func (j *kvJournal) Exists(k string) bool { 613 | return j.b.Get([]byte(k)) != nil 614 | } 615 | 616 | func (j *kvJournal) Set(k string, m *FileMeta) error { 617 | v, err := m.MarshalMsg(nil) 618 | if err 
!= nil { 619 | return err 620 | } 621 | return j.b.Put([]byte(k), v) 622 | } 623 | 624 | func (j *kvJournal) Delete(k string) error { 625 | return j.b.Delete([]byte(k)) 626 | } 627 | 628 | func (j *kvJournal) List() FileMetaList { 629 | cur := j.b.Cursor() 630 | k, v := cur.First() 631 | var list FileMetaList 632 | for k != nil { 633 | if v != nil { 634 | meta := new(FileMeta) 635 | meta.UnmarshalMsg(v) 636 | list = append(list, meta) 637 | } 638 | k, v = cur.Next() 639 | } 640 | return list 641 | } 642 | 643 | func (j *kvJournal) ID() ID { 644 | return j.id 645 | } 646 | 647 | func (j *kvJournal) Meta() *JournalMeta { 648 | cur := j.b.Cursor() 649 | firstKey, _ := cur.First() 650 | lastKey, _ := cur.Last() 651 | return &JournalMeta{ 652 | ID: j.id, 653 | FirstKey: string(firstKey), 654 | LastKey: string(lastKey), 655 | CountTotal: j.b.Stats().KeyN, 656 | } 657 | } 658 | 659 | var ErrRangeStop = errors.New("range stop") 660 | 661 | // NewJournal creates a new journal backed by a BoltDB bucket, 662 | // in the context of a transaction. 663 | func NewJournal(id ID, tx *bolt.Tx, bucket *bolt.Bucket) Journal { 664 | return &kvJournal{ 665 | id: id, 666 | tx: tx, 667 | b: bucket, 668 | } 669 | } 670 | 671 | // MakeJournal allows to represent a serialized list of events 672 | // as an in-memory journal compatible with journals backed by a real KV store. 
673 | func MakeJournal(id ID, events FileMetaList) Journal { 674 | j := &btreeJournal{ 675 | id: id, 676 | mux: new(sync.Mutex), 677 | t: b.TreeNew(func(a interface{}, b interface{}) int { 678 | return strings.Compare(a.(string), b.(string)) 679 | }), 680 | } 681 | for i := range events { 682 | j.t.Set(string(events[i].ID), events[i]) 683 | } 684 | return j 685 | } 686 | -------------------------------------------------------------------------------- /journal/meta_gen.go: -------------------------------------------------------------------------------- 1 | package journal 2 | 3 | // NOTE: THIS FILE WAS PRODUCED BY THE 4 | // MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) 5 | // DO NOT EDIT 6 | 7 | import "github.com/tinylib/msgp/msgp" 8 | 9 | // DecodeMsg implements msgp.Decodable 10 | func (z *ConsistencyLevel) DecodeMsg(dc *msgp.Reader) (err error) { 11 | { 12 | var zxvk int 13 | zxvk, err = dc.ReadInt() 14 | (*z) = ConsistencyLevel(zxvk) 15 | } 16 | if err != nil { 17 | return 18 | } 19 | return 20 | } 21 | 22 | // EncodeMsg implements msgp.Encodable 23 | func (z ConsistencyLevel) EncodeMsg(en *msgp.Writer) (err error) { 24 | err = en.WriteInt(int(z)) 25 | if err != nil { 26 | return 27 | } 28 | return 29 | } 30 | 31 | // MarshalMsg implements msgp.Marshaler 32 | func (z ConsistencyLevel) MarshalMsg(b []byte) (o []byte, err error) { 33 | o = msgp.Require(b, z.Msgsize()) 34 | o = msgp.AppendInt(o, int(z)) 35 | return 36 | } 37 | 38 | // UnmarshalMsg implements msgp.Unmarshaler 39 | func (z *ConsistencyLevel) UnmarshalMsg(bts []byte) (o []byte, err error) { 40 | { 41 | var zbzg int 42 | zbzg, bts, err = msgp.ReadIntBytes(bts) 43 | (*z) = ConsistencyLevel(zbzg) 44 | } 45 | if err != nil { 46 | return 47 | } 48 | o = bts 49 | return 50 | } 51 | 52 | // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message 53 | func (z ConsistencyLevel) Msgsize() (s int) { 54 | s = msgp.IntSize 55 | return 56 | } 57 | 58 | // DecodeMsg 
implements msgp.Decodable 59 | func (z *FileMeta) DecodeMsg(dc *msgp.Reader) (err error) { 60 | var field []byte 61 | _ = field 62 | var zajw uint32 63 | zajw, err = dc.ReadMapHeader() 64 | if err != nil { 65 | return 66 | } 67 | for zajw > 0 { 68 | zajw-- 69 | field, err = dc.ReadMapKeyPtr() 70 | if err != nil { 71 | return 72 | } 73 | switch msgp.UnsafeString(field) { 74 | case "ID": 75 | z.ID, err = dc.ReadString() 76 | if err != nil { 77 | return 78 | } 79 | case "Name": 80 | z.Name, err = dc.ReadString() 81 | if err != nil { 82 | return 83 | } 84 | case "Size": 85 | z.Size, err = dc.ReadInt64() 86 | if err != nil { 87 | return 88 | } 89 | case "Timestamp": 90 | z.Timestamp, err = dc.ReadInt64() 91 | if err != nil { 92 | return 93 | } 94 | case "UserMeta": 95 | var zwht uint32 96 | zwht, err = dc.ReadMapHeader() 97 | if err != nil { 98 | return 99 | } 100 | if z.UserMeta == nil && zwht > 0 { 101 | z.UserMeta = make(map[string]string, zwht) 102 | } else if len(z.UserMeta) > 0 { 103 | for key := range z.UserMeta { 104 | delete(z.UserMeta, key) 105 | } 106 | } 107 | for zwht > 0 { 108 | zwht-- 109 | var zbai string 110 | var zcmr string 111 | zbai, err = dc.ReadString() 112 | if err != nil { 113 | return 114 | } 115 | zcmr, err = dc.ReadString() 116 | if err != nil { 117 | return 118 | } 119 | z.UserMeta[zbai] = zcmr 120 | } 121 | case "IsSymlink": 122 | z.IsSymlink, err = dc.ReadBool() 123 | if err != nil { 124 | return 125 | } 126 | case "Consistency": 127 | { 128 | var zhct int 129 | zhct, err = dc.ReadInt() 130 | z.Consistency = ConsistencyLevel(zhct) 131 | } 132 | if err != nil { 133 | return 134 | } 135 | case "IsDeleted": 136 | z.IsDeleted, err = dc.ReadBool() 137 | if err != nil { 138 | return 139 | } 140 | case "IsFetched": 141 | z.IsFetched, err = dc.ReadBool() 142 | if err != nil { 143 | return 144 | } 145 | default: 146 | err = dc.Skip() 147 | if err != nil { 148 | return 149 | } 150 | } 151 | } 152 | return 153 | } 154 | 155 | // EncodeMsg implements 
// Code generated by msgp (see journal/Makefile); do not edit by hand.
func (z *FileMeta) EncodeMsg(en *msgp.Writer) (err error) {
	// map header, size 9
	// write "ID"
	err = en.Append(0x89, 0xa2, 0x49, 0x44)
	if err != nil {
		return err
	}
	err = en.WriteString(z.ID)
	if err != nil {
		return
	}
	// write "Name"
	err = en.Append(0xa4, 0x4e, 0x61, 0x6d, 0x65)
	if err != nil {
		return err
	}
	err = en.WriteString(z.Name)
	if err != nil {
		return
	}
	// write "Size"
	err = en.Append(0xa4, 0x53, 0x69, 0x7a, 0x65)
	if err != nil {
		return err
	}
	err = en.WriteInt64(z.Size)
	if err != nil {
		return
	}
	// write "Timestamp"
	err = en.Append(0xa9, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70)
	if err != nil {
		return err
	}
	err = en.WriteInt64(z.Timestamp)
	if err != nil {
		return
	}
	// write "UserMeta"
	err = en.Append(0xa8, 0x55, 0x73, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x61)
	if err != nil {
		return err
	}
	err = en.WriteMapHeader(uint32(len(z.UserMeta)))
	if err != nil {
		return
	}
	for zbai, zcmr := range z.UserMeta {
		err = en.WriteString(zbai)
		if err != nil {
			return
		}
		err = en.WriteString(zcmr)
		if err != nil {
			return
		}
	}
	// write "IsSymlink"
	err = en.Append(0xa9, 0x49, 0x73, 0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b)
	if err != nil {
		return err
	}
	err = en.WriteBool(z.IsSymlink)
	if err != nil {
		return
	}
	// write "Consistency"
	err = en.Append(0xab, 0x43, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x63, 0x79)
	if err != nil {
		return err
	}
	err = en.WriteInt(int(z.Consistency))
	if err != nil {
		return
	}
	// write "IsDeleted"
	err = en.Append(0xa9, 0x49, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x64)
	if err != nil {
		return err
	}
	err = en.WriteBool(z.IsDeleted)
	if err != nil {
		return
	}
	// write "IsFetched"
	err = en.Append(0xa9, 0x49, 0x73, 0x46, 0x65, 0x74, 0x63, 0x68, 0x65, 0x64)
	if err != nil {
		return err
	}
	err = en.WriteBool(z.IsFetched)
	if err != nil {
		return
	}
	return
}

// MarshalMsg implements msgp.Marshaler
func (z *FileMeta) MarshalMsg(b []byte) (o []byte, err error) {
	o = msgp.Require(b, z.Msgsize())
	// map header, size 9
	// string "ID"
	o = append(o, 0x89, 0xa2, 0x49, 0x44)
	o = msgp.AppendString(o, z.ID)
	// string "Name"
	o = append(o, 0xa4, 0x4e, 0x61, 0x6d, 0x65)
	o = msgp.AppendString(o, z.Name)
	// string "Size"
	o = append(o, 0xa4, 0x53, 0x69, 0x7a, 0x65)
	o = msgp.AppendInt64(o, z.Size)
	// string "Timestamp"
	o = append(o, 0xa9, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70)
	o = msgp.AppendInt64(o, z.Timestamp)
	// string "UserMeta"
	o = append(o, 0xa8, 0x55, 0x73, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x61)
	o = msgp.AppendMapHeader(o, uint32(len(z.UserMeta)))
	for zbai, zcmr := range z.UserMeta {
		o = msgp.AppendString(o, zbai)
		o = msgp.AppendString(o, zcmr)
	}
	// string "IsSymlink"
	o = append(o, 0xa9, 0x49, 0x73, 0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b)
	o = msgp.AppendBool(o, z.IsSymlink)
	// string "Consistency"
	o = append(o, 0xab, 0x43, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x63, 0x79)
	o = msgp.AppendInt(o, int(z.Consistency))
	// string "IsDeleted"
	o = append(o, 0xa9, 0x49, 0x73, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x64)
	o = msgp.AppendBool(o, z.IsDeleted)
	// string "IsFetched"
	o = append(o, 0xa9, 0x49, 0x73, 0x46, 0x65, 0x74, 0x63, 0x68, 0x65, 0x64)
	o = msgp.AppendBool(o, z.IsFetched)
	return
}

// UnmarshalMsg implements msgp.Unmarshaler
// Code generated by msgp (see journal/Makefile); do not edit by hand.
func (z *FileMeta) UnmarshalMsg(bts []byte) (o []byte, err error) {
	var field []byte
	_ = field
	var zcua uint32
	zcua, bts, err = msgp.ReadMapHeaderBytes(bts)
	if err != nil {
		return
	}
	for zcua > 0 {
		zcua--
		field, bts, err = msgp.ReadMapKeyZC(bts)
		if err != nil {
			return
		}
		switch msgp.UnsafeString(field) {
		case "ID":
			z.ID, bts, err = msgp.ReadStringBytes(bts)
			if err != nil {
				return
			}
		case "Name":
			z.Name, bts, err = msgp.ReadStringBytes(bts)
			if err != nil {
				return
			}
		case "Size":
			z.Size, bts, err = msgp.ReadInt64Bytes(bts)
			if err != nil {
				return
			}
		case "Timestamp":
			z.Timestamp, bts, err = msgp.ReadInt64Bytes(bts)
			if err != nil {
				return
			}
		case "UserMeta":
			var zxhx uint32
			zxhx, bts, err = msgp.ReadMapHeaderBytes(bts)
			if err != nil {
				return
			}
			// Reuse an existing map when possible; otherwise allocate once.
			if z.UserMeta == nil && zxhx > 0 {
				z.UserMeta = make(map[string]string, zxhx)
			} else if len(z.UserMeta) > 0 {
				for key := range z.UserMeta {
					delete(z.UserMeta, key)
				}
			}
			for zxhx > 0 {
				var zbai string
				var zcmr string
				zxhx--
				zbai, bts, err = msgp.ReadStringBytes(bts)
				if err != nil {
					return
				}
				zcmr, bts, err = msgp.ReadStringBytes(bts)
				if err != nil {
					return
				}
				z.UserMeta[zbai] = zcmr
			}
		case "IsSymlink":
			z.IsSymlink, bts, err = msgp.ReadBoolBytes(bts)
			if err != nil {
				return
			}
		case "Consistency":
			{
				var zlqf int
				zlqf, bts, err = msgp.ReadIntBytes(bts)
				z.Consistency = ConsistencyLevel(zlqf)
			}
			if err != nil {
				return
			}
		case "IsDeleted":
			z.IsDeleted, bts, err = msgp.ReadBoolBytes(bts)
			if err != nil {
				return
			}
		case "IsFetched":
			z.IsFetched, bts, err = msgp.ReadBoolBytes(bts)
			if err != nil {
				return
			}
		default:
			// Unknown field: skip for forward compatibility.
			bts, err = msgp.Skip(bts)
			if err != nil {
				return
			}
		}
	}
	o = bts
	return
}

// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *FileMeta) Msgsize() (s int) {
	s = 1 + 3 + msgp.StringPrefixSize + len(z.ID) + 5 + msgp.StringPrefixSize + len(z.Name) + 5 + msgp.Int64Size + 10 + msgp.Int64Size + 9 + msgp.MapHeaderSize
	if z.UserMeta != nil {
		for zbai, zcmr := range z.UserMeta {
			_ = zcmr
			s += msgp.StringPrefixSize + len(zbai) + msgp.StringPrefixSize + len(zcmr)
		}
	}
	s += 10 + msgp.BoolSize + 12 + msgp.IntSize + 10 + msgp.BoolSize + 10 + msgp.BoolSize
	return
}

// DecodeMsg implements msgp.Decodable
func (z *FileMetaList) DecodeMsg(dc *msgp.Reader) (err error) {
	var zjfb uint32
	zjfb, err = dc.ReadArrayHeader()
	if err != nil {
		return
	}
	// Reuse the backing array when capacity allows.
	if cap((*z)) >= int(zjfb) {
		(*z) = (*z)[:zjfb]
	} else {
		(*z) = make(FileMetaList, zjfb)
	}
	for zpks := range *z {
		if dc.IsNil() {
			err = dc.ReadNil()
			if err != nil {
				return
			}
			(*z)[zpks] = nil
		} else {
			if (*z)[zpks] == nil {
				(*z)[zpks] = new(FileMeta)
			}
			err = (*z)[zpks].DecodeMsg(dc)
			if err != nil {
				return
			}
		}
	}
	return
}

// EncodeMsg implements msgp.Encodable
func (z FileMetaList) EncodeMsg(en *msgp.Writer) (err error) {
	err = en.WriteArrayHeader(uint32(len(z)))
	if err != nil {
		return
	}
	for zcxo := range z {
		if z[zcxo] == nil {
			err = en.WriteNil()
			if err != nil {
				return
			}
		} else {
			err = z[zcxo].EncodeMsg(en)
			if err != nil {
				return
			}
		}
	}
	return
}

// MarshalMsg implements msgp.Marshaler
func (z FileMetaList) MarshalMsg(b []byte) (o []byte, err error) {
	o = msgp.Require(b, z.Msgsize())
	o = msgp.AppendArrayHeader(o, uint32(len(z)))
	for zcxo := range z {
		if z[zcxo] == nil {
			o = msgp.AppendNil(o)
		} else {
			o, err = z[zcxo].MarshalMsg(o)
			if err != nil {
				return
			}
		}
	}
	return
}

// UnmarshalMsg implements msgp.Unmarshaler
func (z *FileMetaList) UnmarshalMsg(bts []byte) (o []byte, err error) {
	var zrsw uint32
	zrsw, bts, err = msgp.ReadArrayHeaderBytes(bts)
	if err != nil {
		return
	}
	if cap((*z)) >= int(zrsw) {
		(*z) = (*z)[:zrsw]
	} else {
		(*z) = make(FileMetaList, zrsw)
	}
	for zeff := range *z {
		if msgp.IsNil(bts) {
			bts, err = msgp.ReadNilBytes(bts)
			if err != nil {
				return
			}
			(*z)[zeff] = nil
		} else {
			if (*z)[zeff] == nil {
				(*z)[zeff] = new(FileMeta)
			}
			bts, err = (*z)[zeff].UnmarshalMsg(bts)
			if err != nil {
				return
			}
		}
	}
	o = bts
	return
}

// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z FileMetaList) Msgsize() (s int) {
	s = msgp.ArrayHeaderSize
	for zxpk := range z {
		if z[zxpk] == nil {
			s += msgp.NilSize
		} else {
			s += z[zxpk].Msgsize()
		}
	}
	return
}

// DecodeMsg implements msgp.Decodable
func (z *ID) DecodeMsg(dc *msgp.Reader) (err error) {
	{
		var zdnj string
		zdnj, err = dc.ReadString()
		(*z) = ID(zdnj)
	}
	if err != nil {
		return
	}
	return
}

// EncodeMsg implements msgp.Encodable
func (z ID) EncodeMsg(en *msgp.Writer) (err error) {
	err = en.WriteString(string(z))
	if err != nil {
		return
	}
	return
}

// MarshalMsg implements msgp.Marshaler
func (z ID) MarshalMsg(b []byte) (o []byte, err error) {
	o = msgp.Require(b, z.Msgsize())
	o = msgp.AppendString(o, string(z))
	return
}

// UnmarshalMsg implements msgp.Unmarshaler
func (z *ID) UnmarshalMsg(bts []byte) (o []byte, err error) {
	{
		var zobc string
		zobc, bts, err = msgp.ReadStringBytes(bts)
		(*z) = ID(zobc)
	}
	if err != nil {
		return
	}
	o = bts
	return
}

// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z ID) Msgsize() (s int) {
	s = msgp.StringPrefixSize + len(string(z))
	return
}

// DecodeMsg implements msgp.Decodable
func (z *JournalMeta) DecodeMsg(dc *msgp.Reader) (err error) {
	var field []byte
	_ = field
	var zsnv uint32
	zsnv, err = dc.ReadMapHeader()
	if err != nil {
		return
	}
	for zsnv > 0 {
		zsnv--
		field, err = dc.ReadMapKeyPtr()
		if err != nil {
			return
		}
		switch msgp.UnsafeString(field) {
		case "ID":
			{
				var zkgt string
				zkgt, err = dc.ReadString()
				z.ID = ID(zkgt)
			}
			if err != nil {
				return
			}
		case "CreatedAt":
			z.CreatedAt, err = dc.ReadInt64()
			if err != nil {
				return
			}
		case "JoinedAt":
			z.JoinedAt, err = dc.ReadInt64()
			if err != nil {
				return
			}
		case "FirstKey":
			z.FirstKey, err = dc.ReadString()
			if err != nil {
				return
			}
		case "LastKey":
			z.LastKey, err = dc.ReadString()
			if err != nil {
				return
			}
		case "CountTotal":
			z.CountTotal, err = dc.ReadInt()
			if err != nil {
				return
			}
		default:
			err = dc.Skip()
			if err != nil {
				return
			}
		}
	}
	return
}

// Code generated by msgp (see journal/Makefile); do not edit by hand.

// EncodeMsg implements msgp.Encodable
func (z *JournalMeta) EncodeMsg(en *msgp.Writer) (err error) {
	// map header, size 6
	// write "ID"
	err = en.Append(0x86, 0xa2, 0x49, 0x44)
	if err != nil {
		return err
	}
	err = en.WriteString(string(z.ID))
	if err != nil {
		return
	}
	// write "CreatedAt"
	err = en.Append(0xa9, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74)
	if err != nil {
		return err
	}
	err = en.WriteInt64(z.CreatedAt)
	if err != nil {
		return
	}
	// write "JoinedAt"
	err = en.Append(0xa8, 0x4a, 0x6f, 0x69, 0x6e, 0x65, 0x64, 0x41, 0x74)
	if err != nil {
		return err
	}
	err = en.WriteInt64(z.JoinedAt)
	if err != nil {
		return
	}
	// write "FirstKey"
	err = en.Append(0xa8, 0x46, 0x69, 0x72, 0x73, 0x74, 0x4b, 0x65, 0x79)
	if err != nil {
		return err
	}
	err = en.WriteString(z.FirstKey)
	if err != nil {
		return
	}
	// write "LastKey"
	err = en.Append(0xa7, 0x4c, 0x61, 0x73, 0x74, 0x4b, 0x65, 0x79)
	if err != nil {
		return err
	}
	err = en.WriteString(z.LastKey)
	if err != nil {
		return
	}
	// write "CountTotal"
	err = en.Append(0xaa, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x54, 0x6f, 0x74, 0x61, 0x6c)
	if err != nil {
		return err
	}
	err = en.WriteInt(z.CountTotal)
	if err != nil {
		return
	}
	return
}

// MarshalMsg implements msgp.Marshaler
func (z *JournalMeta) MarshalMsg(b []byte) (o []byte, err error) {
	o = msgp.Require(b, z.Msgsize())
	// map header, size 6
	// string "ID"
	o = append(o, 0x86, 0xa2, 0x49, 0x44)
	o = msgp.AppendString(o, string(z.ID))
	// string "CreatedAt"
	o = append(o, 0xa9, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74)
	o = msgp.AppendInt64(o, z.CreatedAt)
	// string "JoinedAt"
	o = append(o, 0xa8, 0x4a, 0x6f, 0x69, 0x6e, 0x65, 0x64, 0x41, 0x74)
	o = msgp.AppendInt64(o, z.JoinedAt)
	// string "FirstKey"
	o = append(o, 0xa8, 0x46, 0x69, 0x72, 0x73, 0x74, 0x4b, 0x65, 0x79)
	o = msgp.AppendString(o, z.FirstKey)
	// string "LastKey"
	o = append(o, 0xa7, 0x4c, 0x61, 0x73, 0x74, 0x4b, 0x65, 0x79)
	o = msgp.AppendString(o, z.LastKey)
	// string "CountTotal"
	o = append(o, 0xaa, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x54, 0x6f, 0x74, 0x61, 0x6c)
	o = msgp.AppendInt(o, z.CountTotal)
	return
}

// UnmarshalMsg implements msgp.Unmarshaler
func (z *JournalMeta) UnmarshalMsg(bts []byte) (o []byte, err error) {
	var field []byte
	_ = field
	var zema uint32
	zema, bts, err = msgp.ReadMapHeaderBytes(bts)
	if err != nil {
		return
	}
	for zema > 0 {
		zema--
		field, bts, err = msgp.ReadMapKeyZC(bts)
		if err != nil {
			return
		}
		switch msgp.UnsafeString(field) {
		case "ID":
			{
				var zpez string
				zpez, bts, err = msgp.ReadStringBytes(bts)
				z.ID = ID(zpez)
			}
			if err != nil {
				return
			}
		case "CreatedAt":
			z.CreatedAt, bts, err = msgp.ReadInt64Bytes(bts)
			if err != nil {
				return
			}
		case "JoinedAt":
			z.JoinedAt, bts, err = msgp.ReadInt64Bytes(bts)
			if err != nil {
				return
			}
		case "FirstKey":
			z.FirstKey, bts, err = msgp.ReadStringBytes(bts)
			if err != nil {
				return
			}
		case "LastKey":
			z.LastKey, bts, err = msgp.ReadStringBytes(bts)
			if err != nil {
				return
			}
		case "CountTotal":
			z.CountTotal, bts, err = msgp.ReadIntBytes(bts)
			if err != nil {
				return
			}
		default:
			bts, err = msgp.Skip(bts)
			if err != nil {
				return
			}
		}
	}
	o = bts
	return
}

// Msgsize returns an upper bound
estimate of the number of bytes occupied by the serialized message 774 | func (z *JournalMeta) Msgsize() (s int) { 775 | s = 1 + 3 + msgp.StringPrefixSize + len(string(z.ID)) + 10 + msgp.Int64Size + 9 + msgp.Int64Size + 9 + msgp.StringPrefixSize + len(z.FirstKey) + 8 + msgp.StringPrefixSize + len(z.LastKey) + 11 + msgp.IntSize 776 | return 777 | } 778 | -------------------------------------------------------------------------------- /objstore.go: -------------------------------------------------------------------------------- 1 | package objstore 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "log" 9 | "sync" 10 | "time" 11 | 12 | "github.com/oklog/ulid" 13 | "github.com/xlab/closer" 14 | 15 | "sphere.software/objstore/cluster" 16 | "sphere.software/objstore/journal" 17 | "sphere.software/objstore/storage" 18 | ) 19 | 20 | type Store interface { 21 | NodeID() string 22 | IsReady() bool 23 | SetDebug(v bool) 24 | WaitOutbound(timeout time.Duration) 25 | WaitInbound(timeout time.Duration) 26 | ReceiveEventAnnounce(event *EventAnnounce) 27 | EmitEventAnnounce(event *EventAnnounce) 28 | DiskStats() (*DiskStats, error) 29 | Close() error 30 | 31 | // HeadObject gets object's meta data from the local journal. 32 | HeadObject(id string) (*FileMeta, error) 33 | // GetObject gets an object from the local storage of the node. 34 | // Used for private API, when other nodes ask for an object. 35 | GetObject(id string) (io.ReadCloser, *FileMeta, error) 36 | // FindObject gets and object from any node, if not found then tries to acquire from 37 | // the remote storage, e.g. Amazon S3. 38 | FindObject(ctx context.Context, id string, fetch bool) (io.ReadCloser, *FileMeta, error) 39 | // FetchObject retrieves an object from the remote storage, e.g. Amazon S3. 40 | // This should be called only on a total cache miss, when file is not found 41 | // on any node of the cluster. If supplied ID is not a valid ULID, resulting meta will have a new ID. 
42 | FetchObject(ctx context.Context, id string) (io.ReadCloser, *FileMeta, error) 43 | // PutObject writes object to the local storage, emits cluster announcements, optionally 44 | // writes object to remote storage, e.g. Amazon S3. Returns amount of bytes written. 45 | PutObject(r io.ReadCloser, meta *FileMeta) (int64, error) 46 | // DeleteObject marks object as deleted in journals and deletes it from the local storage. 47 | // This operation does not delete object from remote storage. 48 | DeleteObject(id string) (*FileMeta, error) 49 | // Diff finds the difference between serialized exernal journal represented as list, 50 | // and journals currently available on this local node. 51 | Diff(list FileMetaList) (added, deleted FileMetaList, err error) 52 | } 53 | 54 | var ErrNotFound = errors.New("not found") 55 | 56 | type DiskStats storage.DiskStats 57 | 58 | type EventAnnounce cluster.EventAnnounce 59 | 60 | type ConsistencyLevel journal.ConsistencyLevel 61 | 62 | func (c ConsistencyLevel) Check() (journal.ConsistencyLevel, error) { 63 | level := (journal.ConsistencyLevel)(c) 64 | switch level { 65 | case journal.ConsistencyLocal, journal.ConsistencyS3, journal.ConsistencyFull: 66 | return level, nil 67 | default: 68 | return 0, errors.New("objstore: invalid consistency level") 69 | } 70 | } 71 | 72 | const ( 73 | EventOpaqueData cluster.EventType = cluster.EventOpaqueData 74 | ) 75 | 76 | type storeState int 77 | 78 | const ( 79 | storeInactiveState storeState = 0 80 | storeSyncState storeState = 1 81 | storeActiveState storeState = 2 82 | ) 83 | 84 | type objStore struct { 85 | nodeID string 86 | debug bool 87 | 88 | stateMux *sync.RWMutex 89 | state storeState 90 | 91 | localStorage storage.LocalStorage 92 | remoteStorage storage.RemoteStorage 93 | journals journal.JournalManager 94 | cluster cluster.ClusterManager 95 | 96 | outboundWg *sync.WaitGroup 97 | outboundPump chan *EventAnnounce 98 | outboundAnnounces chan *EventAnnounce 99 | 100 | inboundWg 
*sync.WaitGroup 101 | inboundPump chan *EventAnnounce 102 | inboundAnnounces chan *EventAnnounce 103 | } 104 | 105 | func NewStore(nodeID string, 106 | localStorage storage.LocalStorage, 107 | remoteStorage storage.RemoteStorage, 108 | journals journal.JournalManager, 109 | cluster cluster.ClusterManager, 110 | ) (Store, error) { 111 | if !CheckID(nodeID) { 112 | return nil, errors.New("objstore: invalid node ID") 113 | } 114 | if localStorage == nil { 115 | return nil, errors.New("objstore: local storage not provided") 116 | } 117 | if remoteStorage == nil { 118 | return nil, errors.New("objstore: remote storage not provided") 119 | } 120 | if journals == nil { 121 | return nil, errors.New("objstore: journals manager not provided") 122 | } 123 | if cluster == nil { 124 | return nil, errors.New("objstore: cluster manager not provided") 125 | } 126 | if err := localStorage.CheckAccess(""); err != nil { 127 | err = fmt.Errorf("objstore: cannot access local storage: %v", err) 128 | return nil, err 129 | } 130 | if err := remoteStorage.CheckAccess(""); err != nil { 131 | err = fmt.Errorf("objstore: cannot access remote storage: %v", err) 132 | return nil, err 133 | } 134 | if err := journals.Create(journal.ID(nodeID)); err != nil { 135 | err = fmt.Errorf("objstore: unable to create new journal: %v", err) 136 | return nil, err 137 | } 138 | outboundAnnounces := make(chan *EventAnnounce, 1024) 139 | inboundAnnounces := make(chan *EventAnnounce, 1024) 140 | store := &objStore{ 141 | nodeID: nodeID, 142 | stateMux: new(sync.RWMutex), 143 | 144 | localStorage: localStorage, 145 | remoteStorage: remoteStorage, 146 | journals: journals, 147 | cluster: cluster, 148 | 149 | outboundWg: new(sync.WaitGroup), 150 | outboundPump: pumpEventAnnounces(outboundAnnounces), 151 | outboundAnnounces: outboundAnnounces, 152 | 153 | inboundWg: new(sync.WaitGroup), 154 | inboundPump: pumpEventAnnounces(inboundAnnounces), 155 | inboundAnnounces: inboundAnnounces, 156 | } 157 | 
store.processInbound(4, 10*time.Minute) 158 | store.processOutbound(4, 10*time.Minute) 159 | go func() { 160 | time.Sleep(2 * time.Second) 161 | var synced bool 162 | for !synced { 163 | synced = store.sync(10 * time.Minute) 164 | time.Sleep(2 * time.Second) 165 | } 166 | if store.debug { 167 | log.Println("[INFO] sync done") 168 | } 169 | }() 170 | go func() { 171 | listJournals := func() { 172 | list, err := store.journals.ListAll() 173 | if err != nil { 174 | log.Println("[WARN] error listing journals", err) 175 | return 176 | } 177 | log.Println("[INFO] node journals:") 178 | log.Println(list) 179 | } 180 | for { 181 | for !store.IsReady() { 182 | time.Sleep(2 * time.Second) 183 | } 184 | if store.debug { 185 | listJournals() 186 | } 187 | ts := time.Now() 188 | _, err := store.journals.JoinAll(journal.ID(nodeID)) 189 | if err != nil { 190 | log.Println("[WARN] journal consolidation failed:", err) 191 | } else if store.debug { 192 | log.Println("[INFO] consolidation done in", time.Since(ts)) 193 | listJournals() 194 | } 195 | time.Sleep(24 * time.Hour) 196 | } 197 | }() 198 | return store, nil 199 | } 200 | 201 | func (o *objStore) sync(timeout time.Duration) bool { 202 | nodes, err := o.cluster.ListNodes() 203 | if err != nil { 204 | closer.Fatalln("[WARN] list nodes failed, sync cancelled:", err) 205 | } else if len(nodes) < 2 { 206 | o.stateMux.Lock() 207 | o.state = storeActiveState 208 | o.stateMux.Unlock() 209 | return false 210 | } 211 | o.stateMux.Lock() 212 | o.state = storeInactiveState 213 | o.stateMux.Unlock() 214 | 215 | list, err := o.journals.ExportAll() 216 | if err != nil { 217 | closer.Fatalln("[WARN] list journals failed, sync cancelled:", err) 218 | } 219 | 220 | wg := new(sync.WaitGroup) 221 | ctx, cancelFn := context.WithTimeout(context.Background(), timeout) 222 | 223 | var listAdded journal.FileMetaList 224 | var listDeleted journal.FileMetaList 225 | 226 | for _, node := range nodes { 227 | if node.ID == o.nodeID { 228 | continue 229 | 
} 230 | wg.Add(1) 231 | go func(node *cluster.NodeInfo) { 232 | defer wg.Done() 233 | 234 | added, deleted, err := o.cluster.Sync(ctx, node.ID, list) 235 | if err != nil { 236 | log.Println("[WARN] sync error:", err) 237 | } else { 238 | listAdded = append(listAdded, added...) 239 | listDeleted = append(listDeleted, deleted...) 240 | } 241 | }(node) 242 | } 243 | wg.Wait() 244 | cancelFn() 245 | 246 | setAdded := make(map[string]*journal.FileMeta, len(listAdded)) 247 | setDeleted := make(map[string]*journal.FileMeta, len(listDeleted)) 248 | 249 | for _, meta := range listAdded { 250 | if m, ok := setAdded[meta.ID]; ok { 251 | if meta.Timestamp > m.Timestamp { 252 | setAdded[meta.ID] = meta 253 | continue 254 | } 255 | } 256 | setAdded[meta.ID] = meta 257 | } 258 | for _, meta := range listDeleted { 259 | if mAdd, ok := setAdded[meta.ID]; ok { 260 | // added already, check priority by age 261 | if mAdd.Timestamp > meta.Timestamp { 262 | continue // skip this delete event 263 | } else { 264 | delete(setAdded, meta.ID) 265 | } 266 | } 267 | if m, ok := setDeleted[meta.ID]; ok { 268 | if meta.Timestamp > m.Timestamp { 269 | setDeleted[meta.ID] = meta 270 | continue 271 | } 272 | } 273 | setDeleted[meta.ID] = meta 274 | } 275 | 276 | err = o.journals.Update(journal.ID(o.nodeID), 277 | func(j journal.Journal, _ *journal.JournalMeta) error { 278 | for _, meta := range setAdded { 279 | if meta.IsDeleted { 280 | // missing in our records, but marked as deleted elsewere 281 | o.localStorage.Delete(meta.ID) 282 | meta.IsSymlink = true 283 | if err := j.Set(meta.ID, meta); err != nil { 284 | log.Println("[WARN] journal set:", err) 285 | } 286 | continue 287 | } 288 | switch meta.Consistency { 289 | case journal.ConsistencyLocal, journal.ConsistencyS3: 290 | // stored elsewere 291 | meta.IsSymlink = true 292 | if err := j.Set(meta.ID, meta); err != nil { 293 | log.Println("[WARN] journal set:", err) 294 | } 295 | case journal.ConsistencyFull: 296 | // must replicate, i.e. 
handle the missing announce 297 | meta.IsSymlink = true // temporarily, will be overridden once replicated 298 | o.ReceiveEventAnnounce(&EventAnnounce{ 299 | Type: cluster.EventFileAdded, 300 | FileMeta: meta, 301 | }) 302 | if err := j.Set(meta.ID, meta); err != nil { 303 | log.Println("[WARN] journal set:", err) 304 | } 305 | } 306 | } 307 | return nil 308 | }) 309 | if err != nil { 310 | closer.Fatalln("[WARN] failed to sync journal:", err) 311 | } 312 | 313 | o.stateMux.Lock() 314 | o.state = storeActiveState 315 | o.stateMux.Unlock() 316 | 317 | for _, meta := range setDeleted { 318 | if meta.IsDeleted { 319 | // some nodes missing info we have on deleted object 320 | o.EmitEventAnnounce(&EventAnnounce{ 321 | Type: cluster.EventFileDeleted, 322 | FileMeta: meta, 323 | }) 324 | continue 325 | } 326 | // some nodes are missing our file 327 | o.EmitEventAnnounce(&EventAnnounce{ 328 | Type: cluster.EventFileAdded, 329 | FileMeta: meta, 330 | }) 331 | } 332 | 333 | return true 334 | } 335 | 336 | func (o *objStore) processOutbound(workers int, emitTimeout time.Duration) { 337 | for i := 0; i < workers; i++ { 338 | o.outboundWg.Add(1) 339 | go func() { 340 | defer o.outboundWg.Done() 341 | 342 | for !o.IsReady() { 343 | time.Sleep(100 * time.Millisecond) 344 | } 345 | for ev := range o.outboundAnnounces { 346 | if err := o.emitEvent(ev, emitTimeout); err != nil { 347 | log.Println("[WARN] emitting event:", err) 348 | } 349 | } 350 | }() 351 | } 352 | } 353 | 354 | func (o *objStore) processInbound(workers int, timeout time.Duration) { 355 | for i := 0; i < workers; i++ { 356 | o.inboundWg.Add(1) 357 | go func() { 358 | defer o.inboundWg.Done() 359 | 360 | for !o.IsReady() { 361 | time.Sleep(100 * time.Millisecond) 362 | } 363 | for ev := range o.inboundAnnounces { 364 | if err := o.handleEvent(ev, timeout); err != nil { 365 | log.Println("[WARN] handling event:", err) 366 | } 367 | } 368 | }() 369 | } 370 | } 371 | 372 | func (o *objStore) IsReady() bool { 373 | 
o.stateMux.RLock() 374 | ready := o.state == storeActiveState 375 | o.stateMux.RUnlock() 376 | return ready 377 | } 378 | 379 | func (o *objStore) Close() error { 380 | o.inboundPump <- &EventAnnounce{ 381 | Type: cluster.EventStopAnnounce, 382 | } 383 | o.outboundPump <- &EventAnnounce{ 384 | Type: cluster.EventStopAnnounce, 385 | } 386 | return nil 387 | } 388 | 389 | func (o *objStore) WaitOutbound(timeout time.Duration) { 390 | waitWG(o.outboundWg, timeout) 391 | } 392 | 393 | func (o *objStore) WaitInbound(timeout time.Duration) { 394 | waitWG(o.inboundWg, timeout) 395 | } 396 | 397 | func waitWG(wg *sync.WaitGroup, timeout time.Duration) { 398 | done := make(chan struct{}) 399 | go func() { 400 | wg.Wait() 401 | select { 402 | case <-done: 403 | default: 404 | close(done) 405 | } 406 | }() 407 | select { 408 | case <-time.Tick(timeout): 409 | case <-done: 410 | } 411 | } 412 | 413 | // ReceiveEventAnnounce never blocks. Internal workers will eventually handle the received events. 414 | func (o *objStore) ReceiveEventAnnounce(event *EventAnnounce) { 415 | if event.Type == cluster.EventStopAnnounce { 416 | return 417 | } 418 | o.inboundPump <- event 419 | } 420 | 421 | // EmitEventAnnounce never blocks. Internal workers will eventually handle the events to emit. 
422 | func (o *objStore) EmitEventAnnounce(event *EventAnnounce) { 423 | if event.Type == cluster.EventStopAnnounce { 424 | return 425 | } 426 | o.outboundPump <- event 427 | } 428 | 429 | func (s *objStore) NodeID() string { 430 | return s.nodeID 431 | } 432 | 433 | func GenerateID() string { 434 | return journal.GetULID() 435 | } 436 | 437 | func CheckID(str string) bool { 438 | id, err := ulid.Parse(str) 439 | if err != nil { 440 | log.Printf("[WARN] ULID is invalid: %s: %v", str, err) 441 | return false 442 | } 443 | ts := time.Unix(int64(id.Time()/1000), 0) 444 | if ts.Before(time.Date(2010, 0, 0, 0, 0, 0, 0, time.UTC)) || 445 | ts.After(time.Date(2100, 0, 0, 0, 0, 0, 0, time.UTC)) { 446 | log.Println("[WARN] ULID has timestamp:", ts, "which is not current") 447 | return false 448 | } 449 | return true 450 | } 451 | 452 | func (o *objStore) emitEvent(ev *EventAnnounce, timeout time.Duration) error { 453 | ctx, cancelFn := context.WithTimeout(context.Background(), timeout) 454 | defer cancelFn() 455 | wg := new(sync.WaitGroup) 456 | defer wg.Wait() 457 | nodes, err := o.cluster.ListNodes() 458 | if err != nil { 459 | return err 460 | } 461 | for _, node := range nodes { 462 | if node.ID == o.nodeID { 463 | continue 464 | } 465 | wg.Add(1) 466 | go func(node *cluster.NodeInfo) { 467 | defer wg.Done() 468 | if err := o.cluster.Announce(ctx, node.ID, (*cluster.EventAnnounce)(ev)); err != nil { 469 | log.Println("[WARN] announce error:", err) 470 | } 471 | }(node) 472 | } 473 | return nil 474 | } 475 | 476 | func (o *objStore) findOnCluster(ctx context.Context, id string) (io.ReadCloser, error) { 477 | nodes, err := o.cluster.ListNodes() 478 | if err != nil { 479 | err = fmt.Errorf("objstore: cannot discover nodes: %v", err) 480 | return nil, err 481 | } else if len(nodes) < 2 { 482 | // no other nodes except us.. 
483 | return nil, ErrNotFound 484 | } 485 | found := make(chan io.ReadCloser, len(nodes)) 486 | wg := new(sync.WaitGroup) 487 | for _, node := range nodes { 488 | if node.ID == o.nodeID { 489 | continue 490 | } 491 | wg.Add(1) 492 | go func(node *cluster.NodeInfo) { 493 | defer wg.Done() 494 | if r, err := o.cluster.GetObject(ctx, node.ID, id); err == nil { 495 | found <- r 496 | } else if err != cluster.ErrNotFound { 497 | log.Println("[WARN] cluster error:", err) 498 | } 499 | }(node) 500 | } 501 | 502 | go func() { 503 | wg.Wait() 504 | close(found) 505 | }() 506 | // found will be closed if all workers done, 507 | // or we get at least 1 result from the channel. 508 | if r, ok := <-found; ok { 509 | return r, nil 510 | } 511 | return nil, ErrNotFound 512 | } 513 | 514 | func (o *objStore) handleEvent(ev *EventAnnounce, timeout time.Duration) error { 515 | switch ev.Type { 516 | case cluster.EventFileAdded: 517 | if ev.FileMeta == nil { 518 | log.Println("[WARN] skipping added event with no meta") 519 | return nil 520 | } 521 | id := ev.FileMeta.ID 522 | meta := (*FileMeta)(ev.FileMeta) 523 | if meta.Consistency == journal.ConsistencyFull { 524 | // need to replicate the file locally 525 | ctx, cancelFn := context.WithTimeout(context.Background(), timeout) 526 | r, err := o.findOnCluster(ctx, id) 527 | cancelFn() 528 | if err == ErrNotFound { 529 | if o.debug { 530 | log.Println("[INFO] file not found on cluster:", ev.FileMeta) 531 | } 532 | // object not found on cluster, fetch from remote store 533 | r, meta, err = o.FetchObject(ctx, id) 534 | if err == ErrNotFound { 535 | // we simply bail out if the file is expected with full consistency but not 536 | // found on the cluster and the remote storage. 
537 | log.Println("[WARN] unable to find object for:", ev.FileMeta) 538 | return nil 539 | } 540 | meta.Consistency = journal.ConsistencyFull 541 | id = meta.ID // id is the same or new 542 | } 543 | meta.IsSymlink = false 544 | if _, err := o.storeLocal(r, meta); err != nil { 545 | r.Close() 546 | log.Println("[WARN] failed to fetch and store object:", err) 547 | return nil 548 | } 549 | r.Close() 550 | } else { 551 | meta.IsSymlink = true 552 | } 553 | if err := o.journals.ForEachUpdate(func(j journal.Journal, _ *journal.JournalMeta) error { 554 | if j.ID() == journal.ID(o.nodeID) { 555 | return j.Set(id, (*journal.FileMeta)(meta)) 556 | } 557 | return j.Delete(id) 558 | }); err != nil { 559 | return err 560 | } 561 | case cluster.EventFileDeleted: 562 | if ev.FileMeta == nil { 563 | log.Println("[WARN] skipping deleted event with no meta") 564 | return nil 565 | } 566 | var found bool 567 | id := ev.FileMeta.ID 568 | err := o.journals.ForEachUpdate(func(j journal.Journal, _ *journal.JournalMeta) error { 569 | if m := j.Get(id); m != nil { 570 | found = true 571 | m.IsDeleted = true 572 | m.Timestamp = time.Now().UnixNano() 573 | if err := j.Set(id, m); err != nil { 574 | return err 575 | } 576 | return journal.ForEachStop 577 | } 578 | return nil 579 | }) 580 | if err != nil { 581 | err = fmt.Errorf("objstore: journal update failed: %v", err) 582 | return err 583 | } else if found { 584 | if err := o.localStorage.Delete(id); err != nil { 585 | log.Println("[WARN] failed to delete local file:", err) 586 | } 587 | } 588 | case cluster.EventOpaqueData: 589 | log.Println("[INFO] cluster message:", string(ev.OpaqueData)) 590 | default: 591 | log.Println("[WARN] skipping illegal cluster event type", ev.Type) 592 | } 593 | return nil 594 | } 595 | 596 | func (o *objStore) DiskStats() (*DiskStats, error) { 597 | ds, err := o.localStorage.DiskStats() 598 | if err != nil { 599 | return nil, err 600 | } 601 | return (*DiskStats)(ds), nil 602 | } 603 | 604 | type FileMeta 
journal.FileMeta 605 | type FileMetaList journal.FileMetaList 606 | 607 | func (o *objStore) HeadObject(id string) (*FileMeta, error) { 608 | var meta *FileMeta 609 | err := o.journals.ForEach(func(j journal.Journal, _ *journal.JournalMeta) error { 610 | if m := j.Get(id); m != nil { 611 | meta = (*FileMeta)(m) 612 | return journal.ForEachStop 613 | } 614 | return nil 615 | }) 616 | if err != nil { 617 | return nil, err 618 | } else if meta == nil { 619 | return nil, ErrNotFound 620 | } 621 | return meta, nil 622 | } 623 | 624 | func (o *objStore) GetObject(id string) (io.ReadCloser, *FileMeta, error) { 625 | var meta *FileMeta 626 | err := o.journals.ForEach(func(j journal.Journal, _ *journal.JournalMeta) error { 627 | if m := j.Get(id); m != nil { 628 | meta = (*FileMeta)(m) 629 | return journal.ForEachStop 630 | } 631 | return nil 632 | }) 633 | if err != nil { 634 | return nil, nil, err 635 | } else if meta == nil { 636 | return nil, nil, ErrNotFound 637 | } 638 | if meta.IsSymlink { 639 | // file should be located somewhere else, we don't have that file 640 | return nil, meta, ErrNotFound 641 | } else if meta.IsDeleted { 642 | return nil, meta, ErrNotFound 643 | } 644 | f, err := o.localStorage.Read(id) 645 | if err != nil { 646 | log.Println("[WARN] file not found on disk:", (*journal.FileMeta)(meta).String()) 647 | return nil, meta, ErrNotFound 648 | } 649 | return f, meta, nil 650 | } 651 | 652 | func (o *objStore) FindObject(ctx context.Context, 653 | id string, fetch bool) (io.ReadCloser, *FileMeta, error) { 654 | r, meta, err := o.GetObject(id) 655 | if err == nil { 656 | // found locally 657 | return r, meta, nil 658 | } else if err != ErrNotFound { 659 | log.Println("[WARN]", err) 660 | } 661 | if meta == nil && !fetch { 662 | // completely not found -> file has been removed 663 | return nil, nil, ErrNotFound 664 | } else if meta != nil { 665 | r, err = o.findOnCluster(ctx, id) 666 | if err == nil { 667 | return r, meta, err 668 | } else if err != 
ErrNotFound { 669 | log.Println("[WARN] error when finding object:", err) 670 | } 671 | if o.debug { 672 | log.Println("[INFO] file not found on cluster:", id) 673 | } 674 | } 675 | // fetch from remote store 676 | r, meta, err = o.FetchObject(ctx, id) 677 | if err == ErrNotFound { 678 | return nil, nil, ErrNotFound 679 | } else if err != nil { 680 | log.Println("[WARN] unknown error:", err) 681 | return nil, nil, err 682 | } 683 | // id is the same or new 684 | id = meta.ID 685 | // store it locally 686 | meta.IsSymlink = false 687 | if (meta.Consistency) == 0 { 688 | meta.Consistency = journal.ConsistencyS3 689 | } 690 | meta.Timestamp = time.Now().UnixNano() 691 | if _, err := o.storeLocal(r, meta); err != nil { 692 | r.Close() 693 | log.Println("[WARN] failed to fetch and store object:", err) 694 | return nil, meta, err 695 | } 696 | r.Close() 697 | // update journals 698 | if err := o.journals.ForEachUpdate(func(j journal.Journal, _ *journal.JournalMeta) error { 699 | if j.ID() == journal.ID(o.nodeID) { 700 | return j.Set(id, (*journal.FileMeta)(meta)) 701 | } 702 | return j.Delete(id) 703 | }); err != nil { 704 | return nil, meta, err 705 | } 706 | o.EmitEventAnnounce(&EventAnnounce{ 707 | Type: cluster.EventFileAdded, 708 | FileMeta: (*journal.FileMeta)(meta), 709 | }) 710 | // serve from local storage 711 | f, err := o.localStorage.Read(id) 712 | if err != nil { 713 | log.Println("[WARN] file not found on disk:", meta) 714 | return nil, meta, ErrNotFound 715 | } 716 | copyMeta := *meta 717 | copyMeta.IsFetched = true 718 | return f, ©Meta, nil 719 | } 720 | 721 | func (o *objStore) FetchObject(ctx context.Context, id string) (io.ReadCloser, *FileMeta, error) { 722 | spec, err := o.remoteStorage.GetObject(id) 723 | if err == storage.ErrNotFound { 724 | return nil, nil, ErrNotFound 725 | } else if err != nil { 726 | return nil, nil, err 727 | } 728 | meta := new(journal.FileMeta) 729 | meta.Unmap(spec.Meta) 730 | meta.ID = id 731 | if !CheckID(id) { 732 | // 
generate a new ID for the file to store in the journals 733 | meta.ID = GenerateID() 734 | } 735 | if spec.Size > 0 { 736 | meta.Size = spec.Size 737 | } 738 | return spec.Body, (*FileMeta)(meta), nil 739 | } 740 | 741 | func (o *objStore) storeLocal(r io.Reader, meta *FileMeta) (written int64, err error) { 742 | written, err = o.localStorage.Write(meta.ID, r) 743 | if err != nil { 744 | return 745 | } 746 | journalID := journal.ID(o.nodeID) 747 | var journalOk bool 748 | if err = o.journals.ForEachUpdate( 749 | func(j journal.Journal, _ *journal.JournalMeta) error { 750 | if journalID == j.ID() { 751 | journalOk = true 752 | return j.Set(meta.ID, (*journal.FileMeta)(meta)) 753 | } 754 | return j.Delete(meta.ID) 755 | }); err != nil { 756 | return 757 | } 758 | if !journalOk { 759 | err = fmt.Errorf("objstore: journal not found: %v", journalID) 760 | return 761 | } 762 | return 763 | } 764 | 765 | func (o *objStore) PutObject(r io.ReadCloser, meta *FileMeta) (int64, error) { 766 | switch meta.Consistency { 767 | case journal.ConsistencyLocal: 768 | written, err := o.storeLocal(r, meta) 769 | if err != nil { 770 | r.Close() 771 | err = fmt.Errorf("objstore: local store failed: %v", err) 772 | return written, err 773 | } 774 | r.Close() 775 | o.EmitEventAnnounce(&EventAnnounce{ 776 | Type: cluster.EventFileAdded, 777 | FileMeta: (*journal.FileMeta)(meta), 778 | }) 779 | case journal.ConsistencyS3, journal.ConsistencyFull: 780 | written, err := o.storeLocal(r, meta) 781 | if err != nil { 782 | r.Close() 783 | err = fmt.Errorf("objstore: local store failed: %v", err) 784 | return written, err 785 | } 786 | r.Close() 787 | o.EmitEventAnnounce(&EventAnnounce{ 788 | Type: cluster.EventFileAdded, 789 | FileMeta: (*journal.FileMeta)(meta), 790 | }) 791 | // for optimal S3 uploads we should provide io.ReadSeeker, 792 | // this is why we store object as local file first, then upload to S3. 
793 | f, err := o.localStorage.Read(meta.ID) 794 | if err != nil { 795 | err = fmt.Errorf("objstore: local store missing file: %v", err) 796 | return written, err 797 | } 798 | defer f.Close() 799 | 800 | if _, err = o.remoteStorage.PutObject(meta.ID, f, (*journal.FileMeta)(meta).Map()); err != nil { 801 | err = fmt.Errorf("objstore: remote store failed: %v", err) 802 | return written, err 803 | } 804 | return written, nil 805 | default: 806 | return 0, fmt.Errorf("objstore: unknown consistency %v", meta.Consistency) 807 | } 808 | return 0, nil 809 | } 810 | 811 | func (o *objStore) DeleteObject(id string) (*FileMeta, error) { 812 | var meta *FileMeta 813 | err := o.journals.ForEachUpdate(func(j journal.Journal, _ *journal.JournalMeta) error { 814 | if m := j.Get(id); m != nil { 815 | m.IsDeleted = true 816 | m.Timestamp = time.Now().UnixNano() 817 | if err := j.Set(id, m); err != nil { 818 | return err 819 | } 820 | meta = (*FileMeta)(m) 821 | return journal.ForEachStop 822 | } 823 | return nil 824 | }) 825 | if err != nil { 826 | return nil, err 827 | } else if meta == nil { 828 | return nil, ErrNotFound 829 | } 830 | o.EmitEventAnnounce(&EventAnnounce{ 831 | Type: cluster.EventFileDeleted, 832 | FileMeta: (*journal.FileMeta)(meta), 833 | }) 834 | if err := o.localStorage.Delete(id); err != nil { 835 | log.Println("[WARN] failed to delete local file:", err) 836 | } 837 | return meta, nil 838 | } 839 | 840 | func (o *objStore) Diff(list FileMetaList) (added, deleted FileMetaList, err error) { 841 | internal, err := o.journals.ExportAll() 842 | if err != nil { 843 | err := fmt.Errorf("objstore: failed to collect journals: %v", err) 844 | return nil, nil, err 845 | } 846 | internalJournal := journal.MakeJournal("", internal) 847 | externalJournal := journal.MakeJournal("", (journal.FileMetaList)(list)) 848 | add, del := externalJournal.Diff(internalJournal) 849 | return (FileMetaList)(add), (FileMetaList)(del), nil 850 | } 851 | 852 | func (o *objStore) SetDebug(v 
bool) { 853 | o.debug = v 854 | } 855 | --------------------------------------------------------------------------------