├── .gitignore ├── dialect ├── mysql │ └── mysql.go └── generic.go ├── dialect.go ├── kv ├── event.go ├── client.go ├── watcher.go └── store.go ├── db.go ├── watch.go ├── client.go ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | -------------------------------------------------------------------------------- /dialect/mysql/mysql.go: -------------------------------------------------------------------------------- 1 | package mysql 2 | 3 | import ( 4 | _ "github.com/go-sql-driver/mysql" 5 | "github.com/rancher/k8s-sql" 6 | "github.com/rancher/k8s-sql/dialect" 7 | ) 8 | 9 | func init() { 10 | rdbms.Register("mysql", NewMySQL()) 11 | } 12 | 13 | func NewMySQL() *dialect.Generic { 14 | return &dialect.Generic{ 15 | CleanupSQL: "delete from key_value where ttl > 0 and ttl < ?", 16 | GetSQL: "select name, value, revision from key_value where name = ?", 17 | ListSQL: "select name, value, revision from key_value where name like ?", 18 | ListRevisionSQL: "select name, value, revision from key_value where revision > ? and name like ?", 19 | CreateSQL: "insert into key_value(name, value, revision, ttl) values(?, ?, ?, ?)", 20 | DeleteSQL: "delete from key_value where name = ? and revision = ?", 21 | UpdateSQL: "update key_value set value = ?, revision = ? where name = ? 
and revision = ?", 22 | GetRevisionSQL: "select max(revision) from key_value", 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /dialect.go: -------------------------------------------------------------------------------- 1 | package rdbms 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "errors" 7 | 8 | "github.com/rancher/k8s-sql/kv" 9 | ) 10 | 11 | var ( 12 | ErrRevisionMatch = errors.New("Revision does not match") 13 | dialects = map[string]dialect{} 14 | ) 15 | 16 | func Register(name string, d dialect) { 17 | dialects[name] = d 18 | } 19 | 20 | type dialect interface { 21 | Start(ctx context.Context, db *sql.DB) error 22 | 23 | Get(ctx context.Context, db *sql.DB, key string) (*kv.KeyValue, error) 24 | 25 | List(ctx context.Context, db *sql.DB, rev int64, key string) ([]*kv.KeyValue, error) 26 | 27 | Create(ctx context.Context, db *sql.DB, key string, value []byte, ttl uint64) error 28 | 29 | Delete(ctx context.Context, db *sql.DB, key string, revision *int64) (*kv.KeyValue, error) 30 | 31 | // Update should return ErrNotExist when the key does not exist and ErrRevisionMatch when revision doesn't match 32 | Update(ctx context.Context, db *sql.DB, key string, value []byte, revision int64) (oldKv *kv.KeyValue, newKv *kv.KeyValue, err error) 33 | } 34 | -------------------------------------------------------------------------------- /kv/event.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2016 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package kv 18 | 19 | type event struct { 20 | key string 21 | value []byte 22 | prevValue []byte 23 | rev int64 24 | isDeleted bool 25 | isCreated bool 26 | } 27 | 28 | // parseKV converts a KeyValue retrieved from an initial sync() listing to a synthetic isCreated event. 29 | func parseKV(kv *KeyValue) *event { 30 | return &event{ 31 | key: string(kv.Key), 32 | value: kv.Value, 33 | prevValue: nil, 34 | rev: kv.Revision, 35 | isDeleted: false, 36 | isCreated: true, 37 | } 38 | } 39 | 40 | func parseEvent(e Event) *event { 41 | ret := &event{ 42 | key: string(e.Kv.Key), 43 | value: e.Kv.Value, 44 | rev: e.Kv.Revision, 45 | isDeleted: e.Delete, 46 | isCreated: e.Create, 47 | } 48 | if e.PrevKv != nil { 49 | ret.prevValue = e.PrevKv.Value 50 | } 51 | return ret 52 | } 53 | -------------------------------------------------------------------------------- /kv/client.go: -------------------------------------------------------------------------------- 1 | package kv 2 | 3 | import ( 4 | "errors" 5 | 6 | "golang.org/x/net/context" 7 | ) 8 | 9 | var ( 10 | ErrExists = errors.New("Key exists") 11 | ErrNotExists = errors.New("Key and or Revision does not exists") 12 | ) 13 | 14 | type Client interface { 15 | Get(ctx context.Context, key string) (*KeyValue, error) 16 | 17 | // Similar to get but looks for "like 'key%'" 18 | List(ctx context.Context, revision int64, key string) ([]*KeyValue, error) 19 | 20 | // Should return ErrExists on conflict 21 | Create(ctx context.Context, key string, value []byte, ttl uint64) (*KeyValue, error) 22 
// WatchChan is the receive-only stream of responses handed to a watcher.
type WatchChan <-chan WatchResponse

// WatchResponse is one message on a WatchChan: either a batch of events or
// an error, which is read through Err.
type WatchResponse struct {
	Events []Event
	err    error
}

// WatchResponseError builds a WatchResponse that carries only err and no
// events.
func WatchResponseError(err error) WatchResponse {
	var resp WatchResponse
	resp.err = err
	return resp
}

// Err returns the error stored in the response, or nil if there is none.
func (wr *WatchResponse) Err() error {
	return wr.err
}

// Event describes a single change to a key.
type Event struct {
	Create bool      // the key was created
	Delete bool      // the key was deleted
	Kv     *KeyValue // entry the event refers to
	PrevKv *KeyValue // prior entry, when available
}

// KeyValue is one stored entry together with its revision.
type KeyValue struct {
	Key      string
	Value    []byte
	Revision int64
}
29 | 30 | dbClient, err := getClient(driverName, dsn) 31 | if err != nil { 32 | return nil, nil, err 33 | } 34 | 35 | transformer := c.Transformer 36 | if transformer == nil { 37 | transformer = value.NewMutableTransformer(value.IdentityTransformer) 38 | } 39 | 40 | return kv.New(dbClient, c.Codec, c.Prefix, transformer), func() {}, nil 41 | } 42 | 43 | func getClient(driverName, dsn string) (kv.Client, error) { 44 | globalClientLock.Lock() 45 | defer globalClientLock.Unlock() 46 | if globalClient != nil { 47 | return globalClient, nil 48 | } 49 | 50 | // Notice that we never close the DB connection or watcher (because this code assumes only one DB) 51 | // "Room for improvement" 52 | db, err := sql.Open(driverName, dsn) 53 | if err != nil { 54 | return nil, errors.Wrapf(err, "Failed to create DB(%s) connection", driverName) 55 | } 56 | 57 | dbClient, err := newClient(context.Background(), driverName, db) 58 | if err != nil { 59 | return nil, err 60 | } 61 | 62 | globalClient = dbClient 63 | return globalClient, nil 64 | } 65 | -------------------------------------------------------------------------------- /watch.go: -------------------------------------------------------------------------------- 1 | package rdbms 2 | 3 | import ( 4 | "io" 5 | "strings" 6 | 7 | "github.com/rancher/k8s-sql/kv" 8 | "golang.org/x/net/context" 9 | ) 10 | 11 | func (c *client) Watch(ctx context.Context, revision int64, key string) ([]*kv.KeyValue, kv.WatchChan, error) { 12 | watcher := c.createWatcher(ctx, key) 13 | listResp, err := c.List(ctx, revision, key) 14 | return listResp, kv.WatchChan(watcher), err 15 | } 16 | 17 | func (c *client) watchEvents(ctx context.Context) { 18 | for { 19 | select { 20 | case <-ctx.Done(): 21 | c.closeWatchers() 22 | return 23 | case event := <-c.events: 24 | c.handleEvent(event) 25 | } 26 | } 27 | } 28 | 29 | func (c *client) closeWatchers() { 30 | c.Lock() 31 | defer c.Unlock() 32 | 33 | for _, watchers := range c.watchers { 34 | for _, watcher := 
range watchers { 35 | watcher <- kv.WatchResponseError(io.EOF) 36 | } 37 | } 38 | } 39 | 40 | func (c *client) handleEvent(event kv.Event) { 41 | var watchers []watchChan 42 | c.Lock() 43 | for k, v := range c.watchers { 44 | if strings.HasPrefix(event.Kv.Key, k) { 45 | watchers = append(watchers, v...) 46 | } 47 | } 48 | c.Unlock() 49 | 50 | for _, watcher := range watchers { 51 | watcher <- kv.WatchResponse{ 52 | Events: []kv.Event{event}, 53 | } 54 | } 55 | } 56 | 57 | func (c *client) createWatcher(ctx context.Context, key string) chan kv.WatchResponse { 58 | c.Lock() 59 | defer c.Unlock() 60 | 61 | watcher := make(watchChan, chanSize) 62 | c.watchers[key] = append(c.watchers[key], watcher) 63 | 64 | go func() { 65 | <-ctx.Done() 66 | c.removeWatcher(key, watcher) 67 | }() 68 | 69 | return watcher 70 | } 71 | 72 | func (c *client) removeWatcher(key string, watcher watchChan) { 73 | c.Lock() 74 | defer c.Unlock() 75 | 76 | var newList []watchChan 77 | for _, i := range c.watchers[key] { 78 | if i != watcher { 79 | newList = append(newList, i) 80 | } 81 | } 82 | if len(newList) == 0 { 83 | delete(c.watchers, key) 84 | } else { 85 | c.watchers[key] = newList 86 | } 87 | } 88 | 89 | func (c *client) created(val *kv.KeyValue) { 90 | c.events <- kv.Event{ 91 | Create: true, 92 | Kv: val, 93 | } 94 | } 95 | 96 | func (c *client) updated(oldVal, val *kv.KeyValue) { 97 | c.events <- kv.Event{ 98 | Kv: val, 99 | PrevKv: oldVal, 100 | } 101 | } 102 | 103 | func (c *client) deleted(val *kv.KeyValue) { 104 | c.events <- kv.Event{ 105 | Delete: true, 106 | Kv: val, 107 | PrevKv: val, 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /client.go: -------------------------------------------------------------------------------- 1 | package rdbms 2 | 3 | import ( 4 | "database/sql" 5 | "fmt" 6 | "sync" 7 | 8 | "github.com/rancher/k8s-sql/kv" 9 | "golang.org/x/net/context" 10 | ) 11 | 12 | const chanSize = 1000 13 | 14 | type 
watchChan chan kv.WatchResponse 15 | type scanner func(dest ...interface{}) error 16 | 17 | func newClient(ctx context.Context, dialectName string, db *sql.DB) (kv.Client, error) { 18 | dialect, ok := dialects[dialectName] 19 | if !ok { 20 | return nil, fmt.Errorf("Failed to find dialect %v", dialectName) 21 | } 22 | 23 | if err := dialect.Start(ctx, db); err != nil { 24 | return nil, err 25 | } 26 | 27 | client := &client{ 28 | db: db, 29 | dialect: dialect, 30 | events: make(chan kv.Event, chanSize), 31 | watchers: map[string][]watchChan{}, 32 | } 33 | go client.watchEvents(ctx) 34 | 35 | return client, nil 36 | } 37 | 38 | type client struct { 39 | sync.Mutex 40 | db *sql.DB 41 | dialect dialect 42 | events chan kv.Event 43 | watchers map[string][]watchChan 44 | } 45 | 46 | func (c *client) Get(ctx context.Context, key string) (*kv.KeyValue, error) { 47 | return c.dialect.Get(ctx, c.db, key) 48 | } 49 | 50 | func (c *client) List(ctx context.Context, revision int64, key string) ([]*kv.KeyValue, error) { 51 | return c.dialect.List(ctx, c.db, revision, key) 52 | } 53 | 54 | func (c *client) Create(ctx context.Context, key string, value []byte, ttl uint64) (*kv.KeyValue, error) { 55 | err := c.dialect.Create(ctx, c.db, key, value, ttl) 56 | // TODO: Check for specific error? 
Don't just assume the key is taken 57 | if err != nil { 58 | return nil, kv.ErrExists 59 | } 60 | 61 | result := &kv.KeyValue{ 62 | Key: key, 63 | Value: value, 64 | Revision: 1, 65 | } 66 | c.created(result) 67 | return result, nil 68 | } 69 | 70 | func (c *client) Delete(ctx context.Context, key string) (*kv.KeyValue, error) { 71 | return c.deleteVersion(ctx, key, nil) 72 | } 73 | 74 | func (c *client) DeleteVersion(ctx context.Context, key string, revision int64) error { 75 | _, err := c.deleteVersion(ctx, key, &revision) 76 | return err 77 | } 78 | 79 | func (c *client) deleteVersion(ctx context.Context, key string, revision *int64) (*kv.KeyValue, error) { 80 | value, err := c.dialect.Delete(ctx, c.db, key, revision) 81 | if err != nil { 82 | return nil, err 83 | } 84 | c.deleted(value) 85 | return value, nil 86 | } 87 | 88 | func (c *client) UpdateOrCreate(ctx context.Context, key string, value []byte, revision int64, ttl uint64) (*kv.KeyValue, error) { 89 | oldKv, newKv, err := c.dialect.Update(ctx, c.db, key, value, revision) 90 | if err == ErrRevisionMatch { 91 | return nil, kv.ErrNotExists 92 | } else if err == kv.ErrNotExists { 93 | return c.Create(ctx, key, value, 0) 94 | } else if err != nil { 95 | return nil, err 96 | } 97 | 98 | c.updated(oldKv, newKv) 99 | return newKv, nil 100 | } 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This repo is not maintained, the idea was continued forward using a different approach, refer to https://github.com/ibuildthecloud/etcd/tree/clientv3 2 | --------------------- 3 | 4 | SQL Storage Backend for Kubernetes 5 | ================================== 6 | 7 | Use a DB such as MySQL, RDS, or Cloud SQL for your Kubernetes cluster state. 8 | 9 | Why? 10 | ---- 11 | 12 | This has very little to do with the technical merits of etcd or a traditional SQL database. 
Both are persistent systems that require consideration when running in production. Given various factors in your organization, a SQL database may be more desirable to run than etcd. This may be due to in-house expertise or the fact that services such as RDS and CloudSQL are readily available. 13 | 14 | Another consideration is that SQL is a frontend added to many non-traditional databases. Because this implementation uses very simple SQL statements (single table, no joins, simple SELECT, UPDATE, DELETE), it may be possible to leverage this interface to bring more storage backends to Kubernetes. 15 | 16 | And lastly, I (@ibuildthecloud) like to experiment and often do things because I can :) 17 | 18 | How? 19 | ---- 20 | 21 | Come to find out, Kubernetes itself handles a lot of the magic of data access. The access patterns are quite basic. While etcd has many incredibly useful features, a lot of them aren't used. Kubernetes needs basic CRUD operations, TTL, and change notifications. Change notifications are the only thing that requires some thought. Right now it's very simple and stupid and based on the assumption that there is only one API server, so changes are easy to track. For HA something else will have to be implemented, but there are many smart approaches for that, I'm sure. 22 | 23 | 24 | Try it out 25 | ---------- 26 | 27 | While the code is fairly generic SQL, it is currently only tested with MySQL. Adapting it to another RDBMS should be a trivial effort. But for now... use MySQL. 28 | 29 | 30 | 1. Create a database with the following schema 31 | 32 | ```sql 33 | CREATE TABLE `key_value` ( 34 | `name` varchar(255) DEFAULT NULL, 35 | `value` mediumblob, 36 | `revision` bigint(20) DEFAULT NULL, 37 | `ttl` bigint(20) NOT NULL DEFAULT '0' 38 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 39 | 40 | ALTER TABLE `key_value` 41 | ADD UNIQUE KEY `uix_key_value_name` (`name`), 42 | ADD KEY `idx_key_value__ttl` (`ttl`); 43 | ``` 44 | 45 | 2. 
[Download](https://github.com/rancher/k8s-sql/releases/download/v0.0.1/kube-apiserver.xz) this custom patched Kubernetes API Server 1.7.4 server [[Source]](https://github.com/rancher/kubernetes/releases/tag/v1.7.6-netes1) 46 | 47 | 3. Run with your usual args but add the additional arguments 48 | 49 | ``` 50 | --storage-backend=rdbms 51 | --etcd-servers=mysql 52 | --etcd-servers=k8s:k8s@tcp(localhost:3306)/k8s 53 | --watch-cache=false 54 | ``` 55 | 56 | This assuming you have username/password as k8s/k8s and a database created called k8s. 57 | 58 | Yeah, it's a bit hacky because the API server is sort of hard coded to etcd. Also I've only tested with watch-cache off. 59 | 60 | 61 | Known Issues/Limitations 62 | ------------------------ 63 | 64 | 1. Watch caching doesn't work correctly 65 | 2. No HA apiserver support. Watches are using in-memory stuff that won't allow multiple API servers. If this basic implementation goes well shouldn't be terrible to add proper HA. 66 | -------------------------------------------------------------------------------- /dialect/generic.go: -------------------------------------------------------------------------------- 1 | package dialect 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "sync/atomic" 7 | "time" 8 | 9 | "github.com/pkg/errors" 10 | "github.com/rancher/k8s-sql" 11 | "github.com/rancher/k8s-sql/kv" 12 | ) 13 | 14 | type Generic struct { 15 | CleanupSQL string 16 | GetSQL string 17 | ListSQL string 18 | ListRevisionSQL string 19 | CreateSQL string 20 | DeleteSQL string 21 | UpdateSQL string 22 | GetRevisionSQL string 23 | revision int64 24 | } 25 | 26 | func (g *Generic) Start(ctx context.Context, db *sql.DB) error { 27 | row := db.QueryRowContext(ctx, g.GetRevisionSQL) 28 | rev := sql.NullInt64{} 29 | if err := row.Scan(&rev); err != nil { 30 | return errors.Wrap(err, "Failed to initialize revision") 31 | } 32 | if rev.Int64 == 0 { 33 | g.revision = 1 34 | } else { 35 | g.revision = rev.Int64 36 | } 37 | 38 | go 
func() { 39 | for { 40 | select { 41 | case <-ctx.Done(): 42 | return 43 | case <-time.After(time.Minute): 44 | db.ExecContext(ctx, g.CleanupSQL, time.Now().Second()) 45 | } 46 | } 47 | }() 48 | 49 | return nil 50 | } 51 | 52 | func (g *Generic) Get(ctx context.Context, db *sql.DB, key string) (*kv.KeyValue, error) { 53 | value := kv.KeyValue{} 54 | row := db.QueryRowContext(ctx, g.GetSQL, key) 55 | 56 | err := scan(row.Scan, &value) 57 | if err == sql.ErrNoRows { 58 | return nil, nil 59 | } 60 | 61 | return &value, err 62 | } 63 | 64 | func (g *Generic) List(ctx context.Context, db *sql.DB, revision int64, key string) ([]*kv.KeyValue, error) { 65 | var ( 66 | rows *sql.Rows 67 | err error 68 | ) 69 | 70 | if revision <= 0 { 71 | rows, err = db.QueryContext(ctx, g.ListSQL, key+"%") 72 | } else { 73 | rows, err = db.QueryContext(ctx, g.ListRevisionSQL, revision, key+"%") 74 | } 75 | if err != nil { 76 | return nil, err 77 | } 78 | defer rows.Close() 79 | 80 | resp := []*kv.KeyValue{} 81 | for rows.Next() { 82 | value := kv.KeyValue{} 83 | if err := scan(rows.Scan, &value); err != nil { 84 | return nil, err 85 | } 86 | resp = append(resp, &value) 87 | } 88 | 89 | return resp, nil 90 | } 91 | 92 | func (g *Generic) Create(ctx context.Context, db *sql.DB, key string, value []byte, ttl uint64) error { 93 | if ttl != 0 { 94 | ttl = uint64(time.Now().Second()) + ttl 95 | } 96 | newRev := atomic.AddInt64(&g.revision, 1) 97 | _, err := db.ExecContext(ctx, g.CreateSQL, key, []byte(value), newRev, ttl) 98 | return err 99 | } 100 | 101 | func (g *Generic) Delete(ctx context.Context, db *sql.DB, key string, revision *int64) (*kv.KeyValue, error) { 102 | value, err := g.Get(ctx, db, key) 103 | if err != nil { 104 | return nil, err 105 | } 106 | if value == nil || (revision != nil && value.Revision != *revision) { 107 | return nil, kv.ErrNotExists 108 | } 109 | 110 | result, err := db.ExecContext(ctx, g.DeleteSQL, key, value.Revision) 111 | if err != nil { 112 | return nil, err 
113 | } 114 | 115 | rows, err := result.RowsAffected() 116 | if err != nil { 117 | return nil, err 118 | } 119 | 120 | if rows == 0 { 121 | return nil, kv.ErrNotExists 122 | } 123 | 124 | return value, nil 125 | } 126 | 127 | func (g *Generic) Update(ctx context.Context, db *sql.DB, key string, value []byte, revision int64) (*kv.KeyValue, *kv.KeyValue, error) { 128 | oldKv, err := g.Get(ctx, db, key) 129 | if err != nil { 130 | return nil, nil, err 131 | } 132 | if oldKv == nil { 133 | return nil, nil, kv.ErrNotExists 134 | } 135 | 136 | if oldKv.Revision != revision { 137 | return nil, nil, rdbms.ErrRevisionMatch 138 | } 139 | 140 | newRevision := atomic.AddInt64(&g.revision, 1) 141 | result, err := db.ExecContext(ctx, g.UpdateSQL, value, newRevision, key, oldKv.Revision) 142 | if err != nil { 143 | return nil, nil, err 144 | } 145 | 146 | rows, err := result.RowsAffected() 147 | if err != nil { 148 | return nil, nil, err 149 | } 150 | if rows == 0 { 151 | return nil, nil, rdbms.ErrRevisionMatch 152 | } 153 | 154 | return oldKv, &kv.KeyValue{ 155 | Key: oldKv.Key, 156 | Value: []byte(value), 157 | Revision: oldKv.Revision + 1, 158 | }, nil 159 | } 160 | 161 | type scanner func(dest ...interface{}) error 162 | 163 | func scan(s scanner, out *kv.KeyValue) error { 164 | return s(&out.Key, &out.Value, &out.Revision) 165 | } 166 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | -------------------------------------------------------------------------------- /kv/watcher.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2016 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package kv 18 | 19 | import ( 20 | "errors" 21 | "fmt" 22 | "net/http" 23 | "os" 24 | "strconv" 25 | "strings" 26 | "sync" 27 | 28 | "github.com/golang/glog" 29 | "golang.org/x/net/context" 30 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 | "k8s.io/apimachinery/pkg/runtime" 32 | "k8s.io/apimachinery/pkg/watch" 33 | "k8s.io/apiserver/pkg/storage" 34 | "k8s.io/apiserver/pkg/storage/value" 35 | ) 36 | 37 | const ( 38 | // We have set a buffer in order to reduce times of context switches. 39 | incomingBufSize = 100 40 | outgoingBufSize = 100 41 | ) 42 | 43 | // fatalOnDecodeError is used during testing to panic the server if watcher encounters a decoding error 44 | var fatalOnDecodeError = false 45 | 46 | // errTestingDecode is the only error that testingDeferOnDecodeError catches during a panic 47 | var errTestingDecode = errors.New("sentinel error only used during testing to indicate watch decoding error") 48 | 49 | // testingDeferOnDecodeError is used during testing to recover from a panic caused by errTestingDecode, all other values continue to panic 50 | func testingDeferOnDecodeError() { 51 | if r := recover(); r != nil && r != errTestingDecode { 52 | panic(r) 53 | } 54 | } 55 | 56 | func init() { 57 | // check to see if we are running in a test environment 58 | fatalOnDecodeError, _ = strconv.ParseBool(os.Getenv("KUBE_PANIC_WATCH_DECODE_ERROR")) 59 | } 60 | 61 | type watcher struct { 62 | client Client 63 | codec runtime.Codec 64 | versioner storage.Versioner 65 | transformer value.Transformer 66 | } 67 | 68 | // 
watchChan implements watch.Interface. 69 | type watchChan struct { 70 | watcher *watcher 71 | key string 72 | rev int64 73 | recursive bool 74 | internalFilter storage.FilterFunc 75 | ctx context.Context 76 | cancel context.CancelFunc 77 | incomingEventChan chan *event 78 | resultChan chan watch.Event 79 | errChan chan error 80 | } 81 | 82 | func newWatcher(client Client, codec runtime.Codec, versioner storage.Versioner, transformer value.Transformer) *watcher { 83 | return &watcher{ 84 | client: client, 85 | codec: codec, 86 | versioner: versioner, 87 | transformer: transformer, 88 | } 89 | } 90 | 91 | // Watch watches on a key and returns a watch.Interface that transfers relevant notifications. 92 | // If rev is zero, it will return the existing object(s) and then start watching from 93 | // the maximum revision+1 from returned objects. 94 | // If rev is non-zero, it will watch events happened after given revision. 95 | // If recursive is false, it watches on given key. 96 | // If recursive is true, it watches any children and directories under the key, excluding the root key itself. 97 | // pred must be non-nil. Only if pred matches the change, it will be returned. 
98 | func (w *watcher) Watch(ctx context.Context, key string, rev int64, recursive bool, pred storage.SelectionPredicate) (watch.Interface, error) { 99 | if recursive && !strings.HasSuffix(key, "/") { 100 | key += "/" 101 | } 102 | wc := w.createWatchChan(ctx, key, rev, recursive, pred) 103 | go wc.run() 104 | return wc, nil 105 | } 106 | 107 | func (w *watcher) createWatchChan(ctx context.Context, key string, rev int64, recursive bool, pred storage.SelectionPredicate) *watchChan { 108 | wc := &watchChan{ 109 | watcher: w, 110 | key: key, 111 | rev: rev, 112 | recursive: recursive, 113 | internalFilter: storage.SimpleFilter(pred), 114 | incomingEventChan: make(chan *event, incomingBufSize), 115 | resultChan: make(chan watch.Event, outgoingBufSize), 116 | errChan: make(chan error, 1), 117 | } 118 | if pred.Empty() { 119 | // The filter doesn't filter out any object. 120 | wc.internalFilter = nil 121 | } 122 | wc.ctx, wc.cancel = context.WithCancel(ctx) 123 | return wc 124 | } 125 | 126 | func (wc *watchChan) run() { 127 | watchClosedCh := make(chan struct{}) 128 | go wc.startWatching(watchClosedCh) 129 | 130 | var resultChanWG sync.WaitGroup 131 | resultChanWG.Add(1) 132 | go wc.processEvent(&resultChanWG) 133 | 134 | select { 135 | case err := <-wc.errChan: 136 | if err == context.Canceled { 137 | break 138 | } 139 | errResult := parseError(err) 140 | if errResult != nil { 141 | // error result is guaranteed to be received by user before closing ResultChan. 142 | select { 143 | case wc.resultChan <- *errResult: 144 | case <-wc.ctx.Done(): // user has given up all results 145 | } 146 | } 147 | case <-watchClosedCh: 148 | case <-wc.ctx.Done(): // user cancel 149 | } 150 | 151 | // We use wc.ctx to reap all goroutines. Under whatever condition, we should stop them all. 152 | // It's fine to double cancel. 
153 | wc.cancel() 154 | 155 | // we need to wait until resultChan wouldn't be used anymore 156 | resultChanWG.Wait() 157 | close(wc.resultChan) 158 | } 159 | 160 | func (wc *watchChan) Stop() { 161 | wc.cancel() 162 | } 163 | 164 | func (wc *watchChan) ResultChan() <-chan watch.Event { 165 | return wc.resultChan 166 | } 167 | 168 | // startWatching does: 169 | // - get current objects if initialRev=0; set initialRev to current rev 170 | // - watch on given key and send events to process. 171 | func (wc *watchChan) startWatching(watchClosedCh chan struct{}) { 172 | getResp, wch, err := wc.watcher.client.Watch(wc.ctx, wc.rev, wc.key) 173 | if err != nil { 174 | glog.Errorf("failed to sync with latest state: %v", err) 175 | wc.sendError(err) 176 | return 177 | } 178 | 179 | for _, item := range getResp { 180 | wc.sendEvent(parseKV(item)) 181 | } 182 | 183 | for wres := range wch { 184 | if wres.Err() != nil { 185 | err := wres.Err() 186 | // If there is an error on server (e.g. compaction), the channel will return it before closed. 187 | glog.Errorf("watch chan error: %v", err) 188 | wc.sendError(err) 189 | return 190 | } 191 | for _, e := range wres.Events { 192 | wc.sendEvent(parseEvent(e)) 193 | } 194 | } 195 | // When we come to this point, it's only possible that client side ends the watch. 196 | // e.g. cancel the context, close the client. 197 | // If this watch chan is broken and context isn't cancelled, other goroutines will still hang. 198 | // We should notify the main thread that this goroutine has exited. 199 | close(watchClosedCh) 200 | } 201 | 202 | // processEvent processes events from etcd watcher and sends results to resultChan. 
203 | func (wc *watchChan) processEvent(wg *sync.WaitGroup) { 204 | defer wg.Done() 205 | 206 | for { 207 | select { 208 | case e := <-wc.incomingEventChan: 209 | res := wc.transform(e) 210 | if res == nil { 211 | continue 212 | } 213 | if len(wc.resultChan) == outgoingBufSize { 214 | glog.Warningf("Fast watcher, slow processing. Number of buffered events: %d."+ 215 | "Probably caused by slow dispatching events to watchers", outgoingBufSize) 216 | } 217 | // If user couldn't receive results fast enough, we also block incoming events from watcher. 218 | // Because storing events in local will cause more memory usage. 219 | // The worst case would be closing the fast watcher. 220 | select { 221 | case wc.resultChan <- *res: 222 | case <-wc.ctx.Done(): 223 | return 224 | } 225 | case <-wc.ctx.Done(): 226 | return 227 | } 228 | } 229 | } 230 | 231 | func (wc *watchChan) filter(obj runtime.Object) bool { 232 | if wc.internalFilter == nil { 233 | return true 234 | } 235 | return wc.internalFilter(obj) 236 | } 237 | 238 | func (wc *watchChan) acceptAll() bool { 239 | return wc.internalFilter == nil 240 | } 241 | 242 | // transform transforms an event into a result for user if not filtered. 
243 | func (wc *watchChan) transform(e *event) (res *watch.Event) { 244 | curObj, oldObj, err := wc.prepareObjs(e) 245 | if err != nil { 246 | glog.Errorf("failed to prepare current and previous objects: %v", err) 247 | wc.sendError(err) 248 | return nil 249 | } 250 | 251 | switch { 252 | case e.isDeleted: 253 | if !wc.filter(oldObj) { 254 | return nil 255 | } 256 | res = &watch.Event{ 257 | Type: watch.Deleted, 258 | Object: oldObj, 259 | } 260 | case e.isCreated: 261 | if !wc.filter(curObj) { 262 | return nil 263 | } 264 | res = &watch.Event{ 265 | Type: watch.Added, 266 | Object: curObj, 267 | } 268 | default: 269 | if wc.acceptAll() { 270 | res = &watch.Event{ 271 | Type: watch.Modified, 272 | Object: curObj, 273 | } 274 | return res 275 | } 276 | curObjPasses := wc.filter(curObj) 277 | oldObjPasses := wc.filter(oldObj) 278 | switch { 279 | case curObjPasses && oldObjPasses: 280 | res = &watch.Event{ 281 | Type: watch.Modified, 282 | Object: curObj, 283 | } 284 | case curObjPasses && !oldObjPasses: 285 | res = &watch.Event{ 286 | Type: watch.Added, 287 | Object: curObj, 288 | } 289 | case !curObjPasses && oldObjPasses: 290 | res = &watch.Event{ 291 | Type: watch.Deleted, 292 | Object: oldObj, 293 | } 294 | } 295 | } 296 | return res 297 | } 298 | 299 | func parseError(err error) *watch.Event { 300 | return &watch.Event{ 301 | Type: watch.Error, 302 | Object: &metav1.Status{ 303 | Status: metav1.StatusFailure, 304 | Message: err.Error(), 305 | Code: http.StatusInternalServerError, 306 | Reason: metav1.StatusReasonInternalError, 307 | }, 308 | } 309 | } 310 | 311 | func (wc *watchChan) sendError(err error) { 312 | select { 313 | case wc.errChan <- err: 314 | case <-wc.ctx.Done(): 315 | } 316 | } 317 | 318 | func (wc *watchChan) sendEvent(e *event) { 319 | if len(wc.incomingEventChan) == incomingBufSize { 320 | glog.Warningf("Fast watcher, slow processing. 
Number of buffered events: %d."+ 321 | "Probably caused by slow decoding, user not receiving fast, or other processing logic", 322 | incomingBufSize) 323 | } 324 | select { 325 | case wc.incomingEventChan <- e: 326 | case <-wc.ctx.Done(): 327 | } 328 | } 329 | 330 | func (wc *watchChan) prepareObjs(e *event) (curObj runtime.Object, oldObj runtime.Object, err error) { 331 | if !e.isDeleted { 332 | data, _, err := wc.watcher.transformer.TransformFromStorage(e.value, authenticatedDataString(e.key)) 333 | if err != nil { 334 | return nil, nil, err 335 | } 336 | curObj, err = decodeObj(wc.watcher.codec, wc.watcher.versioner, data, e.rev) 337 | if err != nil { 338 | return nil, nil, err 339 | } 340 | } 341 | // We need to decode prevValue, only if this is deletion event or 342 | // the underlying filter doesn't accept all objects (otherwise we 343 | // know that the filter for previous object will return true and 344 | // we need the object only to compute whether it was filtered out 345 | // before). 346 | if len(e.prevValue) > 0 && (e.isDeleted || !wc.acceptAll()) { 347 | data, _, err := wc.watcher.transformer.TransformFromStorage(e.prevValue, authenticatedDataString(e.key)) 348 | if err != nil { 349 | return nil, nil, err 350 | } 351 | // Note that this sends the *old* object with the etcd revision for the time at 352 | // which it gets deleted. 
353 | oldObj, err = decodeObj(wc.watcher.codec, wc.watcher.versioner, data, e.rev) 354 | if err != nil { 355 | return nil, nil, err 356 | } 357 | } 358 | return curObj, oldObj, nil 359 | } 360 | 361 | func decodeObj(codec runtime.Codec, versioner storage.Versioner, data []byte, rev int64) (_ runtime.Object, err error) { 362 | obj, err := runtime.Decode(codec, []byte(data)) 363 | if err != nil { 364 | if fatalOnDecodeError { 365 | // catch watch decode error iff we caused it on 366 | // purpose during a unit test 367 | defer testingDeferOnDecodeError() 368 | // we are running in a test environment and thus an 369 | // error here is due to a coder mistake if the defer 370 | // does not catch it 371 | panic(err) 372 | } 373 | return nil, err 374 | } 375 | // ensure resource version is set on the object we load from etcd 376 | if err := versioner.UpdateObject(obj, uint64(rev)); err != nil { 377 | return nil, fmt.Errorf("failure to version api object (%d) %#v: %v", rev, obj, err) 378 | } 379 | return obj, nil 380 | } 381 | -------------------------------------------------------------------------------- /kv/store.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2016 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package kv 18 | 19 | import ( 20 | "bytes" 21 | "errors" 22 | "fmt" 23 | "path" 24 | "reflect" 25 | "strconv" 26 | "strings" 27 | "time" 28 | 29 | "github.com/golang/glog" 30 | err2 "github.com/pkg/errors" 31 | "golang.org/x/net/context" 32 | "k8s.io/apimachinery/pkg/api/meta" 33 | "k8s.io/apimachinery/pkg/conversion" 34 | "k8s.io/apimachinery/pkg/runtime" 35 | utilruntime "k8s.io/apimachinery/pkg/util/runtime" 36 | "k8s.io/apimachinery/pkg/watch" 37 | "k8s.io/apiserver/pkg/storage" 38 | "k8s.io/apiserver/pkg/storage/etcd" 39 | "k8s.io/apiserver/pkg/storage/value" 40 | utiltrace "k8s.io/apiserver/pkg/util/trace" 41 | ) 42 | 43 | // authenticatedDataString satisfies the value.Context interface. It uses the key to 44 | // authenticate the stored data. This does not defend against reuse of previously 45 | // encrypted values under the same key, but will prevent an attacker from using an 46 | // encrypted value from a different key. A stronger authenticated data segment would 47 | // include the etcd3 Version field (which is incremented on each write to a key and 48 | // reset when the key is deleted), but an attacker with write access to etcd can 49 | // force deletion and recreation of keys to weaken that angle. 50 | type authenticatedDataString string 51 | 52 | // AuthenticatedData implements the value.Context interface. 53 | func (d authenticatedDataString) AuthenticatedData() []byte { 54 | return []byte(string(d)) 55 | } 56 | 57 | var _ value.Context = authenticatedDataString("") 58 | 59 | type store struct { 60 | client Client 61 | // getOpts contains additional options that should be passed 62 | // to all Get() calls. 
63 | codec runtime.Codec 64 | versioner storage.Versioner 65 | transformer value.Transformer 66 | pathPrefix string 67 | watcher *watcher 68 | } 69 | 70 | type elemForDecode struct { 71 | data []byte 72 | rev uint64 73 | } 74 | 75 | type objState struct { 76 | obj runtime.Object 77 | meta *storage.ResponseMeta 78 | rev int64 79 | data []byte 80 | stale bool 81 | } 82 | 83 | // New returns an etcd3 implementation of storage.Interface. 84 | func New(c Client, codec runtime.Codec, prefix string, transformer value.Transformer) storage.Interface { 85 | return newStore(c, codec, prefix, transformer) 86 | } 87 | 88 | func newStore(c Client, codec runtime.Codec, prefix string, transformer value.Transformer) *store { 89 | versioner := etcd.APIObjectVersioner{} 90 | result := &store{ 91 | client: c, 92 | codec: codec, 93 | versioner: versioner, 94 | transformer: transformer, 95 | // for compatibility with etcd2 impl. 96 | // no-op for default prefix of '/registry'. 97 | // keeps compatibility with etcd2 impl for custom prefixes that don't start with '/' 98 | pathPrefix: path.Join("/", prefix), 99 | watcher: newWatcher(c, codec, versioner, transformer), 100 | } 101 | return result 102 | } 103 | 104 | // Versioner implements storage.Interface.Versioner. 105 | func (s *store) Versioner() storage.Versioner { 106 | return s.versioner 107 | } 108 | 109 | // Get implements storage.Interface.Get. 
110 | func (s *store) Get(ctx context.Context, key string, resourceVersion string, out runtime.Object, ignoreNotFound bool) error { 111 | key = path.Join(s.pathPrefix, key) 112 | resp, err := s.client.Get(ctx, key) 113 | if err != nil { 114 | return err 115 | } 116 | 117 | if resp == nil { 118 | if ignoreNotFound { 119 | return runtime.SetZeroValue(out) 120 | } 121 | return storage.NewKeyNotFoundError(key, 0) 122 | } 123 | 124 | data, _, err := s.transformer.TransformFromStorage(resp.Value, authenticatedDataString(key)) 125 | if err != nil { 126 | return storage.NewInternalError(err.Error()) 127 | } 128 | 129 | return decode(s.codec, s.versioner, data, out, resp.Revision) 130 | } 131 | 132 | // Create implements storage.Interface.Create. 133 | func (s *store) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error { 134 | if version, err := s.versioner.ObjectResourceVersion(obj); err == nil && version != 0 { 135 | return errors.New("resourceVersion should not be set on objects to be created") 136 | } 137 | data, err := runtime.Encode(s.codec, obj) 138 | if err != nil { 139 | return err 140 | } 141 | key = path.Join(s.pathPrefix, key) 142 | 143 | newData, err := s.transformer.TransformToStorage(data, authenticatedDataString(key)) 144 | if err != nil { 145 | return storage.NewInternalError(err.Error()) 146 | } 147 | 148 | resp, err := s.client.Create(ctx, key, newData, ttl) 149 | if err == ErrExists { 150 | return storage.NewKeyExistsError(key, 0) 151 | } else if err != nil { 152 | return err 153 | } 154 | 155 | if out != nil { 156 | return decode(s.codec, s.versioner, data, out, resp.Revision) 157 | } 158 | return nil 159 | } 160 | 161 | // Delete implements storage.Interface.Delete. 
162 | func (s *store) Delete(ctx context.Context, key string, out runtime.Object, preconditions *storage.Preconditions) error { 163 | v, err := conversion.EnforcePtr(out) 164 | if err != nil { 165 | panic("unable to convert output object to pointer") 166 | } 167 | key = path.Join(s.pathPrefix, key) 168 | if preconditions == nil { 169 | return s.unconditionalDelete(ctx, key, out) 170 | } 171 | return s.conditionalDelete(ctx, key, out, v, preconditions) 172 | } 173 | 174 | func (s *store) unconditionalDelete(ctx context.Context, key string, out runtime.Object) error { 175 | // We need to do get and delete in single transaction in order to 176 | // know the value and revision before deleting it. 177 | resp, err := s.client.Delete(ctx, key) 178 | if err == ErrNotExists { 179 | return storage.NewKeyNotFoundError(key, 0) 180 | } else if err != nil { 181 | return err 182 | } 183 | 184 | data, _, err := s.transformer.TransformFromStorage(resp.Value, authenticatedDataString(key)) 185 | if err != nil { 186 | return storage.NewInternalError(err.Error()) 187 | } 188 | return decode(s.codec, s.versioner, data, out, resp.Revision) 189 | } 190 | 191 | func (s *store) conditionalDelete(ctx context.Context, key string, out runtime.Object, v reflect.Value, preconditions *storage.Preconditions) error { 192 | for { 193 | getResp, err := s.client.Get(ctx, key) 194 | if err != nil { 195 | return err 196 | } 197 | 198 | origState, err := s.getState(getResp, key, v, false) 199 | if err != nil { 200 | return err 201 | } 202 | if err := checkPreconditions(key, preconditions, origState.obj); err != nil { 203 | return err 204 | } 205 | if err := s.client.DeleteVersion(ctx, key, origState.rev); err == ErrNotExists { 206 | continue 207 | } else if err != nil { 208 | return err 209 | } 210 | return decode(s.codec, s.versioner, origState.data, out, origState.rev) 211 | } 212 | } 213 | 214 | // GuaranteedUpdate implements storage.Interface.GuaranteedUpdate. 
215 | func (s *store) GuaranteedUpdate( 216 | ctx context.Context, key string, out runtime.Object, ignoreNotFound bool, 217 | preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, suggestion ...runtime.Object) error { 218 | trace := utiltrace.New(fmt.Sprintf("GuaranteedUpdate etcd3: %s", reflect.TypeOf(out).String())) 219 | defer trace.LogIfLong(500 * time.Millisecond) 220 | 221 | v, err := conversion.EnforcePtr(out) 222 | if err != nil { 223 | panic("unable to convert output object to pointer") 224 | } 225 | key = path.Join(s.pathPrefix, key) 226 | 227 | var origState *objState 228 | if len(suggestion) == 1 && suggestion[0] != nil { 229 | origState, err = s.getStateFromObject(suggestion[0]) 230 | if err != nil { 231 | return err 232 | } 233 | } else { 234 | getResp, err := s.client.Get(ctx, key) 235 | if err != nil { 236 | return err 237 | } 238 | origState, err = s.getState(getResp, key, v, ignoreNotFound) 239 | if err != nil { 240 | return err 241 | } 242 | } 243 | trace.Step("initial value restored") 244 | 245 | transformContext := authenticatedDataString(key) 246 | for { 247 | if err := checkPreconditions(key, preconditions, origState.obj); err != nil { 248 | return err 249 | } 250 | 251 | ret, ttl, err := s.updateState(origState, tryUpdate) 252 | if err != nil { 253 | return err 254 | } 255 | 256 | data, err := runtime.Encode(s.codec, ret) 257 | if err != nil { 258 | return err 259 | } 260 | if !origState.stale && bytes.Equal(data, origState.data) { 261 | return decode(s.codec, s.versioner, origState.data, out, origState.rev) 262 | } 263 | 264 | newData, err := s.transformer.TransformToStorage(data, transformContext) 265 | if err != nil { 266 | return storage.NewInternalError(err.Error()) 267 | } 268 | 269 | trace.Step("Transaction prepared") 270 | 271 | resp, err := s.client.UpdateOrCreate(ctx, key, newData, origState.rev, ttl) 272 | if err == ErrNotExists { 273 | glog.V(4).Infof("GuaranteedUpdate of %s failed because of a conflict, going to 
retry", key) 274 | origState, err = s.getState(resp, key, v, ignoreNotFound) 275 | if err != nil { 276 | return err 277 | } 278 | trace.Step("Retry value restored") 279 | continue 280 | } else if err != nil { 281 | return err 282 | } 283 | 284 | trace.Step("Transaction committed") 285 | 286 | return decode(s.codec, s.versioner, data, out, resp.Revision) 287 | } 288 | } 289 | 290 | // GetToList implements storage.Interface.GetToList. 291 | func (s *store) GetToList(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate, listObj runtime.Object) error { 292 | listPtr, err := meta.GetItemsPtr(listObj) 293 | if err != nil { 294 | return err 295 | } 296 | key = path.Join(s.pathPrefix, key) 297 | 298 | resp, err := s.client.Get(ctx, key) 299 | if err != nil { 300 | return err 301 | } 302 | if resp == nil { 303 | return nil 304 | } 305 | data, _, err := s.transformer.TransformFromStorage(resp.Value, authenticatedDataString(key)) 306 | if err != nil { 307 | return storage.NewInternalError(err.Error()) 308 | } 309 | elems := []*elemForDecode{{ 310 | data: data, 311 | rev: uint64(resp.Revision), 312 | }} 313 | if err := decodeList(elems, storage.SimpleFilter(pred), listPtr, s.codec, s.versioner); err != nil { 314 | return err 315 | } 316 | return s.versioner.UpdateList(listObj, uint64(resp.Revision)) 317 | } 318 | 319 | // List implements storage.Interface.List. 320 | func (s *store) List(ctx context.Context, key, resourceVersion string, pred storage.SelectionPredicate, listObj runtime.Object) error { 321 | listPtr, err := meta.GetItemsPtr(listObj) 322 | if err != nil { 323 | return err 324 | } 325 | key = path.Join(s.pathPrefix, key) 326 | // We need to make sure the key ended with "/" so that we only get children "directories". 327 | // e.g. if we have key "/a", "/a/b", "/ab", getting keys with prefix "/a" will return all three, 328 | // while with prefix "/a/" will return only "/a/b" which is the correct answer. 
329 | if !strings.HasSuffix(key, "/") { 330 | key += "/" 331 | } 332 | 333 | if resourceVersion == "" { 334 | resourceVersion = "0" 335 | } 336 | 337 | revInt, err := strconv.ParseInt(resourceVersion, 10, 64) 338 | if err != nil { 339 | return err2.Wrapf(err, "Invalid revision: %s", resourceVersion) 340 | } 341 | getResp, err := s.client.List(ctx, revInt, key) 342 | if err != nil { 343 | return err 344 | } 345 | 346 | elems := make([]*elemForDecode, 0, len(getResp)) 347 | revision := int64(0) 348 | for _, item := range getResp { 349 | if item.Revision > revision { 350 | revision = item.Revision 351 | } 352 | 353 | data, _, err := s.transformer.TransformFromStorage(item.Value, authenticatedDataString(item.Key)) 354 | if err != nil { 355 | utilruntime.HandleError(fmt.Errorf("unable to transform key %q: %v", key, err)) 356 | continue 357 | } 358 | 359 | elems = append(elems, &elemForDecode{ 360 | data: data, 361 | rev: uint64(item.Revision), 362 | }) 363 | } 364 | if err := decodeList(elems, storage.SimpleFilter(pred), listPtr, s.codec, s.versioner); err != nil { 365 | return err 366 | } 367 | return s.versioner.UpdateList(listObj, uint64(revision)) 368 | } 369 | 370 | // Watch implements storage.Interface.Watch. 371 | func (s *store) Watch(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate) (watch.Interface, error) { 372 | return s.watch(ctx, key, resourceVersion, pred, false) 373 | } 374 | 375 | // WatchList implements storage.Interface.WatchList. 
376 | func (s *store) WatchList(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate) (watch.Interface, error) { 377 | return s.watch(ctx, key, resourceVersion, pred, true) 378 | } 379 | 380 | func (s *store) watch(ctx context.Context, key string, rv string, pred storage.SelectionPredicate, recursive bool) (watch.Interface, error) { 381 | rev, err := storage.ParseWatchResourceVersion(rv) 382 | if err != nil { 383 | return nil, err 384 | } 385 | key = path.Join(s.pathPrefix, key) 386 | return s.watcher.Watch(ctx, key, int64(rev), recursive, pred) 387 | } 388 | 389 | func (s *store) getState(item *KeyValue, key string, v reflect.Value, ignoreNotFound bool) (*objState, error) { 390 | state := &objState{ 391 | obj: reflect.New(v.Type()).Interface().(runtime.Object), 392 | meta: &storage.ResponseMeta{}, 393 | } 394 | if item == nil { 395 | if !ignoreNotFound { 396 | return nil, storage.NewKeyNotFoundError(key, 0) 397 | } 398 | if err := runtime.SetZeroValue(state.obj); err != nil { 399 | return nil, err 400 | } 401 | } else { 402 | data, stale, err := s.transformer.TransformFromStorage(item.Value, authenticatedDataString(key)) 403 | if err != nil { 404 | return nil, storage.NewInternalError(err.Error()) 405 | } 406 | state.rev = item.Revision 407 | state.meta.ResourceVersion = uint64(state.rev) 408 | state.data = data 409 | state.stale = stale 410 | if err := decode(s.codec, s.versioner, state.data, state.obj, state.rev); err != nil { 411 | return nil, err 412 | } 413 | } 414 | return state, nil 415 | } 416 | 417 | func (s *store) getStateFromObject(obj runtime.Object) (*objState, error) { 418 | state := &objState{ 419 | obj: obj, 420 | meta: &storage.ResponseMeta{}, 421 | } 422 | 423 | rv, err := s.versioner.ObjectResourceVersion(obj) 424 | if err != nil { 425 | return nil, fmt.Errorf("couldn't get resource version: %v", err) 426 | } 427 | state.rev = int64(rv) 428 | state.meta.ResourceVersion = uint64(state.rev) 429 | 430 | // 
Compute the serialized form - for that we need to temporarily clean 431 | // its resource version field (those are not stored in etcd). 432 | if err := s.versioner.UpdateObject(obj, 0); err != nil { 433 | return nil, errors.New("resourceVersion cannot be set on objects store in etcd") 434 | } 435 | state.data, err = runtime.Encode(s.codec, obj) 436 | if err != nil { 437 | return nil, err 438 | } 439 | s.versioner.UpdateObject(state.obj, uint64(rv)) 440 | return state, nil 441 | } 442 | 443 | func (s *store) updateState(st *objState, userUpdate storage.UpdateFunc) (runtime.Object, uint64, error) { 444 | ret, ttlPtr, err := userUpdate(st.obj, *st.meta) 445 | if err != nil { 446 | return nil, 0, err 447 | } 448 | 449 | version, err := s.versioner.ObjectResourceVersion(ret) 450 | if err != nil { 451 | return nil, 0, err 452 | } 453 | if version != 0 { 454 | // We cannot store object with resourceVersion in etcd. We need to reset it. 455 | if err := s.versioner.UpdateObject(ret, 0); err != nil { 456 | return nil, 0, fmt.Errorf("UpdateObject failed: %v", err) 457 | } 458 | } 459 | var ttl uint64 460 | if ttlPtr != nil { 461 | ttl = *ttlPtr 462 | } 463 | return ret, ttl, nil 464 | } 465 | 466 | // decode decodes value of bytes into object. It will also set the object resource version to rev. 467 | // On success, objPtr would be set to the object. 
468 | func decode(codec runtime.Codec, versioner storage.Versioner, value []byte, objPtr runtime.Object, rev int64) error { 469 | if _, err := conversion.EnforcePtr(objPtr); err != nil { 470 | panic("unable to convert output object to pointer") 471 | } 472 | _, _, err := codec.Decode(value, nil, objPtr) 473 | if err != nil { 474 | return err 475 | } 476 | // being unable to set the version does not prevent the object from being extracted 477 | versioner.UpdateObject(objPtr, uint64(rev)) 478 | return nil 479 | } 480 | 481 | // decodeList decodes a list of values into a list of objects, with resource version set to corresponding rev. 482 | // On success, ListPtr would be set to the list of objects. 483 | func decodeList(elems []*elemForDecode, filter storage.FilterFunc, ListPtr interface{}, codec runtime.Codec, versioner storage.Versioner) error { 484 | v, err := conversion.EnforcePtr(ListPtr) 485 | if err != nil || v.Kind() != reflect.Slice { 486 | panic("need ptr to slice") 487 | } 488 | for _, elem := range elems { 489 | obj, _, err := codec.Decode(elem.data, nil, reflect.New(v.Type().Elem()).Interface().(runtime.Object)) 490 | if err != nil { 491 | return err 492 | } 493 | // being unable to set the version does not prevent the object from being extracted 494 | versioner.UpdateObject(obj, elem.rev) 495 | if filter(obj) { 496 | v.Set(reflect.Append(v, reflect.ValueOf(obj).Elem())) 497 | } 498 | } 499 | return nil 500 | } 501 | 502 | func checkPreconditions(key string, preconditions *storage.Preconditions, out runtime.Object) error { 503 | if preconditions == nil { 504 | return nil 505 | } 506 | objMeta, err := meta.Accessor(out) 507 | if err != nil { 508 | return storage.NewInternalErrorf("can't enforce preconditions %v on un-introspectable object %v, got error: %v", *preconditions, out, err) 509 | } 510 | if preconditions.UID != nil && *preconditions.UID != objMeta.GetUID() { 511 | errMsg := fmt.Sprintf("Precondition failed: UID in precondition: %v, UID in object 
meta: %v", *preconditions.UID, objMeta.GetUID()) 512 | return storage.NewInvalidObjError(key, errMsg) 513 | } 514 | return nil 515 | } 516 | --------------------------------------------------------------------------------