├── README.md ├── scripts ├── bench-kafka.sh ├── Procfile.zk.kafka ├── bench.sh ├── Procfile.xchk ├── Procfile.zetcd.kafka └── Procfile.xchk.kafka ├── xchk ├── README.md ├── auth.go ├── authconn.go ├── session.go ├── conn.go └── zk.go ├── docker ├── zk │ ├── run │ └── Dockerfile └── kafka │ ├── run │ ├── Dockerfile │ └── start-kafka.sh ├── .travis.yml ├── zk ├── auth.go ├── zk.go └── session.go ├── Procfile ├── auth.go ├── path.go ├── cmd ├── zkboom │ └── zkboom.go ├── zkctl │ └── zkctl.go └── zetcd │ └── zetcd.go ├── authconn.go ├── session.go ├── zk.go ├── bench_test.go ├── stat.go ├── zklog.go ├── op.go ├── conn.go ├── client.go ├── server.go ├── structs.go ├── pool.go ├── watches.go ├── constants.go ├── integration_test.go ├── encode.go └── zketcd.go /README.md: -------------------------------------------------------------------------------- 1 | # zetcd 2 | 3 | The official zetcd repo has moved to [github.com/coreos/zetcd](https://github.com/coreos/zetcd). 4 | -------------------------------------------------------------------------------- /scripts/bench-kafka.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # go get -v github.com/wvanbergen/kafka/tools/stressproducer 4 | stressproducer -verbose 2>&1 | head -n100 5 | -------------------------------------------------------------------------------- /scripts/Procfile.zk.kafka: -------------------------------------------------------------------------------- 1 | zk: docker kill `docker ps | egrep "(zookeeper|kafka)" | cut -f1 -d' '`; cd ./docker/zk && ./run 2181 2 | 3 | kafka: sleep 5s; cd ./docker/kafka && ./run 4 | -------------------------------------------------------------------------------- /xchk/README.md: -------------------------------------------------------------------------------- 1 | # xchk 2 | 3 | Cross-checking zookeeper proxy. 
4 | 5 | +---- ZKC1 --- ZK Oracle 6 | | 7 | Client --- Xchk 8 | | 9 | +---- ZKC2 --- ZK Challenge 10 | -------------------------------------------------------------------------------- /docker/zk/run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$1" ]; then 4 | echo expecting host port argument 5 | exit 1 6 | fi 7 | 8 | docker build -t `whoami`/zookeeper:3.4.8 . 9 | docker run -p "$1":2181 -t `whoami`/zookeeper:3.4.8 . start 10 | -------------------------------------------------------------------------------- /docker/kafka/run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eo pipefail 4 | 5 | ZK_HOST=`ifconfig docker0 | grep 'inet ' | awk ' { print $2 } '` 6 | docker build -t `whoami`/kafka:0.8.2.1 . 7 | docker run \ 8 | -e ZK_HOST=${ZK_HOST} \ 9 | -e ADVERTISED_HOST=localhost \ 10 | -e ADVERTISED_PORT=9092 \ 11 | -p 127.0.0.1:9092:9092 `whoami`/kafka:0.8.2.1 . 
start 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | language: go 3 | go_import_path: github.com/chzchzchz/etcd 4 | sudo: false 5 | 6 | go: 7 | - 1.6 8 | - tip 9 | 10 | env: 11 | global: 12 | - GO15VENDOREXPERIMENT=1 13 | matrix: 14 | - TARGET=amd64 15 | 16 | 17 | matrix: 18 | fast_finish: true 19 | allow_failures: 20 | - go: tip 21 | 22 | script: 23 | - go test -v -race 24 | -------------------------------------------------------------------------------- /zk/auth.go: -------------------------------------------------------------------------------- 1 | package zk 2 | 3 | import ( 4 | "github.com/chzchzchz/zetcd" 5 | ) 6 | 7 | func NewAuth(addrs []string) zetcd.AuthFunc { 8 | return func(zka zetcd.AuthConn) (zetcd.Session, error) { 9 | return newSession(addrs, zka) 10 | } 11 | } 12 | 13 | func NewZK() zetcd.ZKFunc { 14 | return func(s zetcd.Session) (zetcd.ZK, error) { 15 | zk, err := newZK(s) 16 | return zetcd.NewZKLog(zk), err 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | etcd: etcd --name infra1 --listen-client-urls http://127.0.0.1:2379 --advertise-client-urls http://127.0.0.1:2379 --listen-peer-urls http://127.0.0.1:2380 --initial-advertise-peer-urls http://127.0.0.1:2380 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:2380' --initial-cluster-state new --enable-pprof 2 | 3 | zketcd: ./cmd/zetcd/zetcd -endpoint http://localhost:2379 -zkaddr 127.0.0.1:2181 -logtostderr -v 9 4 | -------------------------------------------------------------------------------- /auth.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | etcd "github.com/coreos/etcd/clientv3" 5 | ) 6 
| 7 | type AuthFunc func(AuthConn) (Session, error) 8 | type ZKFunc func(Session) (ZK, error) 9 | 10 | func NewAuth(c *etcd.Client) AuthFunc { 11 | sp := NewSessionPool(c) 12 | return func(zka AuthConn) (Session, error) { 13 | return sp.Auth(zka) 14 | } 15 | } 16 | 17 | func NewZK(c *etcd.Client) ZKFunc { 18 | return func(s Session) (ZK, error) { 19 | return NewZKLog(NewZKEtcd(c, s)), nil 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /scripts/bench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eo pipefail 4 | 5 | function tidy { 6 | kill -9 `pgrep etcd` || true 7 | kill -9 `pgrep zetcd` || true 8 | docker kill `docker ps | egrep "(zookeeper|kafka)" | awk ' { print $1 } '` || true 9 | } 10 | 11 | function run_config { 12 | #config="xchk.kafka" 13 | config="$1" 14 | workload="$2" 15 | tidy 16 | sleep 1s 17 | goreman -f scripts/Procfile."$config".$workload start & 18 | pid=$! 
19 | sleep 10s 20 | ./scripts/bench-$workload.sh >bench.$config.$workload 21 | kill $pid 22 | } 23 | 24 | for a in zetcd xchk zk; do run_config $a kafka; done 25 | tidy -------------------------------------------------------------------------------- /scripts/Procfile.xchk: -------------------------------------------------------------------------------- 1 | etcd: killall -9 etcd; rm -rf *.etcd && etcd --name infra1 --listen-client-urls http://127.0.0.1:2379 --advertise-client-urls http://127.0.0.1:2379 --listen-peer-urls http://127.0.0.1:2380 --initial-advertise-peer-urls http://127.0.0.1:2380 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:2380' --initial-cluster-state new --enable-pprof 2 | 3 | zk: docker kill `docker ps | grep zookeeper | cut -f1 -d' '`; cd ./docker/zk && ./run 2182 4 | 5 | zketcd: killall -9 zetcd; sleep 3s && ./cmd/zetcd/zetcd -endpoint http://localhost:2379 -zkaddr 127.0.0.1:2181 -zkbridge 127.0.0.1:2182 -oracle zk -logtostderr -v 10 2>zketcd.xchk 6 | -------------------------------------------------------------------------------- /scripts/Procfile.zetcd.kafka: -------------------------------------------------------------------------------- 1 | etcd: killall -9 etcd; rm -rf *.etcd && etcd --name infra1 --listen-client-urls http://127.0.0.1:2379 --advertise-client-urls http://127.0.0.1:2379 --listen-peer-urls http://127.0.0.1:2380 --initial-advertise-peer-urls http://127.0.0.1:2380 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:2380' --initial-cluster-state new --enable-pprof 2 | 3 | zketcd: killall -9 zetcd; sleep 3s && ./cmd/zetcd/zetcd -endpoint http://localhost:2379 -zkaddr `ifconfig docker0 | grep 'inet ' | awk ' { print $2 } '`:2181 -logtostderr -v 5 4 | 5 | kafka: docker kill `docker ps | egrep "kafka"`; sleep 5s; cd ./docker/kafka && ./run 6 | -------------------------------------------------------------------------------- /xchk/auth.go: 
// mkPath converts a zookeeper path into its zetcd key form: the cleaned,
// absolute path prefixed by one byte holding the path depth, where depth is
// the number of '/' characters in the cleaned path.
func mkPath(zkPath string) string {
	cleaned := path.Clean(zkPath)
	if cleaned[0] != '/' {
		cleaned = "/" + cleaned
	}
	slashes := 0
	for _, ch := range []byte(cleaned) {
		if ch == '/' {
			slashes++
		}
	}
	key := make([]byte, 0, len(cleaned)+1)
	key = append(key, byte(slashes))
	key = append(key, cleaned...)
	return string(key)
}

// incPath returns zetcdPath with its leading depth byte incremented by one.
func incPath(zetcdPath string) string {
	raw := []byte(zetcdPath)
	raw[0] = raw[0] + 1
	return string(raw)
}

// getListPfx returns the "/zk/ver/" key prefix used to list the children of
// the zetcd path p.
//
// A two-byte p (e.g. the key mkPath produces for "/") is appended to the
// prefix as is. For any other p the children live one level deeper, so the
// depth byte is bumped by one and a trailing '/' is added:
// /abc has depth 1, so listing it searches keys with depth byte 2.
func getListPfx(p string) string {
	const verPfx = "/zk/ver/"
	if len(p) == 2 {
		return verPfx + p
	}
	childDepth := p[0] + 1
	return verPfx + string([]byte{childDepth}) + p[1:] + "/"
}
--advertise-client-urls http://127.0.0.1:2379 --listen-peer-urls http://127.0.0.1:2380 --initial-advertise-peer-urls http://127.0.0.1:2380 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:2380' --initial-cluster-state new --enable-pprof 2 | 3 | zk: docker kill `docker ps | egrep "(zookeeper|kafka)" | cut -f1 -d' '`; cd ./docker/zk && ./run 2182 4 | 5 | zketcd: killall -9 zetcd; sleep 3s && ./cmd/zetcd/zetcd -endpoint http://localhost:2379 -zkaddr `ifconfig docker0 | grep 'inet ' | awk ' { print $2 } '`:2181 -zkbridge 127.0.0.1:2182 -oracle zk -logtostderr -v 10 2>zketcd.xchk 6 | 7 | kafka: sleep 5s; cd ./docker/kafka && ./run 8 | -------------------------------------------------------------------------------- /docker/zk/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM java:openjdk-8-jre-alpine 2 | # had to hack this a bit... 3 | # MAINTAINER Justin Plock 4 | 5 | LABEL name="zookeeper" version="3.4.8" 6 | 7 | RUN apk add --no-cache wget bash 8 | RUN mkdir /opt 9 | RUN wget -q -O - http://apache.mirrors.pair.com/zookeeper/zookeeper-3.4.8/zookeeper-3.4.8.tar.gz | tar -xzf - -C /opt 10 | RUN mv /opt/zookeeper-3.4.8 /opt/zookeeper 11 | RUN cp /opt/zookeeper/conf/zoo_sample.cfg /opt/zookeeper/conf/zoo.cfg 12 | RUN mkdir -p /tmp/zookeeper 13 | #RUN ln -s /opt/zookeeper/conf/zoo.cfg /opt/zookeeper/conf/start 14 | RUN ls -lah /opt/zookeeper/bin 15 | EXPOSE 2181 2888 3888 16 | 17 | WORKDIR /opt/zookeeper 18 | 19 | VOLUME ["/opt/zookeeper/conf", "/tmp/zookeeper"] 20 | 21 | ENTRYPOINT ["/opt/zookeeper/bin/zkServer.sh", "start-foreground", "/opt/zookeeper/conf/zoo.cfg"] 22 | -------------------------------------------------------------------------------- /docker/kafka/Dockerfile: -------------------------------------------------------------------------------- 1 | # Kafka 2 | 3 | FROM java:openjdk-8-jre 4 | 5 | ENV DEBIAN_FRONTEND noninteractive 6 | RUN apt-get update 7 | RUN apt-get install -y 
wget supervisor dnsutils 8 | 9 | # Install Kafka, Zookeeper and other needed things 10 | RUN rm -rf /var/lib/apt/lists/* 11 | RUN apt-get clean 12 | 13 | ENV SCALA_VERSION 2.11 14 | ENV KAFKA_VERSION 0.8.2.1 15 | ENV KAFKA_HOME /opt/kafka_"$SCALA_VERSION"-"$KAFKA_VERSION" 16 | 17 | RUN wget -q http://apache.mirrors.spacedump.net/kafka/"$KAFKA_VERSION"/kafka_"$SCALA_VERSION"-"$KAFKA_VERSION".tgz -O /tmp/kafka_"$SCALA_VERSION"-"$KAFKA_VERSION".tgz 18 | RUN tar xfz /tmp/kafka_"$SCALA_VERSION"-"$KAFKA_VERSION".tgz -C /opt 19 | RUN rm /tmp/kafka_"$SCALA_VERSION"-"$KAFKA_VERSION".tgz 20 | 21 | ENV AUTO_CREATE_TOPICS true 22 | ENV BROKER_ID 1 23 | # 9092 is kafka port 24 | EXPOSE 9092 25 | ADD start-kafka.sh /usr/bin/start-kafka.sh 26 | ENTRYPOINT ["/usr/bin/start-kafka.sh"] 27 | -------------------------------------------------------------------------------- /cmd/zkboom/zkboom.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/samuel/go-zookeeper/zk" 8 | ) 9 | 10 | var ( 11 | flags = int32(0) 12 | acl = zk.WorldACL(zk.PermAll) 13 | ) 14 | 15 | func main() { 16 | conn, _, err := zk.Connect([]string{"127.0.0.1:2181"}, time.Second) 17 | if err != nil { 18 | panic(err) 19 | } 20 | 21 | // benchMem(conn) 22 | benchPut(conn) 23 | } 24 | 25 | func benchMem(conn *zk.Conn) { 26 | val := make([]byte, 128) 27 | 28 | for i := 0; i < 1000000; i++ { 29 | _, err := conn.Create("/foo"+fmt.Sprint(i), val, flags, acl) 30 | if err != nil { 31 | panic(err) 32 | } 33 | if i%1000 == 0 { 34 | fmt.Println(i) 35 | } 36 | } 37 | } 38 | 39 | func benchPut(conn *zk.Conn) { 40 | _, err := conn.Create("/foo", []byte("bar"), flags, acl) 41 | if err != nil { 42 | fmt.Println(err) 43 | } 44 | 45 | donec := make(chan struct{}) 46 | start := time.Now() 47 | for n := 0; n < 100; n++ { 48 | go func() { 49 | for i := 0; i < 1000; i++ { 50 | _, err = conn.Set("/foo", []byte("bar"), -1) 51 | if err != 
nil { 52 | panic(err) 53 | } 54 | } 55 | donec <- struct{}{} 56 | }() 57 | } 58 | 59 | for n := 0; n < 100; n++ { 60 | <-donec 61 | } 62 | fmt.Println(time.Since(start)) 63 | } 64 | -------------------------------------------------------------------------------- /authconn.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | "net" 5 | 6 | "github.com/golang/glog" 7 | ) 8 | 9 | // AuthConn transfers zookeeper handshaking for establishing a session 10 | type AuthConn interface { 11 | Read() (*AuthRequest, error) 12 | Write(AuthResponse) (Conn, error) 13 | Close() 14 | } 15 | 16 | type AuthResponse struct { 17 | Resp *ConnectResponse 18 | // TODO: add four letter response 19 | } 20 | 21 | type AuthRequest struct { 22 | Req *ConnectRequest 23 | // TODO: add four letter commands 24 | } 25 | 26 | type authConn struct { 27 | c net.Conn 28 | } 29 | 30 | func NewAuthConn(c net.Conn) AuthConn { return &authConn{c} } 31 | 32 | func (ac *authConn) Read() (*AuthRequest, error) { 33 | req := &ConnectRequest{} 34 | if err := ReadPacket(ac.c, req); err != nil { 35 | glog.V(6).Infof("error reading connection request (%v)", err) 36 | return nil, err 37 | } 38 | glog.V(6).Infof("auth(%+v)", req) 39 | return &AuthRequest{req}, nil 40 | } 41 | 42 | func (ac *authConn) Write(ar AuthResponse) (Conn, error) { 43 | if err := WritePacket(ac.c, ar.Resp); err != nil { 44 | return nil, err 45 | } 46 | zkc := NewConn(ac.c) 47 | ac.c = nil 48 | return zkc, nil 49 | } 50 | 51 | func (ac *authConn) Close() { 52 | if ac.c != nil { 53 | ac.c.Close() 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /session.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | "sync" 5 | 6 | etcd "github.com/coreos/etcd/clientv3" 7 | "github.com/golang/glog" 8 | "golang.org/x/net/context" 9 | ) 10 | 11 | type Session interface { 
// newSession builds a session for lease id on top of the zookeeper
// connection zkc and starts a goroutine that tracks the lease keep-alives.
//
// It returns the keep-alive error, and no session, when the keep-alive
// stream for id cannot be started.
func newSession(c *etcd.Client, zkc Conn, id etcd.LeaseID) (*session, error) {
	// The keep-alive context is derived from the client's context so it can
	// be cancelled independently when the session ends.
	ctx, cancel := context.WithCancel(c.Ctx())
	s := &session{Conn: zkc, id: id, c: c, watches: newWatches(c)}

	kach, kaerr := c.KeepAlive(ctx, id)
	if kaerr != nil {
		cancel()
		return nil, kaerr
	}

	go func() {
		glog.V(9).Infof("starting the session... id=%v", id)
		// On exit, stop the keep-alive stream first and then close the session.
		defer func() {
			glog.V(9).Infof("finishing the session... id=%v; expect revoke...", id)
			cancel()
			s.Close()
		}()
		for {
			select {
			case ka, ok := <-kach:
				if !ok {
					// The keep-alive channel was closed; the session is over.
					return
				}
				if ka.ResponseHeader == nil {
					continue
				}
				// Record the revision from the latest keep-alive response;
				// ZXid reads it under the same mutex.
				s.mu.Lock()
				s.leaseZXid = ZXid(ka.ResponseHeader.Revision)
				s.mu.Unlock()
			case <-s.StopNotify():
				// NOTE(review): StopNotify presumably fires when the
				// underlying connection stops; confirm in conn.go.
				return
			}
		}
	}()

	return s, nil
}
Xid, op *SetDataRequest) ZKResponse 12 | GetAcl(xid Xid, op *GetAclRequest) ZKResponse 13 | SetAcl(xid Xid, op *SetAclRequest) ZKResponse 14 | GetChildren(xid Xid, op *GetChildrenRequest) ZKResponse 15 | Sync(xid Xid, op *SyncRequest) ZKResponse 16 | Ping(xid Xid, op *PingRequest) ZKResponse 17 | GetChildren2(xid Xid, op *GetChildren2Request) ZKResponse 18 | // opCheck = 13 19 | Multi(xid Xid, op *MultiRequest) ZKResponse 20 | Close(xid Xid, op *CloseRequest) ZKResponse 21 | SetAuth(xid Xid, op *SetAuthRequest) ZKResponse 22 | SetWatches(xid Xid, op *SetWatchesRequest) ZKResponse 23 | } 24 | 25 | func DispatchZK(zk ZK, xid Xid, op interface{}) ZKResponse { 26 | switch op := op.(type) { 27 | case *CreateRequest: 28 | return zk.Create(xid, op) 29 | case *DeleteRequest: 30 | return zk.Delete(xid, op) 31 | case *GetChildrenRequest: 32 | return zk.GetChildren(xid, op) 33 | case *GetChildren2Request: 34 | return zk.GetChildren2(xid, op) 35 | case *PingRequest: 36 | return zk.Ping(xid, op) 37 | case *GetDataRequest: 38 | return zk.GetData(xid, op) 39 | case *SetDataRequest: 40 | return zk.SetData(xid, op) 41 | case *ExistsRequest: 42 | return zk.Exists(xid, op) 43 | case *SyncRequest: 44 | return zk.Sync(xid, op) 45 | case *CloseRequest: 46 | return zk.Close(xid, op) 47 | case *SetWatchesRequest: 48 | return zk.SetWatches(xid, op) 49 | default: 50 | fmt.Printf("unexpected type %d %T\n", xid, op) 51 | } 52 | return mkZKErr(xid, 0, errAPIError) 53 | } 54 | -------------------------------------------------------------------------------- /bench_test.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "log" 7 | "testing" 8 | "time" 9 | 10 | "github.com/samuel/go-zookeeper/zk" 11 | ) 12 | 13 | const ( 14 | zkAddr = "127.0.0.1:2181" 15 | zetcdAddr = "127.0.0.1:2182" 16 | ) 17 | 18 | func init() { zk.DefaultLogger = log.New(ioutil.Discard, "", 0) } 19 | 20 | func benchGet(b 
*testing.B, addr string) { 21 | c, _, err := zk.Connect([]string{addr}, time.Second) 22 | if err != nil { 23 | b.Fatal(err) 24 | } 25 | defer c.Close() 26 | c.Create("/abc", []byte("abc"), 0, acl) 27 | for i := 0; i < b.N; i++ { 28 | if _, _, gerr := c.Get("/abc"); gerr != nil { 29 | b.Fatal(err) 30 | } 31 | } 32 | } 33 | 34 | func benchConnGet(b *testing.B, addr string) { 35 | for i := 0; i < b.N; i++ { 36 | c, _, err := zk.Connect([]string{addr}, time.Second) 37 | if err != nil { 38 | b.Fatal(err) 39 | } 40 | if _, _, gerr := c.Get("/abc"); gerr != nil { 41 | b.Fatal(err) 42 | } 43 | c.Close() 44 | } 45 | } 46 | 47 | func benchCreateSet(b *testing.B, addr string) { 48 | c, _, err := zk.Connect([]string{addr}, time.Second) 49 | if err != nil { 50 | b.Fatal(err) 51 | } 52 | defer c.Close() 53 | for i := 0; i < b.N; i++ { 54 | s := fmt.Sprintf("/%d", i) 55 | v := fmt.Sprintf("%v", time.Now()) 56 | c.Create(s, []byte(v), 0, acl) 57 | c.Set("/", []byte(v), -1) 58 | } 59 | } 60 | 61 | func BenchmarkZetcdGet(b *testing.B) { benchGet(b, zetcdAddr) } 62 | func BenchmarkZKGet(b *testing.B) { benchGet(b, zkAddr) } 63 | 64 | func BenchmarkZetcdConnGet(b *testing.B) { benchConnGet(b, zetcdAddr) } 65 | func BenchmarkZKConnGet(b *testing.B) { benchConnGet(b, zkAddr) } 66 | 67 | func BenchmarkZetcdCreateSet(b *testing.B) { benchCreateSet(b, zetcdAddr) } 68 | func BenchmarkZKCreateSet(b *testing.B) { benchCreateSet(b, zkAddr) } 69 | -------------------------------------------------------------------------------- /stat.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | etcd "github.com/coreos/etcd/clientv3" 5 | ) 6 | 7 | func statGets(p string) []etcd.Op { 8 | return []etcd.Op{ 9 | etcd.OpGet("/zk/ctime/"+p, etcd.WithSerializable()), 10 | etcd.OpGet("/zk/mtime/"+p, etcd.WithSerializable(), 11 | etcd.WithSort(etcd.SortByModRevision, etcd.SortDescend)), 12 | etcd.OpGet("/zk/key/"+p, etcd.WithSerializable()), 13 
| etcd.OpGet("/zk/cver/"+p, etcd.WithSerializable()), 14 | etcd.OpGet("/zk/aver/"+p, etcd.WithSerializable()), 15 | // to compute num children 16 | etcd.OpGet(getListPfx(p), etcd.WithSerializable(), etcd.WithPrefix()), 17 | } 18 | } 19 | 20 | func statTxn(txnresp *etcd.TxnResponse) (s Stat) { 21 | ctime := txnresp.Responses[0].GetResponseRange() 22 | mtime := txnresp.Responses[1].GetResponseRange() 23 | node := txnresp.Responses[2].GetResponseRange() 24 | cver := txnresp.Responses[3].GetResponseRange() 25 | aver := txnresp.Responses[4].GetResponseRange() 26 | children := txnresp.Responses[5].GetResponseRange() 27 | 28 | // XXX hack: need to format zk / node instead of this garbage 29 | if len(ctime.Kvs) != 0 { 30 | s.Ctime = decodeInt64(ctime.Kvs[0].Value) 31 | s.Czxid = rev2zxid(ctime.Kvs[0].ModRevision) 32 | s.Pzxid = s.Czxid 33 | } 34 | if len(mtime.Kvs) != 0 { 35 | s.Mzxid = rev2zxid(mtime.Kvs[0].ModRevision) 36 | s.Mtime = decodeInt64(mtime.Kvs[0].Value) 37 | s.Version = Ver(mtime.Kvs[0].Version - 1) 38 | } 39 | if len(cver.Kvs) != 0 { 40 | s.Cversion = Ver(decodeInt64(cver.Kvs[0].Value)) 41 | } 42 | if len(aver.Kvs) != 0 { 43 | s.Aversion = Ver(decodeInt64(aver.Kvs[0].Value)) 44 | } 45 | if len(node.Kvs) != 0 { 46 | s.EphemeralOwner = Sid(node.Kvs[0].Lease) 47 | s.DataLength = int32(len(node.Kvs[0].Value)) 48 | } 49 | s.NumChildren = int32(len(children.Kvs)) 50 | if s.NumChildren > 0 { 51 | s.Pzxid = rev2zxid(children.Kvs[0].ModRevision) 52 | } 53 | return s 54 | } 55 | -------------------------------------------------------------------------------- /xchk/authconn.go: -------------------------------------------------------------------------------- 1 | package xchk 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/chzchzchz/zetcd" 7 | ) 8 | 9 | // authConn implements an AuthConn that can fork off xchked AuthConns 10 | type authConn struct { 11 | zka zetcd.AuthConn 12 | workers []*authConnWorker 13 | } 14 | 15 | func newAuthConn(zka zetcd.AuthConn) *authConn { 16 
| return &authConn{zka: zka} 17 | } 18 | 19 | func (ac *authConn) Read() (*zetcd.AuthRequest, error) { return ac.zka.Read() } 20 | 21 | // Write waits for the worker writes and returns a new conn if matching. 22 | func (ac *authConn) Write(ar zetcd.AuthResponse) (zetcd.Conn, error) { 23 | zkc, cerr := ac.zka.Write(ar) 24 | if cerr != nil { 25 | return nil, cerr 26 | } 27 | conn, workers := newConn(zkc, len(ac.workers)) 28 | for i, w := range ac.workers { 29 | w.connc <- workers[i] 30 | } 31 | return conn, nil 32 | } 33 | 34 | func (ac *authConn) Close() { 35 | ac.zka.Close() 36 | for _, w := range ac.workers { 37 | w.Close() 38 | } 39 | } 40 | 41 | // authConnWorker implements an AuthConn that is xchked by an authConn 42 | type authConnWorker struct { 43 | reqc chan *zetcd.AuthRequest 44 | respc chan *zetcd.AuthResponse 45 | connc chan zetcd.Conn 46 | } 47 | 48 | // worker creates a clone of the auth conn 49 | func (ac *authConn) worker() *authConnWorker { 50 | acw := &authConnWorker{ 51 | reqc: make(chan *zetcd.AuthRequest, 1), 52 | respc: make(chan *zetcd.AuthResponse, 1), 53 | connc: make(chan zetcd.Conn), 54 | } 55 | ac.workers = append(ac.workers, acw) 56 | return acw 57 | } 58 | 59 | func (acw *authConnWorker) Read() (*zetcd.AuthRequest, error) { 60 | if req := <-acw.reqc; req != nil { 61 | return req, nil 62 | } 63 | return nil, fmt.Errorf("lost request") 64 | } 65 | 66 | func (acw *authConnWorker) Write(ar zetcd.AuthResponse) (zetcd.Conn, error) { 67 | acw.respc <- &ar 68 | c := <-acw.connc 69 | if c == nil { 70 | return nil, fmt.Errorf("xchk error") 71 | } 72 | return c, nil 73 | } 74 | 75 | func (acw *authConnWorker) Close() { 76 | if acw.respc != nil { 77 | close(acw.respc) 78 | close(acw.reqc) 79 | acw.respc = nil 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /zk/zk.go: -------------------------------------------------------------------------------- 1 | package zk 2 | 3 | import ( 4 | "fmt" 5 | 6 | 
"github.com/chzchzchz/zetcd" 7 | ) 8 | 9 | // zkZK takes incoming ZK requests and forwards them to a remote ZK server 10 | type zkZK struct{ s *session } 11 | 12 | func newZK(s zetcd.Session) (*zkZK, error) { 13 | ss, ok := s.Backing().(*session) 14 | if !ok { 15 | return nil, fmt.Errorf("unexpected session type %t", s) 16 | } 17 | return &zkZK{ss}, nil 18 | } 19 | 20 | func (zz *zkZK) Create(xid zetcd.Xid, op *zetcd.CreateRequest) zetcd.ZKResponse { 21 | return <-zz.s.future(xid, op) 22 | } 23 | 24 | func (zz *zkZK) Delete(xid zetcd.Xid, op *zetcd.DeleteRequest) zetcd.ZKResponse { 25 | return <-zz.s.future(xid, op) 26 | } 27 | func (zz *zkZK) Exists(xid zetcd.Xid, op *zetcd.ExistsRequest) zetcd.ZKResponse { 28 | return <-zz.s.future(xid, op) 29 | } 30 | func (zz *zkZK) GetData(xid zetcd.Xid, op *zetcd.GetDataRequest) zetcd.ZKResponse { 31 | return <-zz.s.future(xid, op) 32 | } 33 | func (zz *zkZK) SetData(xid zetcd.Xid, op *zetcd.SetDataRequest) zetcd.ZKResponse { 34 | return <-zz.s.future(xid, op) 35 | } 36 | func (zz *zkZK) GetAcl(xid zetcd.Xid, op *zetcd.GetAclRequest) zetcd.ZKResponse { 37 | return <-zz.s.future(xid, op) 38 | } 39 | func (zz *zkZK) SetAcl(xid zetcd.Xid, op *zetcd.SetAclRequest) zetcd.ZKResponse { 40 | return <-zz.s.future(xid, op) 41 | } 42 | func (zz *zkZK) GetChildren(xid zetcd.Xid, op *zetcd.GetChildrenRequest) zetcd.ZKResponse { 43 | return <-zz.s.future(xid, op) 44 | } 45 | func (zz *zkZK) Sync(xid zetcd.Xid, op *zetcd.SyncRequest) zetcd.ZKResponse { 46 | return <-zz.s.future(xid, op) 47 | } 48 | func (zz *zkZK) Ping(xid zetcd.Xid, op *zetcd.PingRequest) zetcd.ZKResponse { 49 | return <-zz.s.future(xid, op) 50 | } 51 | 52 | func (zz *zkZK) GetChildren2(xid zetcd.Xid, op *zetcd.GetChildren2Request) zetcd.ZKResponse { 53 | return <-zz.s.future(xid, op) 54 | } 55 | func (zz *zkZK) Multi(xid zetcd.Xid, op *zetcd.MultiRequest) zetcd.ZKResponse { 56 | return <-zz.s.future(xid, op) 57 | } 58 | func (zz *zkZK) Close(xid zetcd.Xid, op 
*zetcd.CloseRequest) zetcd.ZKResponse { 59 | return <-zz.s.future(xid, op) 60 | } 61 | func (zz *zkZK) SetAuth(xid zetcd.Xid, op *zetcd.SetAuthRequest) zetcd.ZKResponse { 62 | return <-zz.s.future(xid, op) 63 | } 64 | func (zz *zkZK) SetWatches(xid zetcd.Xid, op *zetcd.SetWatchesRequest) zetcd.ZKResponse { 65 | return <-zz.s.future(xid, op) 66 | } 67 | -------------------------------------------------------------------------------- /cmd/zkctl/zkctl.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "path" 8 | "sort" 9 | "time" 10 | 11 | "github.com/samuel/go-zookeeper/zk" 12 | ) 13 | 14 | var ( 15 | acl = zk.WorldACL(zk.PermAll) 16 | ) 17 | 18 | func main() { 19 | s := flag.String("zkaddr", "127.0.0.1", "address of zookeeper server") 20 | flag.Parse() 21 | c, _, err := zk.Connect([]string{*s}, time.Second) 22 | if err != nil { 23 | panic(err) 24 | } 25 | 26 | switch flag.Args()[0] { 27 | case "watch": 28 | dir := "/" 29 | if len(flag.Args()) > 1 { 30 | dir = flag.Args()[1] 31 | } 32 | watch(c, dir) 33 | case "ls": 34 | dir := "/" 35 | if len(flag.Args()) > 1 { 36 | dir = flag.Args()[1] 37 | } 38 | err = ls(c, dir) 39 | case "rm": 40 | err = rm(c, flag.Args()[1]) 41 | case "set": 42 | err = set(c, flag.Args()[1], flag.Args()[2]) 43 | case "get": 44 | err = get(c, flag.Args()[1]) 45 | case "put": 46 | err = put(c, flag.Args()[1], flag.Args()[2], 0) 47 | case "eput": 48 | err = put(c, flag.Args()[1], flag.Args()[2], zk.FlagEphemeral) 49 | case "sput": 50 | err = put(c, flag.Args()[1], flag.Args()[2], zk.FlagSequence) 51 | } 52 | 53 | if err != nil { 54 | fmt.Println(err) 55 | os.Exit(1) 56 | } 57 | } 58 | 59 | func watch(c *zk.Conn, dir string) { 60 | fmt.Println("watch dir", dir) 61 | children, stat, ch, err := c.ChildrenW(dir) 62 | if err != nil { 63 | panic(err) 64 | } 65 | fmt.Printf("%+v %+v\n", children, stat) 66 | e := <-ch 67 | fmt.Printf("%+v\n", e) 68 | } 69 
| 70 | func ls(c *zk.Conn, dir string) error { 71 | fmt.Println("ls dir", dir) 72 | children, stat, err := c.Children(dir) 73 | if err != nil { 74 | return err 75 | } 76 | sort.Sort(sort.StringSlice(children)) 77 | fmt.Println("Children:") 78 | for _, c := range children { 79 | fmt.Printf("%s (%s)\n", path.Clean(dir+"/"+c), c) 80 | } 81 | fmt.Printf("Stat: %+v\n", stat) 82 | return nil 83 | } 84 | 85 | func put(c *zk.Conn, path, data string, fl int32) error { 86 | // TODO: descriptive acls 87 | _, err := c.Create(path, []byte(data), fl, acl) 88 | return err 89 | } 90 | 91 | func set(c *zk.Conn, path, data string) error { 92 | _, err := c.Set(path, []byte(data), -1) 93 | return err 94 | } 95 | 96 | func rm(c *zk.Conn, path string) error { 97 | return c.Delete(path, -1) 98 | } 99 | 100 | func get(c *zk.Conn, path string) error { 101 | dat, st, err := c.Get(path) 102 | if err == nil { 103 | fmt.Println(dat) 104 | fmt.Printf("Stat:\n%+v\n", st) 105 | } 106 | return err 107 | } 108 | -------------------------------------------------------------------------------- /zklog.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | "github.com/golang/glog" 5 | ) 6 | 7 | type zkLog struct{ zk ZK } 8 | 9 | func NewZKLog(zk ZK) ZK { 10 | return &zkLog{zk} 11 | } 12 | 13 | func (zl *zkLog) Create(xid Xid, op *CreateRequest) ZKResponse { 14 | glog.V(7).Infof("Create(%v,%+v)", xid, *op) 15 | return zl.zk.Create(xid, op) 16 | } 17 | 18 | func (zl *zkLog) Delete(xid Xid, op *DeleteRequest) ZKResponse { 19 | glog.V(7).Infof("Delete(%v,%+v)", xid, *op) 20 | return zl.zk.Delete(xid, op) 21 | } 22 | 23 | func (zl *zkLog) Exists(xid Xid, op *ExistsRequest) ZKResponse { 24 | glog.V(7).Infof("Exists(%v,%+v)", xid, *op) 25 | return zl.zk.Exists(xid, op) 26 | } 27 | 28 | func (zl *zkLog) GetData(xid Xid, op *GetDataRequest) ZKResponse { 29 | glog.V(7).Infof("GetData(%v,%+v)", xid, *op) 30 | return zl.zk.GetData(xid, op) 31 | } 
32 | 33 | func (zl *zkLog) SetData(xid Xid, op *SetDataRequest) ZKResponse { 34 | glog.V(7).Infof("SetData(%v,%+v)", xid, *op) 35 | return zl.zk.SetData(xid, op) 36 | } 37 | 38 | func (zl *zkLog) GetAcl(xid Xid, op *GetAclRequest) ZKResponse { 39 | glog.V(7).Infof("GetAcl(%v,%+v)", xid, *op) 40 | return zl.zk.GetAcl(xid, op) 41 | } 42 | 43 | func (zl *zkLog) SetAcl(xid Xid, op *SetAclRequest) ZKResponse { 44 | glog.V(7).Infof("SetAcl(%v,%+v)", xid, *op) 45 | return zl.zk.SetAcl(xid, op) 46 | } 47 | 48 | func (zl *zkLog) GetChildren(xid Xid, op *GetChildrenRequest) ZKResponse { 49 | glog.V(7).Infof("GetChildren(%v,%+v)", xid, *op) 50 | return zl.zk.GetChildren(xid, op) 51 | } 52 | 53 | func (zl *zkLog) Sync(xid Xid, op *SyncRequest) ZKResponse { 54 | glog.V(7).Infof("Sync(%v,%+v)", xid, *op) 55 | return zl.zk.Sync(xid, op) 56 | } 57 | 58 | func (zl *zkLog) Ping(xid Xid, op *PingRequest) ZKResponse { 59 | glog.V(7).Infof("Ping(%v,%+v)", xid, *op) 60 | return zl.zk.Ping(xid, op) 61 | } 62 | 63 | func (zl *zkLog) GetChildren2(xid Xid, op *GetChildren2Request) ZKResponse { 64 | glog.V(7).Infof("GetChildren2(%v,%+v)", xid, *op) 65 | return zl.zk.GetChildren2(xid, op) 66 | } 67 | 68 | func (zl *zkLog) Multi(xid Xid, op *MultiRequest) ZKResponse { 69 | glog.V(7).Infof("Multi(%v,%+v)", xid, *op) 70 | return zl.zk.Multi(xid, op) 71 | } 72 | 73 | func (zl *zkLog) Close(xid Xid, op *CloseRequest) ZKResponse { 74 | glog.V(7).Infof("Close(%v,%+v)", xid, *op) 75 | return zl.zk.Close(xid, op) 76 | } 77 | 78 | func (zl *zkLog) SetAuth(xid Xid, op *SetAuthRequest) ZKResponse { 79 | glog.V(7).Infof("SetAuth(%v,%+v)", xid, *op) 80 | return zl.zk.SetAuth(xid, op) 81 | } 82 | 83 | func (zl *zkLog) SetWatches(xid Xid, op *SetWatchesRequest) ZKResponse { 84 | glog.V(7).Infof("SetWatches(%v,%+v)", xid, *op) 85 | return zl.zk.SetWatches(xid, op) 86 | } 87 | -------------------------------------------------------------------------------- /docker/kafka/start-kafka.sh: 
#!/bin/sh

# Rewrites $KAFKA_HOME/config/server.properties from environment variables and
# then starts the Kafka broker.
#
# ENV variables that are always read:
# * KAFKA_HOME: Kafka installation directory (holds config/ and bin/)
# * ZK_HOST: value written to zookeeper.connect; it is applied unconditionally,
#   so an empty value blanks the setting
#
# Optional ENV variables:
# * ADVERTISED_HOST: the external ip for the container, e.g. `docker-machine ip \`docker-machine active\``
# * ADVERTISED_PORT: the external port for Kafka, e.g. 9092
# * ZK_CHROOT: the zookeeper chroot that's used by Kafka (without / prefix), e.g. "kafka"
#   NOTE(review): this script never reads ZK_CHROOT; put the chroot in ZK_HOST instead.
# * LOG_RETENTION_HOURS: the minimum age of a log file in hours to be eligible for deletion (default is 168, for 1 week)
# * LOG_RETENTION_BYTES: configure the size at which segments are pruned from the log, (default is 1073741824, for 1GB)
# * NUM_PARTITIONS: configure the default number of log partitions per topic
# * AUTO_CREATE_TOPICS: value appended to the config as auto.create.topics.enable
# * BROKER_ID: sets broker.id and rewrites log.dir to /tmp/kafka-logs-$BROKER_ID

# Quote the config path once so a KAFKA_HOME containing spaces still works.
CONFIG="$KAFKA_HOME/config/server.properties"

# Set the external host and port
if [ -n "$ADVERTISED_HOST" ]; then
    echo "advertised host: $ADVERTISED_HOST"
    sed -r -i "s/#(advertised.host.name)=(.*)/\1=$ADVERTISED_HOST/g" "$CONFIG"
fi
if [ -n "$ADVERTISED_PORT" ]; then
    echo "advertised port: $ADVERTISED_PORT"
    sed -r -i "s/#(advertised.port)=(.*)/\1=$ADVERTISED_PORT/g" "$CONFIG"
fi

# configure kafka
# '|' is the sed delimiter here because a connect string with a chroot
# (host:2181/kafka) contains '/', which would terminate a '/'-delimited
# expression early.
sed -r -i "s|(zookeeper.connect)=(.*)|\1=$ZK_HOST|g" "$CONFIG"

# Allow specification of log retention policies
if [ -n "$LOG_RETENTION_HOURS" ]; then
    echo "log retention hours: $LOG_RETENTION_HOURS"
    sed -r -i "s/(log.retention.hours)=(.*)/\1=$LOG_RETENTION_HOURS/g" "$CONFIG"
fi
if [ -n "$LOG_RETENTION_BYTES" ]; then
    echo "log retention bytes: $LOG_RETENTION_BYTES"
    sed -r -i "s/#(log.retention.bytes)=(.*)/\1=$LOG_RETENTION_BYTES/g" "$CONFIG"
fi

# Configure the default number of log partitions per topic
if [ -n "$NUM_PARTITIONS" ]; then
    echo "default number of partition: $NUM_PARTITIONS"
    sed -r -i "s/(num.partitions)=(.*)/\1=$NUM_PARTITIONS/g" "$CONFIG"
fi

# Enable/disable auto creation of topics
if [ -n "$AUTO_CREATE_TOPICS" ]; then
    echo "auto.create.topics.enable: $AUTO_CREATE_TOPICS"
    echo "auto.create.topics.enable=$AUTO_CREATE_TOPICS" >> "$CONFIG"
fi

if [ -n "$BROKER_ID" ]; then
    echo "broker.id: $BROKER_ID"
    sed -r -i "s/(broker.id)=(.*)/\1=$BROKER_ID/g" "$CONFIG"
    echo "log.dir: /tmp/kafka-logs-$BROKER_ID"
    # NOTE(review): this only matches a line of the form "log.dir=..."; if the
    # shipped config uses the "log.dirs" key this substitution is a no-op.
    # Confirm against the image's server.properties.
    sed -r -i "s|(log.dir)=(.*)|\1=/tmp/kafka-logs-$BROKER_ID|g" "$CONFIG"
fi

# Run Kafka. exec replaces this shell so the broker receives signals
# (e.g. from `docker stop`) directly.
exec "$KAFKA_HOME/bin/kafka-server-start.sh" "$CONFIG"
| func newOracle(etcdAddr, bridgeAddr, oracle string) (p personality) { 44 | var cper, oper personality 45 | switch oracle { 46 | case "zk": 47 | cper, oper = newZKEtcd(etcdAddr), newBridge(bridgeAddr) 48 | case "etcd": 49 | oper, cper = newZKEtcd(etcdAddr), newBridge(bridgeAddr) 50 | default: 51 | fmt.Println("oracle expected etcd or zk, got", oracle) 52 | os.Exit(1) 53 | } 54 | p.authf = xchk.NewAuth(cper.authf, oper.authf) 55 | p.zkf = xchk.NewZK(cper.zkf, oper.zkf) 56 | p.ctx = cper.ctx 57 | return p 58 | } 59 | 60 | func main() { 61 | etcdAddr := flag.String("endpoint", "", "etcd3 client address") 62 | zkaddr := flag.String("zkaddr", "", "address for serving zookeeper clients") 63 | oracle := flag.String("oracle", "", "oracle zookeeper server address") 64 | bridgeAddr := flag.String("zkbridge", "", "bridge zookeeper server address") 65 | 66 | flag.Parse() 67 | fmt.Println("Running zetcd proxy") 68 | 69 | if len(*zkaddr) == 0 { 70 | fmt.Println("expected -zkaddr") 71 | os.Exit(1) 72 | } 73 | 74 | // listen on zookeeper server port 75 | ln, err := net.Listen("tcp", *zkaddr) 76 | if err != nil { 77 | os.Exit(1) 78 | } 79 | 80 | var p personality 81 | serv := zetcd.Serve 82 | switch { 83 | case *oracle != "": 84 | if len(*etcdAddr) == 0 || len(*bridgeAddr) == 0 { 85 | fmt.Println("expected -endpoint and -zkbridge") 86 | os.Exit(1) 87 | } 88 | p = newOracle(*etcdAddr, *bridgeAddr, *oracle) 89 | serv = zetcd.ServeSerial 90 | case len(*etcdAddr) != 0 && len(*bridgeAddr) != 0: 91 | fmt.Println("expected -endpoint or -zkbridge but not both") 92 | os.Exit(1) 93 | case len(*etcdAddr) != 0: 94 | p = newZKEtcd(*etcdAddr) 95 | case len(*bridgeAddr) != 0: 96 | p = newBridge(*bridgeAddr) 97 | default: 98 | fmt.Println("expected -endpoint or -zkbridge") 99 | os.Exit(1) 100 | } 101 | 102 | serv(p.ctx, ln, p.authf, p.zkf) 103 | } 104 | -------------------------------------------------------------------------------- /op.go: 
-------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import "fmt" 4 | 5 | // use a map 6 | 7 | func op2req(op Op) interface{} { 8 | switch op { 9 | case opGetChildren2: 10 | return &GetChildren2Request{} 11 | case opPing: 12 | return &PingRequest{} 13 | case opCreate: 14 | return &CreateRequest{} 15 | case opCheck: 16 | return &CheckVersionRequest{} 17 | case opSetWatches: 18 | return &SetWatchesRequest{} 19 | case opSetData: 20 | return &SetDataRequest{} 21 | case opGetData: 22 | return &GetDataRequest{} 23 | case opDelete: 24 | return &DeleteRequest{} 25 | case opExists: 26 | return &ExistsRequest{} 27 | case opGetAcl: 28 | return &GetAclRequest{} 29 | case opSetAcl: 30 | return &SetAclRequest{} 31 | case opGetChildren: 32 | return &GetChildrenRequest{} 33 | case opSync: 34 | return &SyncRequest{} 35 | case opMulti: 36 | return &MultiRequest{} 37 | case opClose: 38 | return &CloseRequest{} 39 | case opSetAuth: 40 | return &SetAuthRequest{} 41 | 42 | default: 43 | fmt.Println("unknown opcode ", op) 44 | } 45 | return nil 46 | } 47 | 48 | func op2resp(op Op) interface{} { 49 | switch op { 50 | case opGetChildren2: 51 | return &GetChildren2Response{} 52 | case opPing: 53 | return &PingResponse{} 54 | case opCreate: 55 | return &CreateResponse{} 56 | case opSetWatches: 57 | return &SetWatchesResponse{} 58 | case opSetData: 59 | return &SetDataResponse{} 60 | case opGetData: 61 | return &GetDataResponse{} 62 | case opDelete: 63 | return &DeleteResponse{} 64 | case opExists: 65 | return &ExistsResponse{} 66 | case opGetAcl: 67 | return &GetAclResponse{} 68 | case opSetAcl: 69 | return &SetAclResponse{} 70 | case opGetChildren: 71 | return &GetChildrenResponse{} 72 | case opSync: 73 | return &SyncResponse{} 74 | case opMulti: 75 | return &MultiResponse{} 76 | case opClose: 77 | return &CloseResponse{} 78 | case opSetAuth: 79 | return &SetAuthResponse{} 80 | default: 81 | fmt.Println("unknown opcode ", op) 82 | } 83 | 
return nil 84 | } 85 | 86 | func req2op(req interface{}) Op { 87 | switch req.(type) { 88 | case *GetChildren2Request: 89 | return opGetChildren2 90 | case *PingRequest: 91 | return opPing 92 | case *CreateRequest: 93 | return opCreate 94 | case *SetWatchesRequest: 95 | return opSetWatches 96 | case *SetDataRequest: 97 | return opSetData 98 | case *GetDataRequest: 99 | return opGetData 100 | case *DeleteRequest: 101 | return opDelete 102 | case *ExistsRequest: 103 | return opExists 104 | case *GetAclRequest: 105 | return opGetAcl 106 | case *SetAclRequest: 107 | return opSetAcl 108 | case *GetChildrenRequest: 109 | return opGetChildren 110 | case *SyncRequest: 111 | return opSync 112 | case *MultiRequest: 113 | return opMulti 114 | case *CloseRequest: 115 | return opClose 116 | case *SetAuthRequest: 117 | return opSetAuth 118 | default: 119 | fmt.Println("unknown request", req) 120 | } 121 | return opInvalid 122 | } 123 | -------------------------------------------------------------------------------- /conn.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | "net" 7 | "sync" 8 | 9 | "github.com/golang/glog" 10 | ) 11 | 12 | type Conn interface { 13 | Send(xid Xid, zxid ZXid, resp interface{}) error 14 | Read() <-chan ZKRequest 15 | StopNotify() <-chan struct{} 16 | Close() 17 | } 18 | 19 | type conn struct { 20 | zkc net.Conn 21 | outc chan []byte 22 | readc chan ZKRequest 23 | mu sync.RWMutex 24 | 25 | // stopc is closed to shutdown session 26 | stopc chan struct{} 27 | // donec is closed to signal session is torn down 28 | donec chan struct{} 29 | } 30 | 31 | type ZKRequest struct { 32 | xid Xid 33 | req interface{} 34 | err error 35 | } 36 | 37 | func (zk *ZKRequest) String() string { 38 | if zk.req != nil { 39 | return fmt.Sprintf("{xid:%v req:%T:%+v}", zk.xid, zk.req, zk.req) 40 | } 41 | if zk.err != nil { 42 | return fmt.Sprintf("{xid:%v err:%q}", zk.xid, 
zk.err) 43 | } 44 | return fmt.Sprintf("{xid:%v err:%q}", zk.xid, zk.err) 45 | } 46 | 47 | func NewConn(zk net.Conn) Conn { 48 | outc := make(chan []byte, 16) 49 | c := &conn{ 50 | zkc: zk, 51 | outc: outc, 52 | readc: make(chan ZKRequest), 53 | stopc: make(chan struct{}), 54 | donec: make(chan struct{}), 55 | } 56 | 57 | go func() { 58 | defer close(c.readc) 59 | for { 60 | xid, req, err := readReqOp(c.zkc) 61 | select { 62 | case c.readc <- ZKRequest{xid, req, err}: 63 | if err != nil { 64 | return 65 | } 66 | case <-c.stopc: 67 | return 68 | case <-c.donec: 69 | return 70 | } 71 | } 72 | }() 73 | 74 | go func() { 75 | defer close(c.donec) 76 | for msg := range outc { 77 | if _, err := c.zkc.Write(msg); err != nil { 78 | return 79 | } 80 | } 81 | }() 82 | 83 | return c 84 | } 85 | 86 | func (c *conn) Read() <-chan ZKRequest { return c.readc } 87 | 88 | func (c *conn) Send(xid Xid, zxid ZXid, resp interface{}) error { 89 | buf := make([]byte, 2*1024*1024) 90 | hdr := &ResponseHeader{Xid: xid, Zxid: zxid, Err: errOk} 91 | 92 | _, isEv := resp.(*WatcherEvent) 93 | if isEv { 94 | hdr.Xid = -1 95 | } 96 | 97 | ec, hasErr := resp.(*ErrCode) 98 | if hasErr { 99 | hdr.Err = *ec 100 | } 101 | n1, err1 := encodePacket(buf[4:], hdr) 102 | if err1 != nil { 103 | return err1 104 | } 105 | pktlen := n1 106 | if !hasErr { 107 | n2, err2 := encodePacket(buf[4+n1:], resp) 108 | if err2 != nil { 109 | return err2 110 | } 111 | pktlen += n2 112 | } 113 | 114 | binary.BigEndian.PutUint32(buf[:4], uint32(pktlen)) 115 | c.mu.RLock() 116 | defer c.mu.RUnlock() 117 | select { 118 | case c.outc <- buf[:4+pktlen]: 119 | glog.V(9).Infof("conn.Send(xid=%v, zxid=%v, %+v)", xid, zxid, resp) 120 | case <-c.donec: 121 | } 122 | return nil 123 | } 124 | 125 | func (c *conn) Close() { 126 | c.mu.Lock() 127 | if c.outc != nil { 128 | close(c.stopc) 129 | close(c.outc) 130 | c.outc = nil 131 | c.zkc.Close() 132 | } 133 | c.mu.Unlock() 134 | <-c.donec 135 | } 136 | 137 | func (c *conn) StopNotify() 
<-chan struct{} { return c.donec } 138 | -------------------------------------------------------------------------------- /xchk/session.go: -------------------------------------------------------------------------------- 1 | package xchk 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | 7 | "github.com/chzchzchz/zetcd" 8 | ) 9 | 10 | type sessCreds struct { 11 | sid zetcd.Sid 12 | pass []byte 13 | } 14 | 15 | type sessionPool struct { 16 | mu sync.Mutex 17 | // o2cSid maps oracle sids to candidate sids 18 | o2cSid map[zetcd.Sid]sessCreds 19 | } 20 | 21 | func (sp *sessionPool) get(sid zetcd.Sid) (zetcd.Sid, []byte) { 22 | sp.mu.Lock() 23 | defer sp.mu.Unlock() 24 | if sc, ok := sp.o2cSid[sid]; ok { 25 | return sc.sid, sc.pass 26 | } 27 | return 0, nil 28 | } 29 | 30 | func (sp *sessionPool) put(osid, csid zetcd.Sid, pwd []byte) { 31 | sp.mu.Lock() 32 | sp.o2cSid[osid] = sessCreds{csid, pwd} 33 | sp.mu.Unlock() 34 | } 35 | 36 | func newSessionPool() *sessionPool { 37 | return &sessionPool{o2cSid: make(map[zetcd.Sid]sessCreds)} 38 | } 39 | 40 | // session intercepts Sends so responses can be xchked 41 | type session struct { 42 | zetcd.Conn 43 | oracle zetcd.Session 44 | candidate zetcd.Session 45 | req zetcd.ConnectRequest 46 | 47 | sp *sessionPool 48 | } 49 | 50 | func (s *session) Close() { 51 | s.Conn.Close() 52 | s.oracle.Close() 53 | s.candidate.Close() 54 | } 55 | 56 | func Auth(sp *sessionPool, zka zetcd.AuthConn, cAuth, oAuth zetcd.AuthFunc) (zetcd.Session, error) { 57 | xzka := newAuthConn(zka) 58 | defer xzka.Close() 59 | 60 | var oSession, cSession zetcd.Session 61 | ozka, czka := xzka.worker(), xzka.worker() 62 | oerrc, cerrc := make(chan error, 1), make(chan error, 1) 63 | 64 | go func() { 65 | s, oerr := oAuth(ozka) 66 | oSession = s 67 | oerrc <- oerr 68 | }() 69 | go func() { 70 | s, cerr := cAuth(czka) 71 | cSession = s 72 | cerrc <- cerr 73 | }() 74 | 75 | // get client request 76 | ar, arerr := xzka.Read() 77 | if arerr != nil { 78 | return nil, arerr 79 
| } 80 | 81 | // send to oracle as usual 82 | ozka.reqc <- ar 83 | 84 | // send to candidate patched with right session info 85 | creq := *ar.Req 86 | if creq.SessionID != 0 { 87 | creq.SessionID, creq.Passwd = sp.get(creq.SessionID) 88 | } 89 | czka.reqc <- &zetcd.AuthRequest{Req: &creq} 90 | 91 | oresp, cresp := <-ozka.respc, <-czka.respc 92 | vNil := oresp == nil && cresp == nil 93 | vVal := oresp != nil && cresp != nil 94 | if !vNil && !vVal { 95 | return nil, fmt.Errorf("mismatch %+v vs %+v", oresp, cresp) 96 | } 97 | if oresp == nil { 98 | return nil, fmt.Errorf("bad oracle response") 99 | } 100 | 101 | // save session info in case of resume 102 | sp.put(oresp.Resp.SessionID, cresp.Resp.SessionID, cresp.Resp.Passwd) 103 | 104 | xzkc, xerr := xzka.Write(zetcd.AuthResponse{oresp.Resp}) 105 | oerr, cerr := <-oerrc, <-cerrc 106 | if xerr != nil || cerr != nil || oerr != nil { 107 | return nil, fmt.Errorf("err: xchk: %v. oracle: %v. candidate: %v", oerr, cerr) 108 | } 109 | 110 | return &session{ 111 | Conn: xzkc, 112 | oracle: oSession, 113 | candidate: cSession, 114 | req: *ar.Req, 115 | sp: sp, 116 | }, nil 117 | } 118 | 119 | func (s *session) ConnReq() zetcd.ConnectRequest { return s.req } 120 | 121 | func (s *session) Backing() interface{} { return s } 122 | 123 | func (s *session) Sid() zetcd.Sid { return s.oracle.Sid() } 124 | 125 | func (s *session) Wait(rev zetcd.ZXid, path string, evtype zetcd.EventType) { panic("stub") } 126 | 127 | func (s *session) Watch(rev zetcd.ZXid, xid zetcd.Xid, path string, evtype zetcd.EventType, cb func(zetcd.ZXid)) { 128 | panic("stuB") 129 | } 130 | 131 | func (s *session) ZXid() zetcd.ZXid { panic("uh") } 132 | -------------------------------------------------------------------------------- /client.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | "net" 7 | "sync" 8 | 9 | "golang.org/x/net/context" 10 | ) 11 | 12 | // 
// NewClient wraps zk, a connection to a zookeeper server, in a Client and
// starts the two goroutines that service it: a reader that publishes decoded
// responses on the channel returned by Read, and a writer that flushes
// packets queued by Send.
func NewClient(ctx context.Context, zk net.Conn) Client {
	// outc is also kept in a local: the writer below ranges over this local
	// rather than the c.outc field.
	outc := make(chan []byte, 16)
	c := &client{
		ctx:   ctx,
		zkc:   zk,
		outc:  outc,
		readc: make(chan ZKResponse),
		xids:  make(map[Xid]Op),
		stopc: make(chan struct{}),
		donec: make(chan struct{}),
	}

	// Reader: decode responses until a read error has been delivered or the
	// client is stopped/torn down. readc is closed on exit so consumers
	// ranging over Read() terminate.
	go func() {
		defer close(c.readc)
		// The decoder resolves the response type for an xid through the
		// table of in-flight requests.
		xid2op := func(xid Xid) interface{} { return c.xid2resp(xid) }
		for {
			hdr, resp, err := readRespOp(c.zkc, xid2op)
			if hdr != nil {
				// The request is answered; drop it from the in-flight table.
				c.ackXid(hdr.Xid)
			}
			select {
			case c.readc <- ZKResponse{hdr, resp, err}:
				// The error itself is delivered to the consumer before the
				// reader stops.
				if err != nil {
					return
				}
			case <-c.stopc:
				return
			case <-c.donec:
				return
			}
		}
	}()

	// Writer: flush queued packets until outc is closed or a write fails.
	// Closing donec on exit signals that the client is torn down.
	go func() {
		defer close(c.donec)
		for msg := range outc {
			if _, err := c.zkc.Write(msg); err != nil {
				return
			}
		}
	}()

	return c
}
%x", xid) 104 | } 105 | return op2resp(op) 106 | } 107 | 108 | // Read receives zookeeper responses. 109 | func (c *client) Read() <-chan ZKResponse { return c.readc } 110 | 111 | // Send sends a zookeeper request. 112 | func (c *client) Send(xid Xid, req interface{}) error { 113 | hdr := &requestHeader{Xid: xid, Opcode: req2op(req)} 114 | if hdr.Opcode == opInvalid { 115 | return ErrAPIError 116 | } 117 | 118 | buf := make([]byte, 2*1024*1024) 119 | 120 | n1, err1 := encodePacket(buf[4:], hdr) 121 | if err1 != nil { 122 | return err1 123 | } 124 | pktlen := n1 125 | n2, err2 := encodePacket(buf[4+n1:], req) 126 | if err2 != nil { 127 | return err2 128 | } 129 | pktlen += n2 130 | 131 | // record the xid 132 | c.mu.Lock() 133 | c.xids[xid] = hdr.Opcode 134 | c.mu.Unlock() 135 | 136 | binary.BigEndian.PutUint32(buf[:4], uint32(pktlen)) 137 | c.mu.RLock() 138 | defer c.mu.RUnlock() 139 | select { 140 | case c.outc <- buf[:4+pktlen]: 141 | case <-c.donec: 142 | case <-c.ctx.Done(): 143 | return c.ctx.Err() 144 | } 145 | return nil 146 | } 147 | 148 | func (c *client) Close() { 149 | c.mu.Lock() 150 | if c.outc != nil { 151 | close(c.stopc) 152 | close(c.outc) 153 | c.outc = nil 154 | c.zkc.Close() 155 | } 156 | c.mu.Unlock() 157 | <-c.donec 158 | } 159 | 160 | func (c *client) StopNotify() <-chan struct{} { return c.donec } 161 | -------------------------------------------------------------------------------- /server.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | "net" 5 | "sync" 6 | 7 | "github.com/golang/glog" 8 | "golang.org/x/net/context" 9 | ) 10 | 11 | type acceptHandler func(conn net.Conn, auth AuthFunc, zk ZKFunc) 12 | 13 | // Serve will serve multiple sessions in concurrently. 
14 | func Serve(ctx context.Context, ln net.Listener, auth AuthFunc, zk ZKFunc) { 15 | serveByHandler(handleSessionSerialRequests, ctx, ln, auth, zk) 16 | } 17 | 18 | // ServeSerial has at most one inflight request at a time so two servers can be 19 | // reliably cross checked. 20 | func ServeSerial(ctx context.Context, ln net.Listener, auth AuthFunc, zk ZKFunc) { 21 | serveByHandler(newHandleGlobalSerialRequests(), ctx, ln, auth, zk) 22 | } 23 | 24 | func newHandleGlobalSerialRequests() acceptHandler { 25 | var mu sync.Mutex 26 | return func(conn net.Conn, auth AuthFunc, zk ZKFunc) { 27 | s, zke, serr := openSession(conn, auth, zk) 28 | if serr != nil { 29 | return 30 | } 31 | defer s.Close() 32 | glog.V(9).Infof("serving global serial session requests on s=%+v", s) 33 | for zkreq := range s.Read() { 34 | mu.Lock() 35 | err := serveRequest(s, zke, zkreq) 36 | mu.Unlock() 37 | if err != nil { 38 | return 39 | } 40 | } 41 | } 42 | } 43 | 44 | func newHandleGlobalSerialSessions(ch acceptHandler) acceptHandler { 45 | var mu sync.Mutex 46 | return func(conn net.Conn, auth AuthFunc, zk ZKFunc) { 47 | mu.Lock() 48 | defer mu.Unlock() 49 | ch(conn, auth, zk) 50 | } 51 | } 52 | 53 | func handleSessionSerialRequests(conn net.Conn, auth AuthFunc, zk ZKFunc) { 54 | s, zke, serr := openSession(conn, auth, zk) 55 | if serr != nil { 56 | return 57 | } 58 | defer s.Close() 59 | glog.V(9).Infof("serving serial session requests on id=%x", s.Sid()) 60 | for zkreq := range s.Read() { 61 | if err := serveRequest(s, zke, zkreq); err != nil { 62 | return 63 | } 64 | } 65 | } 66 | 67 | func handleSessionConcurrentRequests(conn net.Conn, auth AuthFunc, zk ZKFunc) { 68 | s, zke, serr := openSession(conn, auth, zk) 69 | if serr != nil { 70 | return 71 | } 72 | defer s.Close() 73 | var wg sync.WaitGroup 74 | wg.Add(1) 75 | glog.V(9).Infof("serving concurrent session requests on id=%x", s.Sid()) 76 | for zkreq := range s.Read() { 77 | wg.Add(1) 78 | go func() { 79 | defer wg.Done() 80 | if 
err := serveRequest(s, zke, zkreq); err != nil { 81 | return 82 | } 83 | }() 84 | } 85 | wg.Wait() 86 | } 87 | 88 | func newHandleLogClose(ch acceptHandler) acceptHandler { 89 | return func(conn net.Conn, auth AuthFunc, zk ZKFunc) { 90 | glog.V(6).Infof("closing remote connection %q", conn.RemoteAddr()) 91 | ch(conn, auth, zk) 92 | } 93 | } 94 | 95 | func serveRequest(s Session, zke ZK, zkreq ZKRequest) error { 96 | glog.V(9).Infof("zkreq=%v", &zkreq) 97 | if zkreq.err != nil { 98 | return zkreq.err 99 | } 100 | zkresp := DispatchZK(zke, zkreq.xid, zkreq.req) 101 | if zkresp.Err != nil { 102 | glog.V(9).Infof("dispatch error", zkresp.Err) 103 | return zkresp.Err 104 | } 105 | if zkresp.Hdr.Err == 0 { 106 | s.Send(zkresp.Hdr.Xid, zkresp.Hdr.Zxid, zkresp.Resp) 107 | } else { 108 | s.Send(zkresp.Hdr.Xid, zkresp.Hdr.Zxid, &zkresp.Hdr.Err) 109 | } 110 | return nil 111 | } 112 | 113 | func openSession(conn net.Conn, auth AuthFunc, zk ZKFunc) (Session, ZK, error) { 114 | glog.V(6).Infof("accepted remote connection %q", conn.RemoteAddr()) 115 | s, serr := auth(NewAuthConn(conn)) 116 | if serr != nil { 117 | return nil, nil, serr 118 | } 119 | zke, zkerr := zk(s) 120 | if zkerr != nil { 121 | s.Close() 122 | return nil, nil, zkerr 123 | } 124 | return s, zke, nil 125 | } 126 | 127 | func serveByHandler(h acceptHandler, ctx context.Context, ln net.Listener, auth AuthFunc, zk ZKFunc) { 128 | for { 129 | conn, err := ln.Accept() 130 | if err != nil { 131 | glog.V(5).Infof("Accept()=%v", err) 132 | } else { 133 | go h(conn, auth, zk) 134 | } 135 | select { 136 | case <-ctx.Done(): 137 | return 138 | default: 139 | } 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /zk/session.go: -------------------------------------------------------------------------------- 1 | package zk 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "sync" 7 | 8 | "github.com/chzchzchz/zetcd" 9 | "github.com/golang/glog" 10 | "golang.org/x/net/context" 11 | ) 
12 | 13 | type session struct { 14 | zetcd.Conn 15 | zetcd.Watches 16 | zkc zetcd.Client 17 | connReq zetcd.ConnectRequest 18 | sid zetcd.Sid 19 | 20 | ctx context.Context 21 | cancel context.CancelFunc 22 | 23 | mu sync.Mutex 24 | futures map[zetcd.Xid]chan zetcd.ZKResponse 25 | } 26 | 27 | func (s *session) Sid() zetcd.Sid { return s.sid } 28 | func (s *session) ZXid() zetcd.ZXid { return 111111 } 29 | 30 | func (s *session) ConnReq() zetcd.ConnectRequest { return s.connReq } 31 | func (s *session) Backing() interface{} { return s } 32 | 33 | func newSession(servers []string, zka zetcd.AuthConn) (*session, error) { 34 | defer zka.Close() 35 | glog.V(6).Infof("newSession(%s)", servers) 36 | req := zetcd.ConnectRequest{} 37 | areq, err := zka.Read() 38 | if err != nil { 39 | return nil, err 40 | } 41 | if req.ProtocolVersion != 0 { 42 | panic("unhandled req stuff!") 43 | } 44 | // create connection to zk server based on 'servers' 45 | zkconn, err := net.Dial("tcp", servers[0]) 46 | if err != nil { 47 | glog.V(6).Infof("failed to dial (%v)", err) 48 | return nil, err 49 | } 50 | // send connection request 51 | if err = zetcd.WritePacket(zkconn, areq.Req); err != nil { 52 | glog.V(6).Infof("failed to write connection request (%v)", err) 53 | zkconn.Close() 54 | return nil, err 55 | } 56 | // pipe back connectino result 57 | resp := zetcd.ConnectResponse{} 58 | if err := zetcd.ReadPacket(zkconn, &resp); err != nil { 59 | glog.V(6).Infof("failed to read connection response (%v)", err) 60 | return nil, err 61 | } 62 | // pass response back to proxy 63 | zkc, aerr := zka.Write(zetcd.AuthResponse{Resp: &resp}) 64 | if zkc == nil || aerr != nil { 65 | zkconn.Close() 66 | return nil, aerr 67 | } 68 | 69 | ctx, cancel := context.WithCancel(context.Background()) 70 | glog.V(6).Infof("auth resp OK (%+v)", resp) 71 | 72 | s := &session{ 73 | Conn: zkc, 74 | zkc: zetcd.NewClient(ctx, zkconn), 75 | connReq: req, 76 | sid: resp.SessionID, 77 | ctx: ctx, 78 | cancel: cancel, 79 | 
futures: make(map[zetcd.Xid]chan zetcd.ZKResponse), 80 | } 81 | go s.recvLoop() 82 | return s, nil 83 | } 84 | 85 | func (s *session) future(xid zetcd.Xid, op interface{}) <-chan zetcd.ZKResponse { 86 | ch := make(chan zetcd.ZKResponse, 1) 87 | if s.futures == nil { 88 | glog.V(6).Infof("futuresClosed=%+v", op) 89 | ch <- zetcd.ZKResponse{Err: fmt.Errorf("closed")} 90 | return ch 91 | } 92 | s.mu.Lock() 93 | s.futures[xid] = ch 94 | s.mu.Unlock() 95 | if err := s.zkc.Send(xid, op); err != nil { 96 | ch <- zetcd.ZKResponse{Err: err} 97 | s.mu.Lock() 98 | delete(s.futures, xid) 99 | s.mu.Unlock() 100 | return ch 101 | } 102 | glog.V(6).Infof("waitFutureSendResp=%+v", op) 103 | return ch 104 | } 105 | 106 | // recvLoop forwards responses from the real zk server to the zetcd connection. 107 | func (s *session) recvLoop() { 108 | defer func() { 109 | s.mu.Lock() 110 | for _, ch := range s.futures { 111 | close(ch) 112 | } 113 | s.futures = nil 114 | s.mu.Unlock() 115 | s.cancel() 116 | }() 117 | for resp := range s.zkc.Read() { 118 | if resp.Err != nil { 119 | glog.V(6).Infof("zk/zkresp=Err(%v)", resp.Err) 120 | return 121 | } 122 | glog.V(6).Infof("zk/zkresp=(%+v,%+v)", *resp.Hdr, resp.Resp) 123 | s.mu.Lock() 124 | if ch := s.futures[resp.Hdr.Xid]; ch != nil { 125 | ch <- resp 126 | delete(s.futures, resp.Hdr.Xid) 127 | s.mu.Unlock() 128 | continue 129 | } 130 | s.mu.Unlock() 131 | 132 | // out of band requests (i.e., watches) 133 | var r interface{} 134 | if resp.Hdr.Err != 0 { 135 | r = &resp.Hdr.Err 136 | } else { 137 | r = resp.Resp 138 | } 139 | 140 | glog.V(6).Infof("zk/zkSessOOB=%+v %+v", resp.Hdr, r) 141 | s.Send(resp.Hdr.Xid, resp.Hdr.Zxid, r) 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /xchk/conn.go: -------------------------------------------------------------------------------- 1 | package xchk 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "time" 7 | 8 | "github.com/chzchzchz/zetcd" 9 | 
"github.com/golang/glog" 10 | ) 11 | 12 | // conn implements a Conn that xchks several conns 13 | type conn struct { 14 | zkc zetcd.Conn 15 | stopc chan struct{} 16 | donec chan struct{} 17 | 18 | readc chan zetcd.ZKRequest 19 | sendc chan sendPkt 20 | 21 | // mu protects pktmap 22 | mu sync.Mutex 23 | 24 | // oobRespPath tracks out of band events by path 25 | oobRespPath map[string]chan sendPkt 26 | 27 | workers []*connWorker 28 | } 29 | 30 | func newConn(zkc zetcd.Conn, nworkers int) (*conn, []zetcd.Conn) { 31 | c := &conn{ 32 | zkc: zkc, 33 | stopc: make(chan struct{}), 34 | donec: make(chan struct{}), 35 | 36 | readc: make(chan zetcd.ZKRequest, 16), 37 | sendc: make(chan sendPkt, 16), 38 | 39 | oobRespPath: make(map[string]chan sendPkt), 40 | 41 | workers: make([]*connWorker, nworkers), 42 | } 43 | 44 | workers := make([]zetcd.Conn, len(c.workers)) 45 | for i := range c.workers { 46 | c.workers[i] = &connWorker{ 47 | readc: make(chan zetcd.ZKRequest, 16), 48 | stopc: make(chan struct{}), 49 | parent: c, 50 | } 51 | workers[i] = c.workers[i] 52 | } 53 | 54 | // collect sends from workers 55 | go c.sendLoop() 56 | return c, workers 57 | } 58 | 59 | func (c *conn) processSendOOB(sp sendPkt) { 60 | c.mu.Lock() 61 | defer c.mu.Unlock() 62 | if c.oobRespPath == nil { 63 | return 64 | } 65 | 66 | if sp.xid != -1 { 67 | panic("expected xid == -1") 68 | } 69 | 70 | if lastCh := c.oobRespPath[sp.wev.Path]; lastCh != nil { 71 | lastCh <- sp 72 | delete(c.oobRespPath, sp.wev.Path) 73 | return 74 | } 75 | ch := make(chan sendPkt, 1) 76 | c.oobRespPath[sp.wev.Path] = ch 77 | 78 | go func() { 79 | var newSp sendPkt 80 | var ok bool 81 | select { 82 | case newSp, ok = <-ch: 83 | if !ok { 84 | return 85 | } 86 | case <-time.After(3 * time.Second): 87 | glog.Warningf("xchk failed waited too long to match response to %+v", *sp.wev) 88 | newSp = sp 89 | } 90 | c.mu.Lock() 91 | if c.oobRespPath != nil { 92 | delete(c.oobRespPath, sp.wev.Path) 93 | } 94 | c.mu.Unlock() 95 | if 
*newSp.wev != *sp.wev { 96 | glog.Warningf("xchk failed (path:%q): %+v != %+v", sp.wev.Path, *sp.wev, *newSp.wev) 97 | } 98 | glog.V(6).Infof("xchkSendOOB response %+v", *sp.wev) 99 | c.zkc.Send(sp.xid, sp.zxid, sp.wev) 100 | }() 101 | } 102 | 103 | func (c *conn) sendLoop() { 104 | defer close(c.donec) 105 | for { 106 | var sp sendPkt 107 | select { 108 | case sp = <-c.sendc: 109 | case <-c.stopc: 110 | return 111 | } 112 | c.processSendOOB(sp) 113 | } 114 | } 115 | 116 | func (c *conn) Send(xid zetcd.Xid, zxid zetcd.ZXid, resp interface{}) error { 117 | glog.V(6).Infof("sendXchk Xid:%v ZXid:%v Resp:%+v", xid, zxid, resp) 118 | return c.zkc.Send(xid, zxid, resp) 119 | } 120 | 121 | func (c *conn) Read() <-chan zetcd.ZKRequest { return c.zkc.Read() } 122 | func (c *conn) StopNotify() <-chan struct{} { return c.stopc } 123 | 124 | func (c *conn) Close() { 125 | close(c.stopc) 126 | c.mu.Lock() 127 | for _, ch := range c.oobRespPath { 128 | close(ch) 129 | } 130 | c.oobRespPath = nil 131 | c.mu.Unlock() 132 | <-c.donec 133 | } 134 | 135 | type connWorker struct { 136 | parent *conn 137 | readc chan zetcd.ZKRequest 138 | 139 | stopc chan struct{} 140 | } 141 | 142 | type sendPkt struct { 143 | xid zetcd.Xid 144 | zxid zetcd.ZXid 145 | wev *zetcd.WatcherEvent 146 | } 147 | 148 | func (c *connWorker) Send(xid zetcd.Xid, zxid zetcd.ZXid, resp interface{}) error { 149 | glog.V(7).Infof("connWorkerSend(%v,%v,%+v)", xid, zxid, resp) 150 | 151 | wev, ok := resp.(*zetcd.WatcherEvent) 152 | if !ok { 153 | glog.Fatalf("unexpected send response %+v", resp) 154 | } 155 | 156 | select { 157 | case c.parent.sendc <- sendPkt{xid, zxid, wev}: 158 | case <-c.stopc: 159 | return fmt.Errorf("send stopped") 160 | } 161 | return nil 162 | } 163 | 164 | func (c *connWorker) Read() <-chan zetcd.ZKRequest { return c.readc } 165 | func (c *connWorker) StopNotify() <-chan struct{} { return c.stopc } 166 | func (c *connWorker) Close() { 167 | if c.readc != nil { 168 | c.readc = nil 169 | 
// This file describes the on-wire format.
//
// Struct fields are encoded and decoded by reflection in declaration order
// (see encodePacketValue / decodePacketValue), so field order here IS the
// wire order and must not be rearranged.

// Xid is the id carried in requestHeader and ResponseHeader.
type Xid int32

// Op is a request opcode (requestHeader.Opcode, MultiHeader.Type).
type Op int32

// ZXid is the store revision reported in ResponseHeader and Stat.
type ZXid int64

// Sid is a session id.
type Sid int64

// Ver is a version counter as used in Stat and versioned requests.
type Ver int32 // version

// ACL is a single access-control entry.
// NOTE(review): Perms is presumably a permission bitmask — confirm against callers.
type ACL struct {
	Perms  int32
	Scheme string
	ID     string
}

// CheckVersionRequest carries a path and the version expected for it.
type CheckVersionRequest pathVersionRequest

// ConnectRequest is the first packet of a connection. A zero SessionID asks
// for a new session; otherwise SessionID/Passwd identify one to resume
// (see SessionPool.Auth).
type ConnectRequest struct {
	ProtocolVersion int32
	LastZxidSeen    ZXid
	TimeOut         int32 // presumably milliseconds: Auth divides by 1000 before granting a lease
	SessionID       Sid
	Passwd          []byte
}

// ConnectResponse answers a ConnectRequest with the session id and password
// the client must present to resume the session.
type ConnectResponse struct {
	ProtocolVersion int32
	TimeOut         int32
	SessionID       Sid
	Passwd          []byte
}

// CreateRequest asks for a node at Path holding Data.
type CreateRequest struct {
	Path  string
	Data  []byte
	Acl   []ACL
	Flags int32
}

// CreateResponse carries a path.
type CreateResponse pathResponse

// CloseRequest and CloseResponse have no payload.
type CloseRequest struct{}
type CloseResponse struct{}

// auth is the payload shared by authentication requests.
type auth struct {
	Type   int32
	Scheme string
	Auth   []byte
}

// SetAuthRequest carries the auth payload; SetAuthResponse has no payload.
type SetAuthRequest auth
type SetAuthResponse struct{}

// SetWatchesRequest lists paths per watch category together with a zxid.
// NOTE(review): presumably used to re-register watches relative to
// RelativeZxid — confirm against the handler.
type SetWatchesRequest struct {
	RelativeZxid ZXid
	DataWatches  []string
	ExistWatches []string
	ChildWatches []string
}

// SetWatchesResponse has no payload.
type SetWatchesResponse struct{}

// MultiHeader precedes every operation of a multi request/response. A header
// with Done set terminates the operation list.
type MultiHeader struct {
	Type Op
	Done bool
	Err  ErrCode
}

// MultiRequestOp is one operation of a MultiRequest; Op holds the request
// struct selected by Header.Type.
type MultiRequestOp struct {
	Header MultiHeader
	Op     interface{}
}

// MultiRequest is a batch of operations. It has hand-written Encode/Decode
// methods instead of relying on the reflective field walk.
type MultiRequest struct {
	Ops        []MultiRequestOp
	DoneHeader MultiHeader
}

// MultiResponseOp is the result of one multi operation. MultiResponse.Decode
// fills String for create results and Stat for set-data results.
type MultiResponseOp struct {
	Header MultiHeader
	String string
	Stat   *Stat
}

// MultiResponse is the result list of a MultiRequest; it has a hand-written
// Decode method.
type MultiResponse struct {
	Ops        []MultiResponseOp
	DoneHeader MultiHeader
}

// GetChildren2Request carries a path and a watch flag.
type GetChildren2Request pathWatchRequest

// GetChildren2Response carries child names together with a Stat.
type GetChildren2Response struct {
	Children []string
	Stat     Stat
}

// GetDataRequest carries a path and a watch flag.
type GetDataRequest pathWatchRequest

// GetDataResponse carries node data together with a Stat.
type GetDataResponse struct {
	Data []byte
	Stat Stat
}

// DeleteRequest carries a path and a version; DeleteResponse has no payload.
type DeleteRequest pathVersionRequest
type DeleteResponse struct{}

// ExistsRequest carries a path and a watch flag; ExistsResponse carries a Stat.
type ExistsRequest pathWatchRequest
type ExistsResponse statResponse

// GetAclRequest carries a path.
type GetAclRequest pathRequest

// GetAclResponse carries an ACL list together with a Stat.
type GetAclResponse struct {
	Acl  []ACL
	Stat Stat
}

// SetAclRequest carries a path, the ACL list to store and a version.
type SetAclRequest struct {
	Path    string
	Acl     []ACL
	Version Ver
}

// SetAclResponse carries a Stat.
type SetAclResponse statResponse

// GetChildrenRequest carries a path and a watch flag.
type GetChildrenRequest pathWatchRequest

// GetChildrenResponse carries child names only (no Stat, unlike
// GetChildren2Response).
type GetChildrenResponse struct {
	Children []string
}

// SyncRequest and SyncResponse each carry a path.
type SyncRequest pathRequest
type SyncResponse pathResponse

// PingRequest and PingResponse have no payload.
type PingRequest struct{}
type PingResponse struct{}

// SetDataRequest carries a path, the data to store and a version.
type SetDataRequest struct {
	Path    string
	Data    []byte
	Version Ver
}

// SetDataResponse carries a Stat.
type SetDataResponse statResponse
157 | Pzxid ZXid // last modified children 158 | } 159 | 160 | type WatcherEvent struct { 161 | Type EventType 162 | State State 163 | Path string 164 | } 165 | 166 | type pathWatchRequest struct { 167 | Path string 168 | Watch bool 169 | } 170 | 171 | type pathResponse struct { 172 | Path string 173 | } 174 | 175 | type pathVersionRequest struct { 176 | Path string 177 | Version Ver 178 | } 179 | 180 | type statResponse struct { 181 | Stat Stat 182 | } 183 | 184 | type requestHeader struct { 185 | Xid Xid 186 | Opcode Op 187 | } 188 | 189 | type ResponseHeader struct { 190 | Xid Xid 191 | Zxid ZXid 192 | Err ErrCode 193 | } 194 | 195 | type pathRequest struct { 196 | Path string 197 | } 198 | -------------------------------------------------------------------------------- /pool.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | "bytes" 5 | "crypto/aes" 6 | "crypto/cipher" 7 | "crypto/rand" 8 | "encoding/binary" 9 | "fmt" 10 | "sync" 11 | 12 | etcd "github.com/coreos/etcd/clientv3" 13 | "github.com/golang/glog" 14 | ) 15 | 16 | type SessionPool struct { 17 | sessions map[etcd.LeaseID]Session 18 | c *etcd.Client 19 | mu sync.RWMutex 20 | be sessionBackend 21 | } 22 | 23 | func NewSessionPool(client *etcd.Client) *SessionPool { 24 | /* 25 | be, err := newAesSessionBackend(client) 26 | if err != nil { 27 | panic(err) 28 | } 29 | */ 30 | be := &etcdSessionBackend{client} 31 | return &SessionPool{ 32 | sessions: make(map[etcd.LeaseID]Session), 33 | c: client, 34 | be: be, 35 | } 36 | } 37 | 38 | func (sp *SessionPool) Auth(zka AuthConn) (Session, error) { 39 | defer zka.Close() 40 | areq, err := zka.Read() 41 | if err != nil { 42 | return nil, err 43 | } 44 | req := areq.Req 45 | 46 | if req.ProtocolVersion != 0 { 47 | panic(fmt.Sprintf("unhandled req stuff! 
%+v", req)) 48 | } 49 | 50 | // TODO use ttl from lease 51 | lid := etcd.LeaseID(req.SessionID) 52 | if lid == 0 { 53 | lid, req.Passwd, err = sp.be.create(int64(req.TimeOut) / 1000) 54 | } else { 55 | lid, err = sp.be.resume(req.SessionID, req.Passwd) 56 | } 57 | 58 | if err != nil { 59 | resp := &ConnectResponse{Passwd: make([]byte, 14)} 60 | zkc, _ := zka.Write(AuthResponse{Resp: resp}) 61 | if zkc != nil { 62 | zkc.Close() 63 | } 64 | return nil, err 65 | } 66 | 67 | resp := &ConnectResponse{ 68 | ProtocolVersion: 0, 69 | TimeOut: req.TimeOut, 70 | SessionID: Sid(lid), 71 | Passwd: req.Passwd, 72 | } 73 | glog.V(7).Infof("authresp=%+v", resp) 74 | zkc, aerr := zka.Write(AuthResponse{Resp: resp}) 75 | if zkc == nil || aerr != nil { 76 | return nil, aerr 77 | } 78 | 79 | s, serr := newSession(sp.c, zkc, lid) 80 | if serr != nil { 81 | return nil, serr 82 | } 83 | s.req = *areq.Req 84 | 85 | sp.mu.Lock() 86 | sp.sessions[s.id] = s 87 | sp.mu.Unlock() 88 | return s, nil 89 | } 90 | 91 | type sessionBackend interface { 92 | create(ttl int64) (etcd.LeaseID, []byte, error) 93 | resume(Sid, []byte) (etcd.LeaseID, error) 94 | } 95 | 96 | type etcdSessionBackend struct { 97 | c *etcd.Client 98 | } 99 | 100 | func (sp *etcdSessionBackend) create(ttl int64) (etcd.LeaseID, []byte, error) { 101 | pwd := make([]byte, 16) 102 | if _, err := rand.Read(pwd); err != nil { 103 | return 0, nil, err 104 | } 105 | if ttl == 0 { 106 | ttl = 1 107 | } 108 | lcr, err := sp.c.Grant(sp.c.Ctx(), ttl) 109 | if err != nil { 110 | return 0, nil, err 111 | } 112 | _, err = sp.c.Put(sp.c.Ctx(), lid2key(lcr.ID), string(pwd), etcd.WithLease(lcr.ID)) 113 | if err != nil { 114 | return 0, nil, err 115 | } 116 | return lcr.ID, pwd, nil 117 | } 118 | 119 | func (sp *etcdSessionBackend) resume(sid Sid, pwd []byte) (etcd.LeaseID, error) { 120 | gresp, gerr := sp.c.Get(sp.c.Ctx(), lid2key(etcd.LeaseID(sid))) 121 | switch { 122 | case gerr != nil: 123 | return 0, gerr 124 | case len(gresp.Kvs) == 0: 125 
| return 0, fmt.Errorf("bad lease") 126 | case bytes.Compare(gresp.Kvs[0].Value, pwd) != 0: 127 | return 0, fmt.Errorf("bad passwd") 128 | } 129 | return etcd.LeaseID(sid), nil 130 | } 131 | 132 | func lid2key(lid etcd.LeaseID) string { return fmt.Sprintf("/zk/ses/%x", lid) } 133 | 134 | type aesSessionBackend struct { 135 | c *etcd.Client 136 | key []byte 137 | b cipher.Block 138 | } 139 | 140 | func newAesSessionBackend(c *etcd.Client) (sb *aesSessionBackend, err error) { 141 | sb = &aesSessionBackend{c: c, key: make([]byte, 16)} 142 | if _, err = rand.Read(sb.key); err != nil { 143 | return nil, err 144 | } 145 | if sb.b, err = aes.NewCipher(sb.key); err != nil { 146 | return nil, err 147 | } 148 | return sb, nil 149 | } 150 | 151 | func (sb *aesSessionBackend) create(ttl int64) (etcd.LeaseID, []byte, error) { 152 | if ttl == 0 { 153 | ttl = 1 154 | } 155 | lcr, err := sb.c.Grant(sb.c.Ctx(), ttl) 156 | if err != nil { 157 | return 0, nil, err 158 | } 159 | return lcr.ID, sb.sid2pwd(Sid(lcr.ID)), nil 160 | } 161 | 162 | func (sb *aesSessionBackend) resume(sid Sid, pwd []byte) (etcd.LeaseID, error) { 163 | if bytes.Compare(sb.sid2pwd(sid), pwd) != 0 { 164 | return 0, fmt.Errorf("bad password") 165 | } 166 | return etcd.LeaseID(sid), nil 167 | } 168 | 169 | func (sb *aesSessionBackend) sid2pwd(sid Sid) []byte { 170 | dst, src := make([]byte, sb.b.BlockSize()), make([]byte, sb.b.BlockSize()) 171 | binary.BigEndian.PutUint64(src, uint64(sid)) 172 | sb.b.Encrypt(dst, src) 173 | return dst 174 | } 175 | -------------------------------------------------------------------------------- /watches.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | "sync" 5 | 6 | etcd "github.com/coreos/etcd/clientv3" 7 | "github.com/golang/glog" 8 | "golang.org/x/net/context" 9 | ) 10 | 11 | type Watches interface { 12 | // Watch creates a watch request on a given path and evtype. 
13 | Watch(rev ZXid, xid Xid, path string, evtype EventType, cb func(ZXid)) 14 | 15 | // Wait blocks until all watches that rely on the given rev are dispatched. 16 | Wait(rev ZXid, path string, evtype EventType) 17 | } 18 | 19 | type watches struct { 20 | mu sync.Mutex 21 | c *etcd.Client 22 | 23 | path2watch [5]map[string]*watch 24 | 25 | ctx context.Context 26 | cancel context.CancelFunc 27 | } 28 | 29 | type watch struct { 30 | c *etcd.Client 31 | 32 | xid Xid 33 | evtype EventType 34 | path string 35 | 36 | wch etcd.WatchChan 37 | ctx context.Context 38 | cancel context.CancelFunc 39 | 40 | // startRev is the etcd store revision when this watch began 41 | startRev ZXid 42 | donec chan struct{} 43 | } 44 | 45 | func (w *watch) isRelevant(ev *etcd.Event) (relevant bool) { 46 | defer func() { 47 | if !relevant { 48 | glog.V(8).Infof("filtered watch event %+v", *ev) 49 | } 50 | }() 51 | switch w.evtype { 52 | case EventNodeDeleted: 53 | if ev.Type != etcd.EventTypeDelete { 54 | return 55 | } 56 | case EventNodeChildrenChanged: 57 | if ev.Type != etcd.EventTypeDelete && !ev.IsCreate() { 58 | return 59 | } 60 | case EventNodeDataChanged: 61 | if !ev.IsModify() { 62 | return 63 | } 64 | case EventNodeCreated: 65 | if !ev.IsCreate() { 66 | return 67 | } 68 | } 69 | return true 70 | } 71 | 72 | func newWatches(c *etcd.Client) *watches { 73 | ctx, cancel := context.WithCancel(context.TODO()) 74 | ws := &watches{ 75 | c: c, 76 | ctx: ctx, 77 | cancel: cancel, 78 | } 79 | for i := 0; i < len(ws.path2watch); i++ { 80 | ws.path2watch[i] = make(map[string]*watch) 81 | } 82 | return ws 83 | } 84 | 85 | func (ws *watches) Watch(rev ZXid, xid Xid, path string, evtype EventType, cb func(ZXid)) { 86 | ws.mu.Lock() 87 | curw := ws.path2watch[evtype][path] 88 | ws.mu.Unlock() 89 | if curw != nil { 90 | return 91 | } 92 | 93 | ctx, cancel := context.WithCancel(ws.ctx) 94 | var wch etcd.WatchChan 95 | switch evtype { 96 | case EventNodeDataChanged: 97 | fallthrough 98 | case 
EventNodeCreated: 99 | fallthrough 100 | // use rev+1 watch begins AFTER the requested zxid 101 | case EventNodeDeleted: 102 | wch = ws.c.Watch(ctx, "/zk/key/"+path, etcd.WithRev(int64(rev+1))) 103 | case EventNodeChildrenChanged: 104 | wch = ws.c.Watch( 105 | ctx, 106 | getListPfx(path), 107 | etcd.WithPrefix(), 108 | etcd.WithRev(int64(rev+1))) 109 | default: 110 | panic("unsupported watch op") 111 | } 112 | 113 | ws.mu.Lock() 114 | defer ws.mu.Unlock() 115 | curw = ws.path2watch[evtype][path] 116 | if curw != nil { 117 | glog.V(7).Infof("ELIDING WATCH on xid=%d evtype=%d, already have %s evtype=%d", xid, evtype, path, curw.evtype) 118 | cancel() 119 | return 120 | } 121 | w := &watch{ws.c, xid, evtype, path, wch, ctx, cancel, rev, make(chan struct{})} 122 | ws.path2watch[evtype][path] = w 123 | go ws.runWatch(w, cb) 124 | } 125 | 126 | func (ws *watches) runWatch(w *watch, cb func(ZXid)) { 127 | defer func() { 128 | close(w.donec) 129 | <-w.wch 130 | }() 131 | for { 132 | select { 133 | case resp, ok := <-w.wch: 134 | if !ok { 135 | return 136 | } 137 | for _, ev := range resp.Events { 138 | if !w.isRelevant(ev) { 139 | continue 140 | } 141 | ws.mu.Lock() 142 | delete(ws.path2watch[w.evtype], w.path) 143 | ws.mu.Unlock() 144 | cb(ZXid(resp.Header.Revision)) 145 | w.cancel() 146 | } 147 | case <-w.ctx.Done(): 148 | } 149 | } 150 | } 151 | 152 | func (ws *watches) close() { 153 | ws.mu.Lock() 154 | defer ws.mu.Unlock() 155 | ws.cancel() 156 | for i := range ws.path2watch { 157 | for _, w := range ws.path2watch[i] { 158 | for range w.wch { 159 | } 160 | } 161 | } 162 | } 163 | 164 | // Wait until watcher depending on the given rev completes. 
165 | // NOTE: path is internal zkpath representation 166 | // TODO: watch waiting may need to be proxy-wide to be correct 167 | // TODO: better algorithm 168 | func (ws *watches) Wait(rev ZXid, path string, evtype EventType) { 169 | ch := []<-chan struct{}{} 170 | rev++ 171 | ws.mu.Lock() 172 | for k, w := range ws.path2watch[evtype] { 173 | if k != path { 174 | continue 175 | } 176 | if w.startRev <= rev && w.evtype == evtype { 177 | ch = append(ch, w.donec) 178 | } 179 | } 180 | ws.mu.Unlock() 181 | for _, c := range ch { 182 | <-c 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /constants.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | "errors" 5 | ) 6 | 7 | const ( 8 | protocolVersion = 0 9 | 10 | DefaultPort = 2181 11 | ) 12 | 13 | const ( 14 | opNotify Op = 0 15 | opCreate = 1 16 | opDelete = 2 17 | opExists = 3 18 | opGetData = 4 19 | opSetData = 5 20 | opGetAcl = 6 21 | opSetAcl = 7 22 | opGetChildren = 8 23 | opSync = 9 24 | opPing = 11 25 | opGetChildren2 = 12 26 | opCheck = 13 27 | opMulti = 14 28 | 29 | opClose = -11 30 | opSetAuth = 100 31 | opSetWatches = 101 32 | 33 | // Not in protocol, used internally 34 | opWatcherEvent = -2 35 | opInvalid = -100000 36 | ) 37 | 38 | type EventType int32 39 | 40 | const ( 41 | EventNodeCreated EventType = iota + 1 42 | EventNodeDeleted 43 | EventNodeDataChanged 44 | EventNodeChildrenChanged 45 | 46 | EventSession = EventType(-1) 47 | EventNotWatching = EventType(-2) 48 | ) 49 | 50 | const ( 51 | FlagEphemeral = 1 52 | FlagSequence = 2 53 | ) 54 | 55 | const ( 56 | StateUnknown = State(-1) 57 | StateDisconnected = State(0) 58 | StateConnecting = State(1) 59 | StateSyncConnected = State(3) 60 | StateAuthFailed = State(4) 61 | StateConnectedReadOnly = State(5) 62 | StateSaslAuthenticated = State(6) 63 | StateExpired = State(-112) 64 | // StateAuthFailed = State(-113) 65 | 66 | 
StateConnected = State(100) 67 | StateHasSession = State(101) 68 | ) 69 | 70 | var ( 71 | stateNames = map[State]string{ 72 | StateUnknown: "StateUnknown", 73 | StateDisconnected: "StateDisconnected", 74 | StateConnectedReadOnly: "StateConnectedReadOnly", 75 | StateSaslAuthenticated: "StateSaslAuthenticated", 76 | StateExpired: "StateExpired", 77 | StateAuthFailed: "StateAuthFailed", 78 | StateConnecting: "StateConnecting", 79 | StateConnected: "StateConnected", 80 | StateHasSession: "StateHasSession", 81 | } 82 | ) 83 | 84 | type State int32 85 | 86 | var ( 87 | ErrConnectionClosed = errors.New("zk: connection closed") 88 | ErrUnknown = errors.New("zk: unknown error") 89 | ErrAPIError = errors.New("zk: api error") 90 | ErrNoNode = errors.New("zk: node does not exist") 91 | ErrNoAuth = errors.New("zk: not authenticated") 92 | ErrBadVersion = errors.New("zk: version conflict") 93 | ErrNoChildrenForEphemerals = errors.New("zk: ephemeral nodes may not have children") 94 | ErrNodeExists = errors.New("zk: node already exists") 95 | ErrNotEmpty = errors.New("zk: node has children") 96 | ErrSessionExpired = errors.New("zk: session has been expired by the server") 97 | ErrInvalidACL = errors.New("zk: invalid ACL specified") 98 | ErrAuthFailed = errors.New("zk: client authentication failed") 99 | ErrClosing = errors.New("zk: zookeeper is closing") 100 | ErrNothing = errors.New("zk: no server responsees to process") 101 | ErrSessionMoved = errors.New("zk: session moved to another server, so operation is ignored") 102 | 103 | // ErrInvalidCallback = errors.New("zk: invalid callback specified") 104 | errCodeToError = map[ErrCode]error{ 105 | 0: nil, 106 | errAPIError: ErrAPIError, 107 | errNoNode: ErrNoNode, 108 | errNoAuth: ErrNoAuth, 109 | errBadVersion: ErrBadVersion, 110 | errNoChildrenForEphemerals: ErrNoChildrenForEphemerals, 111 | errNodeExists: ErrNodeExists, 112 | errNotEmpty: ErrNotEmpty, 113 | errSessionExpired: ErrSessionExpired, 114 | // errInvalidCallback: 
ErrInvalidCallback, 115 | errInvalidAcl: ErrInvalidACL, 116 | errAuthFailed: ErrAuthFailed, 117 | errClosing: ErrClosing, 118 | errNothing: ErrNothing, 119 | errSessionMoved: ErrSessionMoved, 120 | } 121 | ) 122 | 123 | func (e ErrCode) toError() error { 124 | if err, ok := errCodeToError[e]; ok { 125 | return err 126 | } 127 | return ErrUnknown 128 | } 129 | 130 | type ErrCode int32 131 | 132 | const ( 133 | errOk ErrCode = 0 134 | // System and server-side errors 135 | errSystemError = ErrCode(-1) 136 | errRuntimeInconsistency = ErrCode(-2) 137 | errDataInconsistency = ErrCode(-3) 138 | errConnectionLoss = ErrCode(-4) 139 | errMarshallingError = ErrCode(-5) 140 | errUnimplemented = ErrCode(-6) 141 | errOperationTimeout = ErrCode(-7) 142 | errBadArguments = ErrCode(-8) 143 | errInvalidState = ErrCode(-9) 144 | // API errors 145 | errAPIError = ErrCode(-100) 146 | errNoNode = ErrCode(-101) // * 147 | errNoAuth = ErrCode(-102) 148 | errBadVersion = ErrCode(-103) // * 149 | errNoChildrenForEphemerals = ErrCode(-108) 150 | errNodeExists = ErrCode(-110) // * 151 | errNotEmpty = ErrCode(-111) 152 | errSessionExpired = ErrCode(-112) 153 | errInvalidCallback = ErrCode(-113) 154 | errInvalidAcl = ErrCode(-114) 155 | errAuthFailed = ErrCode(-115) 156 | errClosing = ErrCode(-116) 157 | errNothing = ErrCode(-117) 158 | errSessionMoved = ErrCode(-118) 159 | ) 160 | -------------------------------------------------------------------------------- /integration_test.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | "net" 5 | "os" 6 | "testing" 7 | "time" 8 | 9 | etcd "github.com/coreos/etcd/clientv3" 10 | "github.com/coreos/etcd/integration" 11 | "github.com/samuel/go-zookeeper/zk" 12 | ) 13 | 14 | var ( 15 | acl = zk.WorldACL(zk.PermAll) 16 | ) 17 | 18 | func TestCreateGet(t *testing.T) { 19 | runTest(t, func(t *testing.T, c *zk.Conn) { 20 | if _, _, err := c.Get("/abc"); err == nil { 21 | 
t.Fatalf("expected error on getting absent /abc") 22 | } 23 | if _, err := c.Create("/foo/bar", []byte("x"), 0, acl); err == nil { 24 | t.Fatalf("expected error on creating /foo/bar without /foo") 25 | } 26 | if _, err := c.Create("/abc", []byte("data1"), 0, acl); err != nil { 27 | t.Fatal(err) 28 | } 29 | if _, err := c.Create("/abc", []byte("data1"), 0, acl); err == nil { 30 | t.Fatalf("don't allow double create") 31 | } 32 | if _, _, err := c.Get("/abc"); err != nil { 33 | t.Fatal(err) 34 | } 35 | if _, _, err := c.Get("/abc/def"); err == nil { 36 | t.Fatalf("expected error on getting /abc/def") 37 | } 38 | if _, err := c.Create("/abc/def", []byte("data2"), 0, acl); err != nil { 39 | t.Fatal(err) 40 | } 41 | if _, _, err := c.Get("/abc/def"); err != nil { 42 | t.Fatal(err) 43 | } 44 | }) 45 | } 46 | 47 | func TestCreateSequential(t *testing.T) { 48 | runTest(t, func(t *testing.T, c *zk.Conn) { 49 | if _, err := c.Create("/abc", []byte("x"), 0, acl); err != nil { 50 | t.Fatal(err) 51 | } 52 | if _, err := c.Create("/abc/def", []byte("x"), zk.FlagSequence, acl); err != nil { 53 | t.Fatal(err) 54 | } 55 | if _, err := c.Create("/abc/def", []byte("x"), zk.FlagSequence, acl); err != nil { 56 | t.Fatal(err) 57 | } 58 | if _, _, err := c.Get("/abc/def0000000000"); err != nil { 59 | t.Fatal(err) 60 | } 61 | if _, _, err := c.Get("/abc/def0000000001"); err != nil { 62 | t.Fatal(err) 63 | } 64 | }) 65 | } 66 | 67 | func TestGetDataW(t *testing.T) { 68 | runTest(t, func(t *testing.T, c *zk.Conn) { 69 | if _, err := c.Create("/abc", []byte("data1"), 0, acl); err != nil { 70 | t.Fatal(err) 71 | } 72 | _, _, ch, werr := c.GetW("/abc") 73 | if werr != nil { 74 | t.Fatal(werr) 75 | } 76 | select { 77 | case resp := <-ch: 78 | t.Fatalf("should block on get channel, got %+v", resp) 79 | case <-time.After(10 * time.Millisecond): 80 | } 81 | if _, err := c.Set("/abc", []byte("a"), -1); err != nil { 82 | t.Fatal(err) 83 | } 84 | select { 85 | case <-ch: 86 | case <-time.After(5 * 
time.Second): 87 | t.Fatalf("took too long to get data update") 88 | } 89 | }) 90 | } 91 | 92 | func TestSync(t *testing.T) { 93 | runTest(t, func(t *testing.T, c *zk.Conn) { 94 | if _, err := c.Create("/abc", []byte(""), 0, acl); err != nil { 95 | t.Fatal(err) 96 | } 97 | if _, err := c.Sync("/abc"); err != nil { 98 | t.Fatal(err) 99 | } 100 | }) 101 | } 102 | 103 | func TestExists(t *testing.T) { 104 | runTest(t, func(t *testing.T, c *zk.Conn) { 105 | if _, err := c.Create("/abc", []byte(""), 0, acl); err != nil { 106 | t.Fatal(err) 107 | } 108 | if ok, _, err := c.Exists("/abc"); err != nil || !ok { 109 | t.Fatalf("expected it to exist %v %v", err, ok) 110 | } 111 | if ok, _, err := c.Exists("/ab"); ok { 112 | t.Fatalf("expected it to not exist %v %v", err, ok) 113 | } 114 | }) 115 | } 116 | 117 | func TestExistsW(t *testing.T) { 118 | runTest(t, func(t *testing.T, c *zk.Conn) { 119 | // test create 120 | ok, _, ch, err := c.ExistsW("/abc") 121 | if ok || err != nil { 122 | t.Fatal(err) 123 | } 124 | if _, err := c.Create("/abc", []byte(""), 0, acl); err != nil { 125 | t.Fatal(err) 126 | } 127 | select { 128 | case <-ch: 129 | case <-time.After(time.Second): 130 | t.Fatalf("took too long to get creation exists event") 131 | } 132 | 133 | // test (multi) set 134 | for i := 0; i < 2; i++ { 135 | ok, _, ch, err = c.ExistsW("/abc") 136 | if !ok || err != nil { 137 | t.Fatal(err) 138 | } 139 | if _, err := c.Set("/abc", []byte("a"), -1); err != nil { 140 | t.Fatal(err) 141 | } 142 | select { 143 | case <-ch: 144 | t.Fatalf("set data shouldn't trigger watcher") 145 | case <-time.After(time.Second): 146 | } 147 | } 148 | 149 | // test delete 150 | ok, _, ch, err = c.ExistsW("/abc") 151 | if !ok || err != nil { 152 | t.Fatal(err) 153 | } 154 | if err = c.Delete("/abc", -1); err != nil { 155 | t.Fatal(err) 156 | } 157 | select { 158 | case <-ch: 159 | case <-time.After(time.Second): 160 | t.Fatalf("took too long to get deletion exists event") 161 | } 162 | }) 163 | } 164 
| 165 | func TestChildren(t *testing.T) { 166 | runTest(t, func(t *testing.T, c *zk.Conn) { 167 | if _, err := c.Create("/abc", []byte(""), 0, acl); err != nil { 168 | t.Fatal(err) 169 | } 170 | if _, err := c.Create("/abc/def", []byte(""), 0, acl); err != nil { 171 | t.Fatal(err) 172 | } 173 | if _, err := c.Create("/abc/123", []byte(""), 0, acl); err != nil { 174 | t.Fatal(err) 175 | } 176 | children, _, err := c.Children("/") 177 | if err != nil { 178 | t.Fatal(err) 179 | } 180 | if len(children) != 1 { 181 | t.Fatalf("expected one child, got %v", children) 182 | } 183 | children, _, err = c.Children("/abc") 184 | if err != nil { 185 | t.Fatal(err) 186 | } 187 | if len(children) != 2 { 188 | t.Fatalf("expected two children, got %v", children) 189 | } 190 | }) 191 | } 192 | 193 | func TestGetChildrenW(t *testing.T) { 194 | runTest(t, func(t *testing.T, c *zk.Conn) { 195 | if _, err := c.Create("/abc", []byte(""), 0, acl); err != nil { 196 | t.Fatal(err) 197 | } 198 | 199 | // watch for /abc/def 200 | _, _, ch, err := c.ChildrenW("/abc") 201 | if err != nil { 202 | t.Fatal(err) 203 | } 204 | select { 205 | case <-ch: 206 | t.Fatalf("should block") 207 | case <-time.After(10 * time.Millisecond): 208 | } 209 | if _, err := c.Create("/abc/def", []byte(""), 0, acl); err != nil { 210 | t.Fatal(err) 211 | } 212 | select { 213 | case <-ch: 214 | case <-time.After(time.Second): 215 | t.Fatalf("waited to long for new child") 216 | } 217 | 218 | // watch for /abc/123 219 | _, _, ch, err = c.ChildrenW("/abc") 220 | if err != nil { 221 | t.Fatal(err) 222 | } 223 | select { 224 | case <-ch: 225 | t.Fatalf("should block") 226 | case <-time.After(10 * time.Millisecond): 227 | } 228 | if _, err := c.Create("/abc/123", []byte(""), 0, acl); err != nil { 229 | t.Fatal(err) 230 | } 231 | select { 232 | case <-ch: 233 | case <-time.After(time.Second): 234 | t.Fatalf("waited to long for new child") 235 | } 236 | }) 237 | } 238 | 239 | func TestCreateInvalidACL(t *testing.T) { 240 | 
runTest(t, func(t *testing.T, c *zk.Conn) { 241 | werr := ErrInvalidACL 242 | resp, err := c.Create("/foo", []byte("x"), 0, nil) 243 | if err == nil { 244 | t.Fatalf("created with invalid acl %v, wanted %v", resp, werr) 245 | } 246 | if err.Error() != werr.Error() { 247 | t.Fatalf("got err %v, wanted %v", err, werr) 248 | } 249 | }) 250 | } 251 | 252 | func runTest(t *testing.T, f func(*testing.T, *zk.Conn)) { 253 | clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) 254 | ch, cancel := serve(clus.RandClient()) 255 | 256 | c, _, err := zk.Connect([]string{"127.0.0.1:30000"}, time.Second) 257 | if err != nil { 258 | t.Fatal(err) 259 | } 260 | f(t, c) 261 | 262 | clus.Terminate(t) 263 | cancel() 264 | c.Close() 265 | <-ch 266 | 267 | } 268 | 269 | func serve(c *etcd.Client) (<-chan struct{}, func()) { 270 | ch := make(chan struct{}) 271 | // TODO use unix socket 272 | ln, err := net.Listen("tcp", ":30000") 273 | if err != nil { 274 | os.Exit(-1) 275 | } 276 | go func() { 277 | Serve(c.Ctx(), ln, NewAuth(c), NewZK(c)) 278 | close(ch) 279 | }() 280 | return ch, func() { ln.Close() } 281 | } 282 | -------------------------------------------------------------------------------- /encode.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "io" 7 | "net" 8 | "reflect" 9 | "runtime" 10 | ) 11 | 12 | var ( 13 | ErrShortBuffer = errors.New("short buffer") 14 | ErrPtrExpected = errors.New("ptr expected") 15 | ErrUnhandledFieldType = errors.New("unhandled field type") 16 | ) 17 | 18 | type decoder interface { 19 | Decode(buf []byte) (int, error) 20 | } 21 | 22 | type encoder interface { 23 | Encode(buf []byte) (int, error) 24 | } 25 | 26 | func decodePacket(buf []byte, st interface{}) (n int, err error) { 27 | defer func() { 28 | if r := recover(); r != nil { 29 | if e, ok := r.(runtime.Error); ok && e.Error() == "runtime error: slice bounds out of 
range" { 30 | err = ErrShortBuffer 31 | } else { 32 | panic(r) 33 | } 34 | } 35 | }() 36 | 37 | v := reflect.ValueOf(st) 38 | if v.Kind() != reflect.Ptr || v.IsNil() { 39 | return 0, ErrPtrExpected 40 | } 41 | return decodePacketValue(buf, v) 42 | } 43 | 44 | func decodePacketValue(buf []byte, v reflect.Value) (int, error) { 45 | rv := v 46 | kind := v.Kind() 47 | if kind == reflect.Ptr { 48 | if v.IsNil() { 49 | v.Set(reflect.New(v.Type().Elem())) 50 | } 51 | v = v.Elem() 52 | kind = v.Kind() 53 | } 54 | 55 | n := 0 56 | switch kind { 57 | default: 58 | return n, ErrUnhandledFieldType 59 | case reflect.Struct: 60 | if de, ok := rv.Interface().(decoder); ok { 61 | return de.Decode(buf) 62 | } else if de, ok := v.Interface().(decoder); ok { 63 | return de.Decode(buf) 64 | } else { 65 | for i := 0; i < v.NumField(); i++ { 66 | field := v.Field(i) 67 | n2, err := decodePacketValue(buf[n:], field) 68 | n += n2 69 | if err != nil { 70 | return n, err 71 | } 72 | } 73 | } 74 | case reflect.Bool: 75 | v.SetBool(buf[n] != 0) 76 | n++ 77 | case reflect.Int32: 78 | v.SetInt(int64(binary.BigEndian.Uint32(buf[n : n+4]))) 79 | n += 4 80 | case reflect.Int64: 81 | v.SetInt(int64(binary.BigEndian.Uint64(buf[n : n+8]))) 82 | n += 8 83 | case reflect.String: 84 | ln := int(binary.BigEndian.Uint32(buf[n : n+4])) 85 | v.SetString(string(buf[n+4 : n+4+ln])) 86 | n += 4 + ln 87 | case reflect.Slice: 88 | switch v.Type().Elem().Kind() { 89 | default: 90 | count := int(binary.BigEndian.Uint32(buf[n : n+4])) 91 | n += 4 92 | values := reflect.MakeSlice(v.Type(), count, count) 93 | v.Set(values) 94 | for i := 0; i < count; i++ { 95 | n2, err := decodePacketValue(buf[n:], values.Index(i)) 96 | n += n2 97 | if err != nil { 98 | return n, err 99 | } 100 | } 101 | case reflect.Uint8: 102 | ln := int(int32(binary.BigEndian.Uint32(buf[n : n+4]))) 103 | if ln < 0 { 104 | n += 4 105 | v.SetBytes(nil) 106 | } else { 107 | bytes := make([]byte, ln) 108 | copy(bytes, buf[n+4:n+4+ln]) 109 | 
v.SetBytes(bytes) 110 | n += 4 + ln 111 | } 112 | } 113 | } 114 | return n, nil 115 | } 116 | 117 | func encodePacket(buf []byte, st interface{}) (n int, err error) { 118 | defer func() { 119 | if r := recover(); r != nil { 120 | if e, ok := r.(runtime.Error); ok && e.Error() == "runtime error: slice bounds out of range" { 121 | err = ErrShortBuffer 122 | } else { 123 | panic(r) 124 | } 125 | } 126 | }() 127 | 128 | v := reflect.ValueOf(st) 129 | if v.Kind() != reflect.Ptr || v.IsNil() { 130 | return 0, ErrPtrExpected 131 | } 132 | return encodePacketValue(buf, v) 133 | } 134 | 135 | func encodePacketValue(buf []byte, v reflect.Value) (int, error) { 136 | rv := v 137 | for v.Kind() == reflect.Ptr || v.Kind() == reflect.Interface { 138 | v = v.Elem() 139 | } 140 | 141 | n := 0 142 | switch v.Kind() { 143 | default: 144 | return n, ErrUnhandledFieldType 145 | case reflect.Struct: 146 | if en, ok := rv.Interface().(encoder); ok { 147 | return en.Encode(buf) 148 | } else if en, ok := v.Interface().(encoder); ok { 149 | return en.Encode(buf) 150 | } else { 151 | for i := 0; i < v.NumField(); i++ { 152 | field := v.Field(i) 153 | n2, err := encodePacketValue(buf[n:], field) 154 | n += n2 155 | if err != nil { 156 | return n, err 157 | } 158 | } 159 | } 160 | case reflect.Bool: 161 | if v.Bool() { 162 | buf[n] = 1 163 | } else { 164 | buf[n] = 0 165 | } 166 | n++ 167 | case reflect.Int32: 168 | binary.BigEndian.PutUint32(buf[n:n+4], uint32(v.Int())) 169 | n += 4 170 | case reflect.Int64: 171 | binary.BigEndian.PutUint64(buf[n:n+8], uint64(v.Int())) 172 | n += 8 173 | case reflect.String: 174 | str := v.String() 175 | binary.BigEndian.PutUint32(buf[n:n+4], uint32(len(str))) 176 | copy(buf[n+4:n+4+len(str)], []byte(str)) 177 | n += 4 + len(str) 178 | case reflect.Slice: 179 | switch v.Type().Elem().Kind() { 180 | default: 181 | count := v.Len() 182 | startN := n 183 | n += 4 184 | for i := 0; i < count; i++ { 185 | n2, err := encodePacketValue(buf[n:], v.Index(i)) 186 | n 
+= n2 187 | if err != nil { 188 | return n, err 189 | } 190 | } 191 | binary.BigEndian.PutUint32(buf[startN:startN+4], uint32(count)) 192 | case reflect.Uint8: 193 | if v.IsNil() { 194 | binary.BigEndian.PutUint32(buf[n:n+4], uint32(0xffffffff)) 195 | n += 4 196 | } else { 197 | bytes := v.Bytes() 198 | binary.BigEndian.PutUint32(buf[n:n+4], uint32(len(bytes))) 199 | copy(buf[n+4:n+4+len(bytes)], bytes) 200 | n += 4 + len(bytes) 201 | } 202 | } 203 | } 204 | return n, nil 205 | } 206 | 207 | func ReadPacket(zk net.Conn, r interface{}) error { 208 | buf := make([]byte, 256) 209 | _, err := io.ReadFull(zk, buf[:4]) 210 | blen := int(binary.BigEndian.Uint32(buf[:4])) 211 | if cap(buf) < blen { 212 | buf = make([]byte, blen) 213 | } 214 | _, err = io.ReadFull(zk, buf[:blen]) 215 | if err != nil { 216 | return err 217 | } 218 | _, err = decodePacket(buf[:blen], r) 219 | return err 220 | } 221 | 222 | func WritePacket(zk net.Conn, r interface{}) error { 223 | buf := make([]byte, 256) 224 | n, err := encodePacket(buf[4:], r) 225 | if err != nil { 226 | return err 227 | } 228 | binary.BigEndian.PutUint32(buf[:4], uint32(n)) 229 | _, err = zk.Write(buf[:n+4]) 230 | return err 231 | } 232 | 233 | func readBuf(zk net.Conn) ([]byte, uint32, error) { 234 | buf := make([]byte, 256) 235 | if _, err := io.ReadFull(zk, buf[:4]); err != nil { 236 | return nil, 0, err 237 | } 238 | blen := binary.BigEndian.Uint32(buf[:4]) 239 | if cap(buf) < int(blen) { 240 | buf = make([]byte, blen) 241 | } 242 | if _, err := io.ReadFull(zk, buf[:blen]); err != nil { 243 | return nil, 0, err 244 | } 245 | return buf, blen, nil 246 | } 247 | 248 | func readReqOp(zk net.Conn) (Xid, interface{}, error) { 249 | buf, blen, err := readBuf(zk) 250 | if err != nil { 251 | return 0, nil, err 252 | } 253 | hdr := &requestHeader{} 254 | n, herr := decodePacket(buf, hdr) 255 | if herr != nil { 256 | return 0, nil, herr 257 | } 258 | op := op2req(hdr.Opcode) 259 | _, oerr := decodePacket(buf[n:blen], op) 260 | 
return hdr.Xid, op, oerr 261 | 262 | } 263 | 264 | func readRespOp(zk net.Conn, xid2resp func(Xid) interface{}) (*ResponseHeader, interface{}, error) { 265 | buf, blen, err := readBuf(zk) 266 | if err != nil { 267 | return nil, nil, err 268 | } 269 | hdr := &ResponseHeader{} 270 | n, herr := decodePacket(buf, hdr) 271 | if herr != nil { 272 | return nil, nil, herr 273 | } 274 | 275 | var resp interface{} 276 | if hdr.Err != 0 { 277 | return hdr, nil, nil 278 | } 279 | if hdr.Xid == -1 { 280 | resp = &WatcherEvent{} 281 | } else { 282 | resp = xid2resp(hdr.Xid) 283 | } 284 | if resp == nil { 285 | return nil, nil, ErrAPIError 286 | } 287 | _, oerr := decodePacket(buf[n:blen], resp) 288 | return hdr, resp, oerr 289 | } 290 | 291 | func (r *MultiRequest) Encode(buf []byte) (int, error) { 292 | total := 0 293 | for _, op := range r.Ops { 294 | op.Header.Done = false 295 | n, err := encodePacketValue(buf[total:], reflect.ValueOf(op)) 296 | if err != nil { 297 | return total, err 298 | } 299 | total += n 300 | } 301 | r.DoneHeader.Done = true 302 | n, err := encodePacketValue(buf[total:], reflect.ValueOf(r.DoneHeader)) 303 | if err != nil { 304 | return total, err 305 | } 306 | total += n 307 | 308 | return total, nil 309 | } 310 | 311 | func (r *MultiRequest) Decode(buf []byte) (int, error) { 312 | r.Ops = make([]MultiRequestOp, 0) 313 | r.DoneHeader = MultiHeader{-1, true, -1} 314 | total := 0 315 | for { 316 | header := &MultiHeader{} 317 | n, err := decodePacketValue(buf[total:], reflect.ValueOf(header)) 318 | if err != nil { 319 | return total, err 320 | } 321 | total += n 322 | if header.Done { 323 | r.DoneHeader = *header 324 | break 325 | } 326 | 327 | req := op2req(header.Type) 328 | if req == nil { 329 | return total, ErrAPIError 330 | } 331 | n, err = decodePacketValue(buf[total:], reflect.ValueOf(req)) 332 | if err != nil { 333 | return total, err 334 | } 335 | total += n 336 | r.Ops = append(r.Ops, MultiRequestOp{*header, req}) 337 | } 338 | return total, 
nil 339 | } 340 | 341 | func (r *MultiResponse) Decode(buf []byte) (int, error) { 342 | r.Ops = make([]MultiResponseOp, 0) 343 | r.DoneHeader = MultiHeader{-1, true, -1} 344 | total := 0 345 | for { 346 | header := &MultiHeader{} 347 | n, err := decodePacketValue(buf[total:], reflect.ValueOf(header)) 348 | if err != nil { 349 | return total, err 350 | } 351 | total += n 352 | if header.Done { 353 | r.DoneHeader = *header 354 | break 355 | } 356 | 357 | res := MultiResponseOp{Header: *header} 358 | var w reflect.Value 359 | switch header.Type { 360 | default: 361 | return total, ErrAPIError 362 | case opCreate: 363 | w = reflect.ValueOf(&res.String) 364 | case opSetData: 365 | res.Stat = new(Stat) 366 | w = reflect.ValueOf(res.Stat) 367 | case opCheck, opDelete: 368 | } 369 | if w.IsValid() { 370 | n, err := decodePacketValue(buf[total:], w) 371 | if err != nil { 372 | return total, err 373 | } 374 | total += n 375 | } 376 | r.Ops = append(r.Ops, res) 377 | } 378 | return total, nil 379 | } 380 | -------------------------------------------------------------------------------- /xchk/zk.go: -------------------------------------------------------------------------------- 1 | package xchk 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/chzchzchz/zetcd" 9 | "github.com/golang/glog" 10 | ) 11 | 12 | var ( 13 | errStat = fmt.Errorf("stat mismatch") 14 | errData = fmt.Errorf("data mismatch") 15 | errAcl = fmt.Errorf("acl mismatch") 16 | errNumAcl = fmt.Errorf("acl length mismatch") 17 | errPath = fmt.Errorf("path mismatch") 18 | errErr = fmt.Errorf("err mismatch") 19 | errZXid = fmt.Errorf("zxid mismatch") 20 | errNumChildren = fmt.Errorf("number of children mismatch") 21 | errChildren = fmt.Errorf("children paths mismatch") 22 | ) 23 | 24 | // zkXchk takes incoming ZK requests and forwards them to a remote ZK server 25 | type zkXchk struct { 26 | s *session 27 | 28 | cZK zetcd.ZK 29 | oZK zetcd.ZK 30 | } 31 | 32 | func newZK(s *session, cZKf, oZKf 
zetcd.ZKFunc) (*zkXchk, error) { 33 | cZK, cerr := cZKf(s.candidate) 34 | if cerr != nil { 35 | return nil, cerr 36 | } 37 | oZK, oerr := oZKf(s.oracle) 38 | if oerr != nil { 39 | return nil, oerr 40 | } 41 | return &zkXchk{s, cZK, oZK}, nil 42 | } 43 | 44 | func (xchk *zkXchk) Create(xid zetcd.Xid, op *zetcd.CreateRequest) zetcd.ZKResponse { 45 | cf := func() zetcd.ZKResponse { return xchk.cZK.Create(xid, op) } 46 | of := func() zetcd.ZKResponse { return xchk.oZK.Create(xid, op) } 47 | cr, or, err := xchkResp(cf, of) 48 | defer func() { reportErr(cr, or, err) }() 49 | if err != nil || or.Resp == nil { 50 | return or 51 | } 52 | crr, orr := cr.Resp.(*zetcd.CreateResponse), or.Resp.(*zetcd.CreateResponse) 53 | if crr.Path != orr.Path { 54 | err = errPath 55 | } 56 | return or 57 | } 58 | 59 | func (xchk *zkXchk) Delete(xid zetcd.Xid, op *zetcd.DeleteRequest) zetcd.ZKResponse { 60 | cf := func() zetcd.ZKResponse { return xchk.cZK.Delete(xid, op) } 61 | of := func() zetcd.ZKResponse { return xchk.oZK.Delete(xid, op) } 62 | cr, or, err := xchkResp(cf, of) 63 | defer func() { reportErr(cr, or, err) }() 64 | if err != nil || or.Resp == nil { 65 | return or 66 | } 67 | return or 68 | } 69 | 70 | func (xchk *zkXchk) Exists(xid zetcd.Xid, op *zetcd.ExistsRequest) zetcd.ZKResponse { 71 | cf := func() zetcd.ZKResponse { return xchk.cZK.Exists(xid, op) } 72 | of := func() zetcd.ZKResponse { return xchk.oZK.Exists(xid, op) } 73 | cr, or, err := xchkResp(cf, of) 74 | defer func() { reportErr(cr, or, err) }() 75 | if err != nil || or.Resp == nil { 76 | return or 77 | } 78 | crr, orr := cr.Resp.(*zetcd.ExistsResponse), or.Resp.(*zetcd.ExistsResponse) 79 | 80 | if !xchk.xchkStat(crr.Stat, orr.Stat) { 81 | err = errStat 82 | } 83 | return or 84 | } 85 | 86 | func (xchk *zkXchk) GetData(xid zetcd.Xid, op *zetcd.GetDataRequest) zetcd.ZKResponse { 87 | cf := func() zetcd.ZKResponse { return xchk.cZK.GetData(xid, op) } 88 | of := func() zetcd.ZKResponse { return xchk.oZK.GetData(xid, 
op) } 89 | cr, or, err := xchkResp(cf, of) 90 | defer func() { reportErr(cr, or, err) }() 91 | if err != nil || or.Resp == nil { 92 | return or 93 | } 94 | crr, orr := cr.Resp.(*zetcd.GetDataResponse), or.Resp.(*zetcd.GetDataResponse) 95 | 96 | if bytes.Compare(crr.Data, orr.Data) != 0 { 97 | err = errData 98 | } 99 | if !xchk.xchkStat(crr.Stat, orr.Stat) { 100 | err = errStat 101 | } 102 | return or 103 | } 104 | 105 | func (xchk *zkXchk) SetData(xid zetcd.Xid, op *zetcd.SetDataRequest) zetcd.ZKResponse { 106 | cf := func() zetcd.ZKResponse { return xchk.cZK.SetData(xid, op) } 107 | of := func() zetcd.ZKResponse { return xchk.oZK.SetData(xid, op) } 108 | cr, or, err := xchkResp(cf, of) 109 | defer func() { reportErr(cr, or, err) }() 110 | if err != nil || or.Resp == nil { 111 | return or 112 | } 113 | crr, orr := cr.Resp.(*zetcd.SetDataResponse), or.Resp.(*zetcd.SetDataResponse) 114 | 115 | if !xchk.xchkStat(crr.Stat, orr.Stat) { 116 | err = errStat 117 | } 118 | return or 119 | } 120 | 121 | func (xchk *zkXchk) GetAcl(xid zetcd.Xid, op *zetcd.GetAclRequest) zetcd.ZKResponse { 122 | cf := func() zetcd.ZKResponse { return xchk.cZK.GetAcl(xid, op) } 123 | of := func() zetcd.ZKResponse { return xchk.oZK.GetAcl(xid, op) } 124 | cr, or, err := xchkResp(cf, of) 125 | defer func() { reportErr(cr, or, err) }() 126 | if err != nil || or.Resp == nil { 127 | return or 128 | } 129 | crr, orr := cr.Resp.(*zetcd.GetAclResponse), or.Resp.(*zetcd.GetAclResponse) 130 | 131 | if len(crr.Acl) != len(orr.Acl) { 132 | err = errNumAcl 133 | return or 134 | } 135 | 136 | for i := range crr.Acl { 137 | if crr.Acl[i] != orr.Acl[i] { 138 | err = errAcl 139 | return or 140 | } 141 | } 142 | 143 | if !xchk.xchkStat(crr.Stat, orr.Stat) { 144 | err = errStat 145 | } 146 | 147 | return or 148 | } 149 | 150 | func (xchk *zkXchk) SetAcl(xid zetcd.Xid, op *zetcd.SetAclRequest) zetcd.ZKResponse { 151 | cf := func() zetcd.ZKResponse { return xchk.cZK.SetAcl(xid, op) } 152 | of := func() 
zetcd.ZKResponse { return xchk.oZK.SetAcl(xid, op) } 153 | cr, or, err := xchkResp(cf, of) 154 | defer func() { reportErr(cr, or, err) }() 155 | if err != nil || or.Resp == nil { 156 | return or 157 | } 158 | crr, orr := cr.Resp.(*zetcd.SetAclResponse), or.Resp.(*zetcd.SetAclResponse) 159 | 160 | if !xchk.xchkStat(crr.Stat, orr.Stat) { 161 | err = errStat 162 | } 163 | 164 | return or 165 | } 166 | 167 | func (xchk *zkXchk) GetChildren(xid zetcd.Xid, op *zetcd.GetChildrenRequest) zetcd.ZKResponse { 168 | cf := func() zetcd.ZKResponse { return xchk.cZK.GetChildren(xid, op) } 169 | of := func() zetcd.ZKResponse { return xchk.oZK.GetChildren(xid, op) } 170 | cr, or, err := xchkResp(cf, of) 171 | defer func() { reportErr(cr, or, err) }() 172 | if err != nil || or.Resp == nil { 173 | return or 174 | } 175 | crr, orr := cr.Resp.(*zetcd.GetChildrenResponse), or.Resp.(*zetcd.GetChildrenResponse) 176 | 177 | if len(crr.Children) != len(orr.Children) { 178 | err = errNumChildren 179 | return or 180 | } 181 | for i := range crr.Children { 182 | if crr.Children[i] != orr.Children[i] { 183 | err = errChildren 184 | return or 185 | } 186 | } 187 | 188 | return or 189 | } 190 | 191 | func (xchk *zkXchk) Sync(xid zetcd.Xid, op *zetcd.SyncRequest) zetcd.ZKResponse { 192 | cf := func() zetcd.ZKResponse { return xchk.cZK.Sync(xid, op) } 193 | of := func() zetcd.ZKResponse { return xchk.oZK.Sync(xid, op) } 194 | cr, or, err := xchkResp(cf, of) 195 | defer func() { reportErr(cr, or, err) }() 196 | if err != nil || or.Resp == nil { 197 | return or 198 | } 199 | crr, orr := cr.Resp.(*zetcd.SyncResponse), or.Resp.(*zetcd.SyncResponse) 200 | if crr.Path != orr.Path { 201 | err = errPath 202 | } 203 | return or 204 | } 205 | 206 | func (xchk *zkXchk) Ping(xid zetcd.Xid, op *zetcd.PingRequest) zetcd.ZKResponse { 207 | cf := func() zetcd.ZKResponse { return xchk.cZK.Ping(xid, op) } 208 | of := func() zetcd.ZKResponse { return xchk.oZK.Ping(xid, op) } 209 | cr, or, err := xchkResp(cf, of) 210 
| defer func() { reportErr(cr, or, err) }() 211 | return or 212 | } 213 | 214 | func (xchk *zkXchk) GetChildren2(xid zetcd.Xid, op *zetcd.GetChildren2Request) zetcd.ZKResponse { 215 | cf := func() zetcd.ZKResponse { return xchk.cZK.GetChildren2(xid, op) } 216 | of := func() zetcd.ZKResponse { return xchk.oZK.GetChildren2(xid, op) } 217 | cr, or, err := xchkResp(cf, of) 218 | defer func() { reportErr(cr, or, err) }() 219 | if err != nil || or.Resp == nil { 220 | return or 221 | } 222 | crr, orr := cr.Resp.(*zetcd.GetChildren2Response), or.Resp.(*zetcd.GetChildren2Response) 223 | if len(crr.Children) != len(orr.Children) { 224 | err = errNumChildren 225 | return or 226 | } 227 | for i := range crr.Children { 228 | if crr.Children[i] != orr.Children[i] { 229 | err = errChildren 230 | return or 231 | } 232 | } 233 | if !xchk.xchkStat(crr.Stat, orr.Stat) { 234 | err = errStat 235 | } 236 | return or 237 | } 238 | 239 | func (xchk *zkXchk) Multi(xid zetcd.Xid, op *zetcd.MultiRequest) zetcd.ZKResponse { panic("wut") } 240 | 241 | func (xchk *zkXchk) Close(xid zetcd.Xid, op *zetcd.CloseRequest) zetcd.ZKResponse { 242 | cf := func() zetcd.ZKResponse { return xchk.cZK.Close(xid, op) } 243 | of := func() zetcd.ZKResponse { return xchk.oZK.Close(xid, op) } 244 | cr, or, err := xchkResp(cf, of) 245 | defer func() { reportErr(cr, or, err) }() 246 | return or 247 | } 248 | 249 | func (xchk *zkXchk) SetAuth(xid zetcd.Xid, op *zetcd.SetAuthRequest) zetcd.ZKResponse { 250 | cf := func() zetcd.ZKResponse { return xchk.cZK.SetAuth(xid, op) } 251 | of := func() zetcd.ZKResponse { return xchk.oZK.SetAuth(xid, op) } 252 | cr, or, err := xchkResp(cf, of) 253 | defer func() { reportErr(cr, or, err) }() 254 | return or 255 | } 256 | 257 | func (xchk *zkXchk) SetWatches(xid zetcd.Xid, op *zetcd.SetWatchesRequest) zetcd.ZKResponse { 258 | cf := func() zetcd.ZKResponse { return xchk.cZK.SetWatches(xid, op) } 259 | of := func() zetcd.ZKResponse { return xchk.oZK.SetWatches(xid, op) } 260 | cr, 
or, err := xchkResp(cf, of) 261 | defer func() { reportErr(cr, or, err) }() 262 | return or 263 | } 264 | 265 | type zkfunc func() zetcd.ZKResponse 266 | 267 | func xchkHdr(cresp, oresp zetcd.ZKResponse) error { 268 | if cresp.Err != nil || oresp.Err != nil { 269 | return errErr 270 | } 271 | if cresp.Hdr == nil || oresp.Hdr == nil { 272 | return errErr 273 | } 274 | if cresp.Hdr.Err != oresp.Hdr.Err { 275 | return errErr 276 | } 277 | if cresp.Hdr.Zxid != oresp.Hdr.Zxid { 278 | return errZXid 279 | } 280 | return nil 281 | } 282 | 283 | func xchkResp(cf, of zkfunc) (cresp zetcd.ZKResponse, oresp zetcd.ZKResponse, err error) { 284 | cch, och := make(chan zetcd.ZKResponse, 1), make(chan zetcd.ZKResponse, 1) 285 | go func() { cch <- cf() }() 286 | go func() { och <- of() }() 287 | select { 288 | case cresp = <-cch: 289 | case oresp = <-och: 290 | } 291 | select { 292 | case cresp = <-cch: 293 | case oresp = <-och: 294 | case <-time.After(time.Second): 295 | glog.Warningf("took longer than 1s reading second resp %+v %+v", cresp, oresp) 296 | select { 297 | case cresp = <-cch: 298 | case oresp = <-och: 299 | } 300 | } 301 | return cresp, oresp, xchkHdr(cresp, oresp) 302 | } 303 | 304 | func reportErr(cr, or zetcd.ZKResponse, err error) { 305 | if err == nil { 306 | return 307 | } 308 | switch { 309 | case err == errErr || err == errZXid: 310 | glog.Warningf("xchk failed (%v)\ncandidate: %+v\noracle: %+v\n", err, cr.Hdr, or.Hdr) 311 | case cr.Resp != nil && or.Resp != nil: 312 | glog.Warningf("xchk failed (%v)\ncandiate: %+v\noracle: %+v\n", err, cr.Resp, or.Resp) 313 | case cr.Hdr != nil && or.Hdr != nil: 314 | glog.Warningf("xchk failed (%v)\ncandidate: %+v\noracle: %+v\n", err, cr.Hdr, or.Hdr) 315 | default: 316 | glog.Warningf("xchk failed (%v)\ncandidate: %+v\noracle: %+v", err, cr, or) 317 | } 318 | } 319 | 320 | func (xchk *zkXchk) xchkStat(cStat, oStat zetcd.Stat) bool { 321 | ctdiff, otdiff := cStat.Ctime-cStat.Mtime, oStat.Ctime-oStat.Mtime 322 | if ctdiff != 
otdiff && otdiff == 0 { 323 | // expect equal times to be equal 324 | return false 325 | } 326 | 327 | if oStat.EphemeralOwner != 0 { 328 | csid, _ := xchk.s.sp.get(oStat.EphemeralOwner) 329 | if cStat.EphemeralOwner != csid { 330 | return false 331 | } 332 | // ephemeral owners confirmed to be equivalent 333 | cStat.EphemeralOwner = oStat.EphemeralOwner 334 | } else if cStat.EphemeralOwner != 0 { 335 | return false 336 | } 337 | 338 | // times will never be equivalent, so fake it 339 | cStat.Ctime, cStat.Mtime = oStat.Ctime, oStat.Mtime 340 | return cStat == oStat 341 | } 342 | -------------------------------------------------------------------------------- /zketcd.go: -------------------------------------------------------------------------------- 1 | package zetcd 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "encoding/gob" 7 | "fmt" 8 | "path" 9 | "strings" 10 | "time" 11 | 12 | etcd "github.com/coreos/etcd/clientv3" 13 | v3sync "github.com/coreos/etcd/clientv3/concurrency" 14 | "github.com/golang/glog" 15 | ) 16 | 17 | type zkEtcd struct { 18 | c *etcd.Client 19 | s Session 20 | } 21 | 22 | // PerfectZXid is enabled to insert dummy writes to match zookeeper's zxids 23 | var PerfectZXidMode bool = true 24 | 25 | func NewZKEtcd(c *etcd.Client, s Session) ZK { return &zkEtcd{c, s} } 26 | 27 | func (z *zkEtcd) Create(xid Xid, op *CreateRequest) ZKResponse { 28 | opts := []etcd.OpOption{} 29 | if (op.Flags & FlagEphemeral) != 0 { 30 | opts = append(opts, etcd.WithLease(etcd.LeaseID(z.s.Sid()))) 31 | } 32 | if (op.Flags & ^(FlagSequence | FlagEphemeral)) != 0 { 33 | // support seq flag 34 | panic("unsupported create flags") 35 | } 36 | 37 | var p string // path of new node, passed back from txn 38 | pp := mkPath(path.Dir(op.Path)) 39 | pkey := "/zk/cver/" + pp 40 | applyf := func(s v3sync.STM) (err error) { 41 | defer func() { 42 | if PerfectZXidMode && err != nil { 43 | s.Put("/zk/moron-node", "1") 44 | } 45 | }() 46 | 47 | if len(op.Acl) == 0 { 48 | 
return ErrInvalidACL 49 | } 50 | if s.Rev(pkey) == 0 && len(pp) != 2 { 51 | // no parent 52 | return ErrNoNode 53 | } 54 | 55 | p = mkPath(op.Path) 56 | if op.Flags&FlagSequence != 0 { 57 | count := int32(decodeInt64([]byte(s.Get("/zk/count/" + pp)))) 58 | // force as int32 to get integer overflow as per zk docs 59 | p += fmt.Sprintf("%010d", count) 60 | count++ 61 | s.Put("/zk/count/"+pp, encodeInt64(int64(count))) 62 | } else if s.Rev("/zk/ver/"+p) != 0 { 63 | return ErrNodeExists 64 | } 65 | 66 | t := encodeTime() 67 | 68 | nextCVer := encodeInt64(decodeInt64([]byte(s.Get(pkey))) + 1) 69 | s.Put("/zk/cver/"+pp, nextCVer) 70 | // creating a znode will NOT update its parent mtime 71 | // s.Put("/zk/mtime/"+pp, t) 72 | 73 | s.Put("/zk/key/"+p, string(op.Data), opts...) 74 | s.Put("/zk/ctime/"+p, t, opts...) 75 | s.Put("/zk/mtime/"+p, t, opts...) 76 | s.Put("/zk/ver/"+p, encodeInt64(0), opts...) 77 | s.Put("/zk/cver/"+p, encodeInt64(0), opts...) 78 | s.Put("/zk/aver/"+p, encodeInt64(0), opts...) 79 | s.Put("/zk/acl/"+p, encodeACLs(op.Acl), opts...) 80 | s.Put("/zk/count/"+p, encodeInt64(0), opts...) 
81 | 82 | return nil 83 | } 84 | 85 | var apiErr error 86 | resp, err := z.doSTM(wrapErr(&apiErr, applyf)) 87 | if err != nil { 88 | return mkErr(err) 89 | } 90 | 91 | zxid := ZXid(resp.Header.Revision) 92 | switch apiErr { 93 | case nil: 94 | case ErrNoNode: 95 | // parent missing 96 | return mkZKErr(xid, zxid, errNoNode) 97 | case ErrNodeExists: 98 | // this key already exists 99 | return mkZKErr(xid, zxid, errNodeExists) 100 | case ErrInvalidACL: 101 | return mkZKErr(xid, zxid, errInvalidAcl) 102 | default: 103 | return mkZKErr(xid, zxid, errAPIError) 104 | } 105 | 106 | z.s.Wait(zxid, p, EventNodeCreated) 107 | crResp := &CreateResponse{op.Path} 108 | 109 | glog.V(7).Infof("Create(%v) = (zxid=%v, resp=%+v); txnresp.Header: %+v", zxid, xid, *crResp, resp.Header) 110 | return mkZKResp(xid, zxid, crResp) 111 | } 112 | 113 | func (z *zkEtcd) GetChildren2(xid Xid, op *GetChildren2Request) ZKResponse { 114 | resp := &GetChildren2Response{} 115 | p := mkPath(op.Path) 116 | 117 | txnresp, err := z.c.Txn(z.c.Ctx()).Then(statGets(p)...).Commit() 118 | if err != nil { 119 | return mkErr(err) 120 | } 121 | 122 | resp.Stat = statTxn(txnresp) 123 | if len(p) != 2 && resp.Stat.Ctime == 0 { 124 | return mkZKErr(xid, ZXid(txnresp.Header.Revision), errNoNode) 125 | } 126 | 127 | children := txnresp.Responses[5].GetResponseRange() 128 | for _, kv := range children.Kvs { 129 | zkkey := strings.Replace(string(kv.Key), getListPfx(p), "", 1) 130 | resp.Children = append(resp.Children, zkkey) 131 | } 132 | 133 | zxid := ZXid(children.Header.Revision) 134 | z.s.Wait(zxid, p, EventNodeChildrenChanged) 135 | 136 | if op.Watch { 137 | f := func(newzxid ZXid) { 138 | wresp := &WatcherEvent{ 139 | Type: EventNodeChildrenChanged, 140 | State: StateSyncConnected, 141 | Path: op.Path, 142 | } 143 | glog.V(7).Infof("WatchChild (%v,%v,%+v)", xid, newzxid, *wresp) 144 | z.s.Send(-1, -1, wresp) 145 | } 146 | z.s.Watch(zxid, xid, p, EventNodeChildrenChanged, f) 147 | } 148 | 149 | 
glog.V(7).Infof("GetChildren2(%v) = (zxid=%v, resp=%+v)", zxid, xid, *resp) 150 | return mkZKResp(xid, zxid, resp) 151 | } 152 | 153 | func (z *zkEtcd) Ping(xid Xid, op *PingRequest) ZKResponse { 154 | return mkZKResp(xid, z.s.ZXid(), &PingResponse{}) 155 | } 156 | 157 | func (z *zkEtcd) Delete(xid Xid, op *DeleteRequest) ZKResponse { 158 | p := mkPath(op.Path) 159 | pp := mkPath(path.Dir(op.Path)) 160 | key := "/zk/key/" + p 161 | pkey := "/zk/cver/" + pp 162 | 163 | applyf := func(s v3sync.STM) error { 164 | if s.Rev(pkey) == 0 && len(pp) != 2 { 165 | // no parent 166 | if PerfectZXidMode { 167 | s.Put("/zk/moron-node", "1") 168 | } 169 | return ErrNoNode 170 | } 171 | if s.Rev("/zk/ver/"+p) == 0 { 172 | if PerfectZXidMode { 173 | s.Put("/zk/moron-node", "1") 174 | } 175 | return ErrNoNode 176 | } 177 | ver := Ver(decodeInt64([]byte(s.Get("/zk/ver/" + p)))) 178 | if op.Version != Ver(-1) && op.Version != ver { 179 | return ErrBadVersion 180 | } 181 | 182 | if decodeInt64([]byte(s.Get("/zk/cver/"+p))) != 0 { 183 | panic("how to delete children") 184 | } 185 | 186 | nextCVer := encodeInt64(decodeInt64([]byte(s.Get(pkey))) + 1) 187 | s.Put("/zk/cver/"+pp, nextCVer) 188 | s.Put("/zk/mtime/"+pp, encodeTime()) 189 | 190 | s.Del(key) 191 | s.Del("/zk/ctime/" + p) 192 | s.Del("/zk/mtime/" + p) 193 | s.Del("/zk/ver/" + p) 194 | s.Del("/zk/cver/" + p) 195 | s.Del("/zk/aver/" + p) 196 | s.Del("/zk/acl/" + p) 197 | s.Del("/zk/count/" + p) 198 | 199 | return nil 200 | } 201 | 202 | var apiErr error 203 | resp, err := z.doSTM(wrapErr(&apiErr, applyf)) 204 | if err != nil { 205 | return mkErr(err) 206 | } 207 | 208 | zxid := ZXid(resp.Header.Revision) 209 | switch apiErr { 210 | case nil: 211 | case ErrNoNode: 212 | return mkZKErr(xid, zxid, errNoNode) 213 | case ErrBadVersion: 214 | return mkZKErr(xid, zxid, errBadVersion) 215 | default: 216 | return mkZKErr(xid, zxid, errAPIError) 217 | } 218 | 219 | delResp := &DeleteResponse{} 220 | z.s.Wait(zxid, p, EventNodeDeleted) 221 | 
222 | glog.V(7).Infof("Delete(%v) = (zxid=%v, resp=%+v)", xid, zxid, *delResp) 223 | return mkZKResp(xid, zxid, delResp) 224 | } 225 | 226 | func (z *zkEtcd) Exists(xid Xid, op *ExistsRequest) ZKResponse { 227 | p := mkPath(op.Path) 228 | gets := statGets(p) 229 | txnresp, err := z.c.Txn(z.c.Ctx()).Then(gets...).Commit() 230 | if err != nil { 231 | return mkErr(err) 232 | } 233 | 234 | exResp := &ExistsResponse{} 235 | exResp.Stat = statTxn(txnresp) 236 | zxid := ZXid(txnresp.Header.Revision) 237 | z.s.Wait(zxid, p, EventNodeCreated) 238 | 239 | if op.Watch { 240 | ev := EventNodeDeleted 241 | if exResp.Stat.Mtime == 0 { 242 | ev = EventNodeCreated 243 | } 244 | f := func(newzxid ZXid) { 245 | wresp := &WatcherEvent{ 246 | Type: ev, 247 | State: StateSyncConnected, 248 | Path: op.Path, 249 | } 250 | glog.V(7).Infof("WatchExists (%v,%v,%+v)", xid, newzxid, *wresp) 251 | z.s.Send(-1, -1, wresp) 252 | } 253 | z.s.Watch(zxid, xid, p, ev, f) 254 | } 255 | 256 | if exResp.Stat.Mtime == 0 { 257 | return mkZKErr(xid, zxid, errNoNode) 258 | } 259 | 260 | glog.V(7).Infof("Exists(%v) = (zxid=%v, resp=%+v)", xid, zxid, *exResp) 261 | return mkZKResp(xid, zxid, exResp) 262 | } 263 | 264 | func (z *zkEtcd) GetData(xid Xid, op *GetDataRequest) ZKResponse { 265 | p := mkPath(op.Path) 266 | gets := statGets(p) 267 | txnresp, err := z.c.Txn(z.c.Ctx()).Then(gets...).Commit() 268 | if err != nil { 269 | return mkErr(err) 270 | } 271 | 272 | zxid := ZXid(txnresp.Header.Revision) 273 | 274 | datResp := &GetDataResponse{} 275 | datResp.Stat = statTxn(txnresp) 276 | if datResp.Stat.Mtime == 0 { 277 | return mkZKErr(xid, zxid, errNoNode) 278 | } 279 | 280 | z.s.Wait(zxid, p, EventNodeDataChanged) 281 | 282 | if op.Watch { 283 | f := func(newzxid ZXid) { 284 | wresp := &WatcherEvent{ 285 | Type: EventNodeDataChanged, 286 | State: StateSyncConnected, 287 | Path: op.Path, 288 | } 289 | glog.V(7).Infof("WatchData (%v,%v,%+v)", xid, newzxid, *wresp) 290 | z.s.Send(-1, -1, wresp) 291 | } 292 | 
z.s.Watch(zxid, xid, p, EventNodeDataChanged, f) 293 | } 294 | datResp.Data = []byte(txnresp.Responses[2].GetResponseRange().Kvs[0].Value) 295 | 296 | glog.V(7).Infof("GetData(%v) = (zxid=%v, resp=%+v)", xid, zxid, *datResp) 297 | return mkZKResp(xid, zxid, datResp) 298 | } 299 | 300 | func (z *zkEtcd) SetData(xid Xid, op *SetDataRequest) ZKResponse { 301 | p := mkPath(op.Path) 302 | var statResp etcd.TxnResponse 303 | applyf := func(s v3sync.STM) error { 304 | if s.Rev("/zk/ver/"+p) == 0 { 305 | if PerfectZXidMode { 306 | s.Put("/zk/moron-node", "2") 307 | } 308 | return ErrNoNode 309 | } 310 | currentVersion := Ver(decodeInt64([]byte(s.Get("/zk/ver/" + p)))) 311 | if op.Version != Ver(-1) && op.Version != currentVersion { 312 | return ErrBadVersion 313 | 314 | } 315 | s.Put("/zk/key/"+p, string(op.Data)) 316 | s.Put("/zk/ver/"+p, string(encodeInt64(int64(currentVersion+1)))) 317 | s.Put("/zk/mtime/"+p, encodeTime()) 318 | 319 | resp, err := z.c.Txn(z.c.Ctx()).Then(statGets(p)...).Commit() 320 | if err != nil { 321 | return err 322 | } 323 | statResp = *resp 324 | return nil 325 | } 326 | var apiErr error 327 | resp, err := z.doSTM(wrapErr(&apiErr, applyf)) 328 | if err != nil { 329 | return mkErr(err) 330 | } 331 | 332 | zxid := ZXid(resp.Header.Revision) 333 | switch apiErr { 334 | case nil: 335 | case ErrNoNode: 336 | return mkZKErr(xid, zxid, errNoNode) 337 | case ErrBadVersion: 338 | return mkZKErr(xid, zxid, errBadVersion) 339 | default: 340 | return mkZKErr(xid, zxid, errAPIError) 341 | } 342 | 343 | sdresp := &SetDataResponse{} 344 | sdresp.Stat = statTxn(&statResp) 345 | 346 | glog.V(7).Infof("SetData(%v) = (zxid=%v, resp=%+v)", xid, zxid, *sdresp) 347 | return mkZKResp(xid, zxid, sdresp) 348 | } 349 | 350 | func (z *zkEtcd) GetAcl(xid Xid, op *GetAclRequest) ZKResponse { 351 | resp := &GetAclResponse{} 352 | p := mkPath(op.Path) 353 | 354 | gets := []etcd.Op{etcd.OpGet("/zk/acl/" + p)} 355 | gets = append(gets, statGets(p)...) 
356 | txnresp, err := z.c.Txn(z.c.Ctx()).Then(gets...).Commit() 357 | if err != nil { 358 | return mkErr(err) 359 | } 360 | 361 | zxid := ZXid(txnresp.Header.Revision) 362 | resps := txnresp.Responses 363 | txnresp.Responses = resps[1:] 364 | resp.Stat = statTxn(txnresp) 365 | if resp.Stat.Ctime == 0 { 366 | return mkZKErr(xid, zxid, errNoNode) 367 | } 368 | resp.Acl = decodeACLs(resps[0].GetResponseRange().Kvs[0].Value) 369 | 370 | glog.V(7).Infof("GetAcl(%v) = (zxid=%v, resp=%+v)", xid, zxid, *resp) 371 | return mkZKResp(xid, zxid, resp) 372 | } 373 | 374 | func (z *zkEtcd) SetAcl(xid Xid, op *SetAclRequest) ZKResponse { panic("setAcl") } 375 | 376 | func (z *zkEtcd) GetChildren(xid Xid, op *GetChildrenRequest) ZKResponse { 377 | p := mkPath(op.Path) 378 | txnresp, err := z.c.Txn(z.c.Ctx()).Then(statGets(p)...).Commit() 379 | if err != nil { 380 | return mkErr(err) 381 | } 382 | 383 | s := statTxn(txnresp) 384 | if len(p) != 2 && s.Ctime == 0 { 385 | return mkZKErr(xid, ZXid(txnresp.Header.Revision), errNoNode) 386 | } 387 | 388 | children := txnresp.Responses[5].GetResponseRange() 389 | resp := &GetChildrenResponse{} 390 | for _, kv := range children.Kvs { 391 | zkkey := strings.Replace(string(kv.Key), getListPfx(p), "", 1) 392 | resp.Children = append(resp.Children, zkkey) 393 | } 394 | zxid := ZXid(children.Header.Revision) 395 | 396 | if op.Watch { 397 | f := func(newzxid ZXid) { 398 | wresp := &WatcherEvent{ 399 | Type: EventNodeChildrenChanged, 400 | State: StateSyncConnected, 401 | Path: op.Path, 402 | } 403 | glog.V(7).Infof("WatchChild (%v,%v,%+v)", xid, newzxid, *wresp) 404 | z.s.Send(-1, -1, wresp) 405 | } 406 | z.s.Watch(zxid, xid, p, EventNodeChildrenChanged, f) 407 | } 408 | 409 | glog.V(7).Infof("GetChildren(%v) = (zxid=%v, resp=%+v)", xid, zxid, *resp) 410 | return mkZKResp(xid, zxid, resp) 411 | } 412 | 413 | func (z *zkEtcd) Sync(xid Xid, op *SyncRequest) ZKResponse { 414 | // linearized read 415 | resp, err := z.c.Get(z.c.Ctx(), 
"/zk/ver/"+mkPath(op.Path)) 416 | if err != nil { 417 | return mkErr(err) 418 | } 419 | 420 | zxid := ZXid(resp.Header.Revision) 421 | if len(resp.Kvs) == 0 { 422 | return mkZKErr(xid, zxid, errNoNode) 423 | } 424 | 425 | glog.V(7).Infof("Sync(%v) = (zxid=%v, resp=%+v)", xid, zxid, *resp) 426 | return mkZKResp(xid, zxid, &CreateResponse{op.Path}) 427 | } 428 | 429 | func (z *zkEtcd) Multi(xid Xid, op *MultiRequest) ZKResponse { panic("multi") } 430 | 431 | func (z *zkEtcd) Close(xid Xid, op *CloseRequest) ZKResponse { 432 | // XXX this needs to kill the internal session 433 | return mkZKResp(xid, 0, &CloseResponse{}) 434 | } 435 | 436 | func (z *zkEtcd) SetAuth(xid Xid, op *SetAuthRequest) ZKResponse { panic("setAuth") } 437 | 438 | func (z *zkEtcd) SetWatches(xid Xid, op *SetWatchesRequest) ZKResponse { 439 | for _, dw := range op.DataWatches { 440 | dataPath := dw 441 | p := mkPath(dataPath) 442 | f := func(newzxid ZXid) { 443 | wresp := &WatcherEvent{ 444 | Type: EventNodeDataChanged, 445 | State: StateSyncConnected, 446 | Path: dataPath, 447 | } 448 | glog.V(7).Infof("WatchData* (%v,%v,%v)", xid, newzxid, *wresp) 449 | z.s.Send(-1, -1, wresp) 450 | } 451 | z.s.Watch(op.RelativeZxid, xid, p, EventNodeDataChanged, f) 452 | } 453 | 454 | ops := make([]etcd.Op, len(op.ExistWatches)) 455 | for i, ew := range op.ExistWatches { 456 | ops[i] = etcd.OpGet( 457 | "/zk/ver/ctime/"+mkPath(ew), 458 | etcd.WithSerializable(), 459 | etcd.WithRev(int64(op.RelativeZxid))) 460 | } 461 | 462 | resp, err := z.c.Txn(z.c.Ctx()).Then(ops...).Commit() 463 | if err != nil { 464 | return mkErr(err) 465 | } 466 | curZXid := ZXid(resp.Header.Revision) 467 | 468 | for i, ew := range op.ExistWatches { 469 | existPath := ew 470 | p := mkPath(existPath) 471 | 472 | ev := EventNodeDeleted 473 | if len(resp.Responses[i].GetResponseRange().Kvs) == 0 { 474 | ev = EventNodeCreated 475 | } 476 | f := func(newzxid ZXid) { 477 | wresp := &WatcherEvent{ 478 | Type: ev, 479 | State: StateSyncConnected, 
// decodeInt64 decodes a signed varint from the front of v. The byte count
// reported by binary.Varint is discarded, so input that cannot be decoded
// (including empty input) yields whatever value binary.Varint reports for
// that case, which is 0.
func decodeInt64(v []byte) int64 {
	value, _ := binary.Varint(v)
	return value
}

// encodeInt64 returns v as a signed varint, packed into a string so it can be
// stored directly as a key's value. decodeInt64 is the inverse.
func encodeInt64(v int64) string {
	var scratch [binary.MaxVarintLen64]byte
	used := binary.PutVarint(scratch[:], v)
	return string(scratch[:used])
}
} 546 | 547 | func mkZKResp(xid Xid, zxid ZXid, resp interface{}) ZKResponse { 548 | return ZKResponse{Hdr: &ResponseHeader{xid, zxid - 1, 0}, Resp: resp} 549 | } 550 | 551 | // wrapErr is to pass back error info but still get the txn response 552 | func wrapErr(err *error, f func(s v3sync.STM) error) func(s v3sync.STM) error { 553 | return func(s v3sync.STM) error { 554 | if ferr := f(s); ferr != nil { 555 | *err = ferr 556 | } 557 | return nil 558 | } 559 | } 560 | --------------------------------------------------------------------------------