├── CHANGELOG.md ├── CONTRIBUTING.md ├── fixtures ├── patch_proc2.json ├── patch_proc.json ├── new_proc.json ├── engine.go ├── requests.go └── podgroup.go ├── .dockerignore ├── deployd-design.png ├── network ├── calico │ ├── network_test.go │ └── calico.go └── network.go ├── test.sh ├── Dockerfile ├── engine ├── errors.go ├── ports_test.go ├── container.go ├── eagleview_test.go ├── node.go ├── pod_test.go ├── depends_test.go ├── config.go ├── constraint.go ├── engine_ops.go ├── events.go ├── podgroup_test.go ├── notify.go ├── eagleview.go ├── runtimes.go ├── depends.go ├── histories.go ├── ports.go └── specs.go ├── version └── version.go ├── .gitignore ├── utils ├── regex │ ├── regex_test.go │ └── regex.go ├── proxy │ ├── proxy_test.go │ └── proxy.go ├── elector │ ├── elect_test.go │ └── elect.go ├── util │ ├── util_test.go │ └── util.go └── units │ ├── size.go │ └── size_test.go ├── storage ├── store.go └── etcd │ └── store.go ├── apiserver ├── cntstatus.go ├── engine.go ├── middleware.go ├── ports.go ├── status.go ├── node.go ├── notify.go ├── constraint.go ├── dependency.go ├── podgroup.go └── server.go ├── LICENSE ├── cluster ├── swarm │ └── cluster.go └── cluster.go ├── Gopkg.toml ├── .travis.yml ├── main.go ├── README.md └── Gopkg.lock /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Deployd 2 | -------------------------------------------------------------------------------- /fixtures/patch_proc2.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_instances": 1 3 | } 4 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 
1 | .git* 2 | deployd-design.png 3 | deployd 4 | fixtures* 5 | *.md 6 | -------------------------------------------------------------------------------- /deployd-design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laincloud/deployd/HEAD/deployd-design.png -------------------------------------------------------------------------------- /fixtures/patch_proc.json: -------------------------------------------------------------------------------- 1 | { 2 | "cpu": 0, 3 | "expose": 5000, 4 | "num_instances": 2, 5 | "memory": "256m" 6 | } 7 | -------------------------------------------------------------------------------- /fixtures/new_proc.json: -------------------------------------------------------------------------------- 1 | { 2 | "image": "training/webapp", 3 | "command": ["python", "app.py"], 4 | "expose": 5000, 5 | "num_instances": 1, 6 | "cpu": 1, 7 | "memory": "512m" 8 | } 9 | -------------------------------------------------------------------------------- /network/calico/network_test.go: -------------------------------------------------------------------------------- 1 | package calico 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/projectcalico/libcalico-go/lib/net" 8 | ) 9 | 10 | func Test_parseIp(t *testing.T) { 11 | fmt.Printf("ip: %v", net.ParseIP("127.0.0.1")) 12 | } 13 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | echo "" > coverage.txt 5 | 6 | for d in $(go list ./... 
| grep -Ev 'vendor|fixtures|utils'); do 7 | go test -v -race -coverprofile=profile.out -covermode=atomic $d 8 | if [ -f profile.out ]; then 9 | cat profile.out >> coverage.txt 10 | rm profile.out 11 | fi 12 | done 13 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM laincloud/centos-lain 2 | 3 | RUN mkdir -p $GOPATH/src/github.com/laincloud 4 | 5 | ADD . $GOPATH/src/github.com/laincloud/deployd 6 | 7 | RUN cd $GOPATH/src/github.com/laincloud/deployd && go build -v -a -tags netgo -installsuffix netgo -o deployd 8 | 9 | RUN mv $GOPATH/src/github.com/laincloud/deployd/deployd /usr/bin/ 10 | 11 | -------------------------------------------------------------------------------- /engine/errors.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | var ( 8 | ErrOperLockedFormat = "Another operation \"%s\" is progressing" 9 | ) 10 | 11 | type LockedError interface { 12 | error 13 | } 14 | 15 | type OperLockedError struct { 16 | info string 17 | } 18 | 19 | func (ole OperLockedError) Error() string { 20 | return fmt.Sprintf(ErrOperLockedFormat, ole.info) 21 | } 22 | -------------------------------------------------------------------------------- /version/version.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/coreos/go-semver/semver" 7 | ) 8 | 9 | var ( 10 | Version = "2.4.5+git" 11 | APIVersion = "unknown" 12 | 13 | // Git SHA Value will be set during build 14 | GitSHA = "Not provided (use ./build instead of go build)" 15 | ) 16 | 17 | func init() { 18 | ver, err := semver.NewVersion(Version) 19 | if err == nil { 20 | APIVersion = fmt.Sprintf("%d.%d", ver.Major, ver.Minor) 21 | } 22 | } 23 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | lain-deployd* 27 | assets_gen.go 28 | deployd* 29 | .idea/ 30 | bin/ 31 | vendor/ 32 | *DS_Store 33 | coverage.txt 34 | default.etcd/ 35 | etcd-v2.3.7-linux-amd64.tar.gz 36 | etcd-v2.3.7-linux-amd64/ -------------------------------------------------------------------------------- /network/network.go: -------------------------------------------------------------------------------- 1 | package network 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/laincloud/deployd/network/calico" 7 | ) 8 | 9 | type NWMInterface interface { 10 | ReleaseIp(ip string) error 11 | } 12 | 13 | var ( 14 | ni NWMInterface 15 | 16 | ErrNoNetworkMgrSupported = errors.New("No NetWork Manager Supported ") 17 | ) 18 | 19 | // networkDriver could calico, other overlay etc. 20 | // fixme: now we just support calico. 
21 | func InitNetWorkManager(networkDriver, endpoint string) { 22 | ni = calico.NewNetWorkMgr(endpoint) 23 | } 24 | 25 | func ReleaseIp(ip string) error { 26 | if ni == nil { 27 | return ErrNoNetworkMgrSupported 28 | } 29 | return ni.ReleaseIp(ip) 30 | } 31 | -------------------------------------------------------------------------------- /engine/ports_test.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "testing" 7 | ) 8 | 9 | func TestRegisterPorts(t *testing.T) { 10 | fmt.Println("Start") 11 | ConfigPortsManager("http://127.0.0.1:2379") 12 | test := make([]*StreamProc, 0) 13 | for i := 0; i < 2; i++ { 14 | test = append(test, &StreamProc{ 15 | StreamPort: StreamPort{ 16 | SrcPort: 9001 + i, 17 | DstPort: 9001 + i, 18 | Proto: "tcp", 19 | }, 20 | NameSpace: "test" + strconv.Itoa(i+1), 21 | ProcName: "test" + strconv.Itoa(i+1), 22 | }) 23 | } 24 | ok, faileds := RegisterPorts(test...) 25 | if !ok { 26 | fmt.Printf("failed with ports:%v\n", faileds) 27 | } 28 | CancelPorts(test...) 
29 | } 30 | -------------------------------------------------------------------------------- /utils/regex/regex_test.go: -------------------------------------------------------------------------------- 1 | package regex 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestMatch(t *testing.T) { 9 | p := MustCompile("a(.*)c(.*)f") 10 | m := p.Match("abcdef") 11 | if m == nil { 12 | fmt.Println("unmatch!") 13 | } else { 14 | fmt.Printf("match:%s\n", m.Group(1)) 15 | } 16 | 17 | } 18 | 19 | func TestPortsMatch(t *testing.T) { 20 | p := MustCompile("([0-9]*):([0-9]*)/(tcp|udp)") 21 | m := p.Match("9501:2132/udp") 22 | if m == nil { 23 | fmt.Println("unmatch!") 24 | } else { 25 | fmt.Printf("match:%s\n", m.Group(3)) 26 | } 27 | 28 | m = p.Match("9501:2f132/fdp") 29 | if m == nil { 30 | fmt.Println("unmatch!") 31 | } else { 32 | fmt.Printf("match:%s\n", m.Group(3)) 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /storage/store.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "errors" 5 | ) 6 | 7 | var ( 8 | KMissingError = errors.New("No such key") 9 | KNilNodeError = errors.New("Etcd Store returns a nil node") 10 | KDirNodeError = errors.New("Etcd Store returns this is a directory node") 11 | KNonDirNodeError = errors.New("Etcd Store returns this is a non-directory node") 12 | ) 13 | 14 | type Store interface { 15 | Get(key string, v interface{}) error 16 | GetRaw(key string) (string, error) 17 | Set(key string, v interface{}, force ...bool) error 18 | SetWithTTL(key string, v interface{}, ttlSec int, force ...bool) error 19 | Watch(key string) chan string 20 | KeysByPrefix(prefix string) ([]string, error) 21 | Remove(key string) error 22 | TryRemoveDir(key string) 23 | RemoveDir(key string) error 24 | } 25 | -------------------------------------------------------------------------------- /utils/regex/regex.go: 
-------------------------------------------------------------------------------- 1 | package regex 2 | 3 | import "regexp" 4 | 5 | type Pattern struct { 6 | ptn *regexp.Regexp 7 | } 8 | 9 | type Match struct { 10 | matches []string 11 | } 12 | 13 | func MustCompile(rule string) *Pattern { 14 | ptn := regexp.MustCompile(rule) 15 | return &Pattern{ptn: ptn} 16 | } 17 | 18 | func Compile(rule string) (*Pattern, error) { 19 | ptn, err := regexp.Compile(rule) 20 | if err != nil { 21 | return nil, err 22 | } 23 | return &Pattern{ptn: ptn}, nil 24 | } 25 | 26 | func (p *Pattern) Match(value string) *Match { 27 | matches := p.ptn.FindStringSubmatch(value) 28 | if len(matches) == 0 { 29 | return nil 30 | } 31 | return &Match{matches: matches} 32 | } 33 | 34 | func (m *Match) Group(n int) string { 35 | if len(m.matches) <= n { 36 | return "" 37 | } 38 | return m.matches[n] 39 | } 40 | -------------------------------------------------------------------------------- /utils/proxy/proxy_test.go: -------------------------------------------------------------------------------- 1 | package proxy 2 | 3 | import ( 4 | "fmt" 5 | "os/exec" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | func TestRun(t *testing.T) { 11 | from, to := "127.0.0.1:1234", "docker.io" 12 | t.Logf("testing proxy %s => %s", from, to) 13 | p := New(from, to) 14 | defer p.Stop() 15 | go p.Run() 16 | time.Sleep(time.Second) 17 | t.Logf("send GET request to http://%s/v1/search", from) 18 | output, err := exec.Command("curl", fmt.Sprintf("http://%s/v1/search", from)).Output() 19 | if err != nil { 20 | t.Error(err) 21 | } 22 | t.Log(string(output)) 23 | } 24 | 25 | func TestStop(t *testing.T) { 26 | from, to := "127.0.0.1:1234", "docker.io" 27 | p := New(from, to) 28 | 29 | go func() { 30 | time.Sleep(time.Second) 31 | p.Stop() 32 | }() 33 | 34 | if err := p.Run(); err != nil { 35 | t.Log(err) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /apiserver/cntstatus.go: 
-------------------------------------------------------------------------------- 1 | package apiserver 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | 7 | "github.com/mijia/sweb/form" 8 | "github.com/mijia/sweb/log" 9 | "github.com/mijia/sweb/server" 10 | "golang.org/x/net/context" 11 | ) 12 | 13 | // cnt means container 14 | 15 | type RestfulCntStatusHstry struct { 16 | server.BaseResource 17 | } 18 | 19 | func (rpg RestfulCntStatusHstry) Get(ctx context.Context, r *http.Request) (int, interface{}) { 20 | pgName := form.ParamString(r, "name", "") 21 | if pgName == "" { 22 | return http.StatusBadRequest, fmt.Sprintf("No pod group name provided.") 23 | } 24 | instance := form.ParamInt(r, "instance", -1) 25 | if instance == -1 { 26 | return http.StatusBadRequest, fmt.Sprintf("No pod instance provided.") 27 | } 28 | orcEngine := getEngine(ctx) 29 | podStatusHstries := orcEngine.FetchPodStaHstry(pgName, instance) 30 | log.Infof("podStatusHstry:%v", podStatusHstries) 31 | return http.StatusOK, podStatusHstries 32 | } 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 LAIN Cloud 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /cluster/swarm/cluster.go: -------------------------------------------------------------------------------- 1 | package swarm 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/laincloud/deployd/cluster" 8 | "github.com/mijia/adoc" 9 | ) 10 | 11 | type SwarmCluster struct { 12 | *adoc.DockerClient 13 | } 14 | 15 | func (c *SwarmCluster) GetResources() ([]cluster.Node, error) { 16 | if info, err := c.DockerClient.SwarmInfo(); err != nil { 17 | return nil, err 18 | } else { 19 | nodes := make([]cluster.Node, len(info.Nodes)) 20 | for i, node := range info.Nodes { 21 | nodes[i] = cluster.Node{ 22 | Name: node.Name, 23 | Address: node.Address, 24 | Containers: node.Containers, 25 | CPUs: node.CPUs, 26 | UsedCPUs: node.UsedCPUs, 27 | Memory: node.Memory, 28 | UsedMemory: node.UsedMemory, 29 | } 30 | } 31 | return nodes, nil 32 | } 33 | } 34 | 35 | func NewCluster(addr string, timeout, rwTimeout time.Duration) (cluster.Cluster, error) { 36 | docker, err := adoc.NewSwarmClientTimeout(addr, nil, timeout, rwTimeout) 37 | if err != nil { 38 | return nil, fmt.Errorf("Cannot connect swarm master[%s], %s", addr, err) 39 | } 40 | swarm := &SwarmCluster{} 41 | swarm.DockerClient = docker 42 | return swarm, nil 43 | } 44 | -------------------------------------------------------------------------------- /apiserver/engine.go: 
-------------------------------------------------------------------------------- 1 | package apiserver 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | 7 | "github.com/mijia/sweb/form" 8 | "github.com/mijia/sweb/log" 9 | "github.com/mijia/sweb/server" 10 | "golang.org/x/net/context" 11 | ) 12 | 13 | type EngineConfigApi struct { 14 | server.BaseResource 15 | } 16 | 17 | func (eca EngineConfigApi) Get(ctx context.Context, r *http.Request) (int, interface{}) { 18 | e := getEngine(ctx) 19 | return http.StatusOK, e.Config() 20 | } 21 | 22 | func (eca EngineConfigApi) Patch(ctx context.Context, r *http.Request) (int, interface{}) { 23 | e := getEngine(ctx) 24 | config := e.Config() 25 | if err := form.ParamBodyJson(r, &config); err != nil { 26 | log.Warnf("Failed to decode Engine Config, %s", err) 27 | return http.StatusBadRequest, fmt.Sprintf("Invalid Engine Config params format: %s", err) 28 | } 29 | e.SetConfig(config) 30 | return http.StatusOK, e.Config() 31 | } 32 | 33 | type EngineMaintenanceApi struct { 34 | server.BaseResource 35 | } 36 | 37 | func (ema EngineMaintenanceApi) Patch(ctx context.Context, r *http.Request) (int, interface{}) { 38 | e := getEngine(ctx) 39 | status := form.ParamBoolean(r, "on", false) 40 | e.Maintaince(status) 41 | return http.StatusOK, e.Config() 42 | } 43 | -------------------------------------------------------------------------------- /Gopkg.toml: -------------------------------------------------------------------------------- 1 | # Gopkg.toml example 2 | # 3 | # Refer to https://golang.github.io/dep/docs/Gopkg.toml.html 4 | # for detailed Gopkg.toml documentation. 
5 | # 6 | # required = ["github.com/user/thing/cmd/thing"] 7 | # ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"] 8 | # 9 | # [[constraint]] 10 | # name = "github.com/user/project" 11 | # version = "1.0.0" 12 | # 13 | # [[constraint]] 14 | # name = "github.com/user/project2" 15 | # branch = "dev" 16 | # source = "github.com/myfork/project2" 17 | # 18 | # [[override]] 19 | # name = "github.com/x/y" 20 | # version = "2.4.0" 21 | # 22 | # [prune] 23 | # non-go = false 24 | # go-tests = true 25 | # unused-packages = true 26 | 27 | 28 | [[constraint]] 29 | name = "github.com/coreos/etcd" 30 | version = "3.1.8" 31 | 32 | [[constraint]] 33 | name = "github.com/coreos/go-semver" 34 | version = "0.2.0" 35 | 36 | [[constraint]] 37 | name = "github.com/docker/libkv" 38 | version = "0.2.1" 39 | 40 | [[constraint]] 41 | branch = "master" 42 | name = "github.com/mijia/go-generics" 43 | 44 | [[constraint]] 45 | name = "github.com/projectcalico/libcalico-go" 46 | version = "1.2.2" 47 | 48 | [prune] 49 | go-tests = true 50 | unused-packages = true 51 | -------------------------------------------------------------------------------- /apiserver/middleware.go: -------------------------------------------------------------------------------- 1 | package apiserver 2 | 3 | import ( 4 | "net/http" 5 | "strings" 6 | "time" 7 | 8 | "github.com/mijia/sweb/log" 9 | "github.com/mijia/sweb/server" 10 | "golang.org/x/net/context" 11 | ) 12 | 13 | // StatWare is the statistics middleware which would log all the access and performation information. 14 | type ReadOnlySwitch struct { 15 | } 16 | 17 | // ServeHTTP implements the Middleware interface. Would log all the access, status and performance information. 
18 | func (m *ReadOnlySwitch) ServeHTTP(ctx context.Context, w http.ResponseWriter, r *http.Request, next server.Handler) context.Context { 19 | start := time.Now() 20 | e := getEngine(ctx) 21 | 22 | if e.ReadOnly() && strings.ToUpper(r.Method) != "GET" && 23 | !strings.HasPrefix(r.URL.Path, "/api/engine/") { 24 | log.Warnf("Do ReadOnly Request Permitted!, %q %q, duration=%v", 25 | r.Method, r.URL.Path, time.Since(start)) 26 | w.WriteHeader(403) 27 | w.Write([]byte("Do ReadOnly Permitted.\n")) 28 | return ctx 29 | } 30 | return next(ctx, w, r) 31 | } 32 | 33 | // NewStatWare returns a new StatWare, some ignored urls can be specified with prefixes which would not be logged. 34 | func NewReadOnlySwitch() server.Middleware { 35 | return &ReadOnlySwitch{} 36 | } 37 | -------------------------------------------------------------------------------- /apiserver/ports.go: -------------------------------------------------------------------------------- 1 | package apiserver 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | 7 | "github.com/laincloud/deployd/engine" 8 | "github.com/mijia/sweb/form" 9 | "github.com/mijia/sweb/log" 10 | "github.com/mijia/sweb/server" 11 | "golang.org/x/net/context" 12 | ) 13 | 14 | type RestfulPorts struct { 15 | server.BaseResource 16 | } 17 | 18 | type Ports struct { 19 | Ports []int 20 | } 21 | 22 | func (rn RestfulPorts) Get(ctx context.Context, r *http.Request) (int, interface{}) { 23 | return http.StatusOK, engine.FetchAllPortsInfo() 24 | } 25 | 26 | func (rn RestfulPorts) Post(ctx context.Context, r *http.Request) (int, interface{}) { 27 | options := []string{"validate"} 28 | cmd := form.ParamStringOptions(r, "cmd", options, "noop") 29 | switch cmd { 30 | case "validate": 31 | var ports Ports 32 | if err := form.ParamBodyJson(r, &ports); err != nil { 33 | log.Warnf("Failed to decode valiad ports, %s", err) 34 | return http.StatusBadRequest, fmt.Sprintf("Invalid ports params format: %s", err) 35 | } 36 | occs := 
engine.OccupiedPorts(ports.Ports...) 37 | if len(occs) == 0 { 38 | return http.StatusOK, nil 39 | } 40 | return http.StatusBadRequest, fmt.Sprintf("Conflicted ports: %v", occs) 41 | } 42 | return http.StatusBadRequest, fmt.Sprintf("Unknown request!") 43 | } 44 | -------------------------------------------------------------------------------- /cluster/cluster.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import "github.com/mijia/adoc" 4 | 5 | type Node struct { 6 | Name string 7 | Address string 8 | Containers int64 9 | CPUs int 10 | UsedCPUs int 11 | Memory int64 12 | UsedMemory int64 13 | } 14 | 15 | func (n Node) SpareCPUs() int { 16 | return n.CPUs - n.UsedCPUs 17 | } 18 | 19 | func (n Node) SpareMemory() int64 { 20 | return n.Memory - n.UsedMemory 21 | } 22 | 23 | type Cluster interface { 24 | GetResources() ([]Node, error) 25 | 26 | ListContainers(showAll bool, showSize bool, filters ...string) ([]adoc.Container, error) 27 | CreateContainer(cc adoc.ContainerConfig, hc adoc.HostConfig, nc adoc.NetworkingConfig, name ...string) (string, error) 28 | ConnectContainer(networkName string, id string, ipAddr string) error 29 | DisconnectContainer(networkName string, id string, force bool) error 30 | StartContainer(id string) error 31 | StopContainer(id string, timeout ...int) error 32 | RestartContainer(id string, timeout ...int) error 33 | InspectContainer(id string) (adoc.ContainerDetail, error) 34 | RemoveContainer(id string, force bool, volumes bool) error 35 | RenameContainer(id string, name string) error 36 | 37 | MonitorEvents(filter string, callback adoc.EventCallback) int64 38 | StopMonitor(monitorId int64) 39 | } 40 | -------------------------------------------------------------------------------- /network/calico/calico.go: -------------------------------------------------------------------------------- 1 | package calico 2 | 3 | import ( 4 | calico 
"github.com/projectcalico/libcalico-go/lib/client" 5 | 6 | "github.com/mijia/sweb/log" 7 | "github.com/projectcalico/libcalico-go/lib/api" 8 | "github.com/projectcalico/libcalico-go/lib/net" 9 | ) 10 | 11 | type NetWorkManager struct { 12 | calico *calico.Client 13 | ipam calico.IPAMInterface 14 | } 15 | 16 | // FIx me: just support etcd endpoint now 17 | func NewNetWorkMgr(endpoint string) *NetWorkManager { 18 | config := api.CalicoAPIConfig{ 19 | Spec: api.CalicoAPIConfigSpec{ 20 | DatastoreType: api.EtcdV2, 21 | EtcdConfig: api.EtcdConfig{ 22 | EtcdEndpoints: endpoint, 23 | }, 24 | }, 25 | } 26 | c, err := calico.New(config) 27 | defer func() { 28 | if err == nil { 29 | log.Infof("Init calico network manager succeed") 30 | } 31 | }() 32 | if err != nil { 33 | log.Warnf("New Calico NetWork Manager Failed!!") 34 | return nil 35 | } 36 | nwm := &NetWorkManager{} 37 | nwm.calico = c 38 | nwm.ipam = c.IPAM() 39 | log.Infof("nwm.ipam: %v", nwm.ipam) 40 | return nwm 41 | } 42 | 43 | func (self *NetWorkManager) ReleaseIp(ip string) error { 44 | IP := net.ParseIP(ip) 45 | if IP == nil { 46 | log.Warnf("Ip %v is invalid to parse", ip) 47 | return nil 48 | } 49 | _, err := self.ipam.ReleaseIPs([]net.IP{*IP}) 50 | return err 51 | } 52 | -------------------------------------------------------------------------------- /utils/elector/elect_test.go: -------------------------------------------------------------------------------- 1 | package elector 2 | 3 | import ( 4 | "fmt" 5 | "github.com/docker/libkv" 6 | "github.com/docker/libkv/store" 7 | "os" 8 | "strings" 9 | "testing" 10 | ) 11 | 12 | var ( 13 | e *Elector 14 | etcds []string 15 | err error 16 | ) 17 | 18 | func init() { 19 | etcds = strings.Split(os.Getenv("ETCD_TEST"), "127.0.0.1:2379") 20 | e, err = New(etcds, LeaderKey, "127.0.0.1:2376") 21 | if err != nil { 22 | panic(err) 23 | } 24 | } 25 | 26 | func TestRunElection(t *testing.T) { 27 | 28 | stop := make(chan struct{}) 29 | defer close(stop) 30 | 31 | ch := 
e.Run(stop) 32 | 33 | st, err := libkv.NewStore(store.ETCD, etcds, nil) 34 | fmt.Println(st) 35 | if err != nil { 36 | t.Error(err) 37 | } 38 | 39 | t.Log("leader changed to ", <-ch) 40 | if err := st.Put(LeaderKey, []byte("hello"), nil); err != nil { 41 | t.Error(err) 42 | } 43 | t.Log("leader changed to ", <-ch) 44 | if err := st.Put(LeaderKey, []byte("world"), nil); err != nil { 45 | t.Error(err) 46 | } 47 | t.Log("leader changed to ", <-ch) 48 | if err := st.Put(LeaderKey, []byte(""), nil); err != nil { 49 | t.Error(err) 50 | } 51 | t.Log("leader changed to ", <-ch) 52 | if err := st.Delete(LeaderKey); err != nil { 53 | t.Error(err) 54 | } 55 | t.Log("leader changed to ", <-ch) 56 | if err := st.Put(LeaderKey, []byte("192.168.77.22:1234"), nil); err != nil { 57 | t.Error(err) 58 | } 59 | t.Log("leader changed to ", <-ch) 60 | } 61 | 62 | func TestIsLeader(t *testing.T) { 63 | t.Log("isleader:", e.IsLeader()) 64 | } 65 | -------------------------------------------------------------------------------- /engine/container.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | ) 7 | 8 | // By is the type of a "less" function that defines the ordering of its Planet arguments. 9 | type By func(c1, c2 *container) bool 10 | 11 | // Sort is a method on the function type, By, that sorts the argument slice according to the function. 12 | func (by By) Sort(containers []*container) { 13 | cs := &containerSorter{ 14 | containers: containers, 15 | by: by, // The Sort method's receiver is the function (closure) that defines the sort order. 
16 | } 17 | sort.Sort(cs) 18 | } 19 | 20 | var ByVersionAndDriftCounter = func(c1, c2 *container) bool { 21 | return c1.version > c2.version || 22 | (c1.version == c2.version && c1.driftCount > c2.driftCount) 23 | } 24 | 25 | type container struct { 26 | version int 27 | instance int 28 | driftCount int 29 | id string 30 | } 31 | 32 | func (c *container) String() string { 33 | return fmt.Sprintf("id:%s, version:%d, instance:%d, driftCount:%d", c.id, c.version, c.instance, c.driftCount) 34 | } 35 | 36 | type containerSorter struct { 37 | containers []*container 38 | by By 39 | } 40 | 41 | // Len is part of sort.Interface. 42 | func (s *containerSorter) Len() int { 43 | return len(s.containers) 44 | } 45 | 46 | // Swap is part of sort.Interface. 47 | func (s *containerSorter) Swap(i, j int) { 48 | s.containers[i], s.containers[j] = s.containers[j], s.containers[i] 49 | } 50 | 51 | // Less is part of sort.Interface. 52 | func (s *containerSorter) Less(i, j int) bool { 53 | return s.by(s.containers[i], s.containers[j]) 54 | } 55 | -------------------------------------------------------------------------------- /utils/util/util_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "reflect" 8 | "testing" 9 | "time" 10 | 11 | "github.com/stretchr/testify/assert" 12 | ) 13 | 14 | func Test_ParseNameInstanceNo(t *testing.T) { 15 | containerName := "webrouter.worker.worker.v3-i1-d0" 16 | 17 | name, incetance, err := ParseNameInstanceNo(containerName) 18 | assert.Equal(t, nil, err) 19 | assert.Equal(t, "webrouter.worker.worker", name) 20 | assert.Equal(t, 1, incetance) 21 | } 22 | 23 | func Test_IpConflictErrorMatch(t *testing.T) { 24 | err := "IP assignment error, data: {IP:172.20.111.131 HandleID: Attrs:map[] Hostname:lain}: Address already assigned in block" 25 | fmt.Printf("match : %v\n", IpConflictErrorMatch(err)) 26 | } 27 | 28 | func 
Test_IsConnectionError(t *testing.T) { 29 | err := errors.New("dial tcp 192.168.77.21:2376: getsockopt: connection refused") 30 | assert.Equal(t, true, IsConnectionError(err)) 31 | } 32 | 33 | func Test_deepEqual(t *testing.T) { 34 | a := []string{"a", "b"} 35 | b := []string{"a", "b"} 36 | fmt.Println(reflect.DeepEqual(a, b)) 37 | } 38 | 39 | func Test_timeFormat(t *testing.T) { 40 | fmt.Println(time.Now().Format("Jan 2 15:04:05")) 41 | // Jan 1 00:00:00 42 | // time.Now().Format("2006-01-02 15:04:05") 43 | } 44 | 45 | type Test struct { 46 | Time time.Time 47 | } 48 | 49 | func Test_timeMarshal(t *testing.T) { 50 | fmt.Println(time.Now().Format("Jan 2 15:04:05")) 51 | // Jan 1 00:00:00 52 | // time.Now().Format("2006-01-02 15:04:05") 53 | tt := &Test{Time: time.Now()} 54 | data, _ := json.Marshal(tt) 55 | fmt.Println("t:", string(data)) 56 | } 57 | -------------------------------------------------------------------------------- /engine/eagleview_test.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestEagleViewRefresh(t *testing.T) { 10 | etcdAddr := "http://127.0.0.1:2379" 11 | ConfigPortsManager(etcdAddr) 12 | 13 | kluster, store, err := initClusterAndStore() 14 | if err != nil { 15 | t.Fatalf("Cannot create the cluster and storage, %s", err) 16 | } 17 | 18 | engine, err := New(kluster, store) 19 | if err != nil { 20 | t.Fatalf("Cannot create the orc engine, %s", err) 21 | } 22 | 23 | namespace := "hello" 24 | name := "hello.web.web" 25 | pgSpec := createPodGroupSpec(namespace, name, 1) 26 | pgSpec.RestartPolicy = RestartPolicyAlways 27 | if err := engine.NewPodGroup(pgSpec); err != nil { 28 | t.Fatalf("Should not return error, %s", err) 29 | } 30 | 31 | time.Sleep(20 * time.Second) 32 | ev := NewRuntimeEagleView() 33 | if err := ev.Refresh(kluster); err != nil { 34 | t.Errorf("Cannot refresh eagle view, %s", err) 35 | } 36 | 37 | if 
pods, ok := ev.GetRuntimeEaglePods("hello.web.web"); !ok { 38 | t.Errorf("Didn't get back runtime pods") 39 | } else { 40 | fmt.Printf("%+v\n", pods) 41 | } 42 | 43 | if _, err := ev.RefreshPodGroup(kluster, "hello.web.web"); err != nil { 44 | t.Errorf("Cannot refresh the pod group in eagle view") 45 | } 46 | 47 | if pods, ok := ev.GetRuntimeEaglePods("hello.web.web"); !ok { 48 | t.Errorf("Didn't get back runtime pods") 49 | } else { 50 | fmt.Printf("%+v\n", pods) 51 | } 52 | 53 | if err := engine.RemovePodGroup(name); err != nil { 54 | t.Errorf("We should be able to remove the pod group, %s", err) 55 | } 56 | 57 | time.Sleep(20 * time.Second) 58 | } 59 | -------------------------------------------------------------------------------- /apiserver/status.go: -------------------------------------------------------------------------------- 1 | package apiserver 2 | 3 | import ( 4 | "fmt" 5 | "github.com/laincloud/deployd/engine" 6 | "github.com/mijia/sweb/form" 7 | "github.com/mijia/sweb/log" 8 | "github.com/mijia/sweb/server" 9 | "golang.org/x/net/context" 10 | "net/http" 11 | ) 12 | 13 | type RestfulStatus struct { 14 | server.BaseResource 15 | } 16 | 17 | func (rs RestfulStatus) Patch(ctx context.Context, r *http.Request) (int, interface{}) { 18 | var status struct { 19 | Status string `json:"status"` 20 | } 21 | if err := form.ParamBodyJson(r, &status); err != nil { 22 | log.Warnf("Failed to decode engine status, %s", err) 23 | return http.StatusBadRequest, fmt.Sprintf("Invalid Status params format: %s", err) 24 | } 25 | 26 | switch status.Status { 27 | case "start": 28 | getEngine(ctx).Start() 29 | case "stop": 30 | getEngine(ctx).Stop() 31 | default: 32 | return http.StatusBadRequest, fmt.Sprintf("Invalid Status, it should be start or stop") 33 | } 34 | return http.StatusAccepted, "Accept" 35 | } 36 | 37 | func (rs RestfulStatus) Get(ctx context.Context, r *http.Request) (int, interface{}) { 38 | status := "started" 39 | if !getEngine(ctx).Started() { 40 | status 
= "stopped" 41 | } 42 | return http.StatusOK, map[string]string{ 43 | "status": status, 44 | } 45 | } 46 | 47 | type RestfulGuard struct { 48 | server.BaseResource 49 | } 50 | 51 | func (rs RestfulGuard) Get(ctx context.Context, r *http.Request) (int, interface{}) { 52 | status := "sleeping" 53 | if engine.FetchGuard().Working { 54 | status = "working" 55 | } 56 | return http.StatusOK, map[string]string{ 57 | "guard": status, 58 | } 59 | } 60 | 61 | func (rs RestfulGuard) Post(ctx context.Context, r *http.Request) (int, interface{}) { 62 | work := form.ParamBoolean(r, "work", false) 63 | eg := getEngine(ctx) 64 | ok := "Failed" 65 | if work && eg.GuardGotoWork() { 66 | ok = "OK" 67 | } else if eg.GuardGotoSleep() { 68 | ok = "OK" 69 | } 70 | return http.StatusOK, map[string]string{ 71 | "successed": ok, 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /engine/node.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/laincloud/deployd/cluster" 7 | "github.com/mijia/sweb/log" 8 | ) 9 | 10 | // remove a node should be in such steps show below 11 | // 1. make target node in maintenance with constraint 12 | // 2. fetch all containers in target node 13 | // 3. drift all containers in target node Asynchronously 14 | // (which can make cluster corrupted but eagle will correct it, don't worry! 15 | // in situation: schedule instance(generally shrink) and drift concurrently) 16 | // 4. stop all process service for lain (generally by lainctl) 17 | // 5. 
remove maintenance(generally by lainctl or called in add node phase) 18 | func (engine *OrcEngine) RemoveNode(node string) error { 19 | // step 1 20 | constraint := ConstraintSpec{"node", false, node, true} 21 | cstController.SetConstraint(constraint, engine.store) 22 | // step 2 23 | pods, err := engine.eagleView.refreshPodsByNode(engine.cluster, []string{node}) 24 | if err != nil { 25 | log.Warn("refreshPodsByNode err:%v", err) 26 | return err 27 | } 28 | log.Infof("pods %v will be drift", pods) 29 | // step 3 30 | for _, pod := range pods { 31 | engine.DriftNode(node, "", pod.Name, pod.InstanceNo, true) 32 | } 33 | return nil 34 | } 35 | 36 | // Fetch all containers in target nodes 37 | func (ev *RuntimeEagleView) refreshPodsByNode(c cluster.Cluster, nodes []string) ([]RuntimeEaglePod, error) { 38 | totalContainers := 0 39 | start := time.Now() 40 | defer func() { 41 | log.Infof(" pods by node %v refreshed, #containers=%d, duration=%s", 42 | nodes, totalContainers, time.Now().Sub(start)) 43 | }() 44 | nodeFilters := make([]string, len(nodes)) 45 | for i, node := range nodes { 46 | nodeFilters[i] = node 47 | } 48 | filters := make(map[string][]string) 49 | labelFilters := []string{ 50 | "com.docker.swarm.id", 51 | } 52 | filters["node"] = nodeFilters 53 | filters["label"] = labelFilters 54 | log.Infof("filters: %v", filters) 55 | pods, err := ev.refreshByFilters(c, filters) 56 | totalContainers = len(pods) 57 | return pods, err 58 | } 59 | -------------------------------------------------------------------------------- /apiserver/node.go: -------------------------------------------------------------------------------- 1 | package apiserver 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | 7 | "github.com/mijia/sweb/form" 8 | "github.com/mijia/sweb/server" 9 | "golang.org/x/net/context" 10 | ) 11 | 12 | type RestfulNodes struct { 13 | server.BaseResource 14 | } 15 | 16 | func (rn RestfulNodes) Get(ctx context.Context, r *http.Request) (int, interface{}) { 17 | nodes, 
err := getEngine(ctx).GetNodes() 18 | if err != nil { 19 | return http.StatusInternalServerError, err.Error() 20 | } 21 | return http.StatusAccepted, nodes 22 | } 23 | 24 | func (rn RestfulNodes) Patch(ctx context.Context, r *http.Request) (int, interface{}) { 25 | fromNode := form.ParamString(r, "from", "") 26 | targetNode := form.ParamString(r, "to", "") 27 | forceDrift := form.ParamBoolean(r, "force", false) 28 | pgName := form.ParamString(r, "pg", "") 29 | pgInstance := form.ParamInt(r, "pg_instance", -1) 30 | 31 | if fromNode == "" { 32 | return http.StatusBadRequest, "from node name required" 33 | } 34 | if fromNode == targetNode { 35 | return http.StatusBadRequest, "from node equals to target node" 36 | } 37 | 38 | cmd := form.ParamString(r, "cmd", "") 39 | switch cmd { 40 | case "drift": 41 | engine := getEngine(ctx) 42 | engine.DriftNode(fromNode, targetNode, pgName, pgInstance, forceDrift) 43 | return http.StatusAccepted, map[string]interface{}{ 44 | "message": "PodGroups will be drifting", 45 | "from": fromNode, 46 | "to": targetNode, 47 | "pgName": pgName, 48 | "pgInstance": pgInstance, 49 | "forceDrift": forceDrift, 50 | } 51 | default: 52 | return http.StatusBadRequest, fmt.Sprintf("Unkown command %s", cmd) 53 | } 54 | } 55 | 56 | func (rn RestfulNodes) Delete(ctx context.Context, r *http.Request) (int, interface{}) { 57 | node := form.ParamString(r, "node", "") 58 | 59 | if node == "" { 60 | return http.StatusBadRequest, "from node name required" 61 | } 62 | engine := getEngine(ctx) 63 | engine.RemoveNode(node) 64 | return http.StatusAccepted, map[string]interface{}{ 65 | "message": "containers in node will be drift", 66 | "node": node, 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /apiserver/notify.go: -------------------------------------------------------------------------------- 1 | package apiserver 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/laincloud/deployd/engine" 7 | 
"github.com/mijia/sweb/form" 8 | "github.com/mijia/sweb/server" 9 | "golang.org/x/net/context" 10 | "net/http" 11 | "net/url" 12 | ) 13 | 14 | type RestfulNotifies struct { 15 | server.BaseResource 16 | } 17 | 18 | func (rc RestfulNotifies) Get(ctx context.Context, r *http.Request) (int, interface{}) { 19 | notifies := getEngine(ctx).GetNotifies() 20 | if len(notifies) == 0 { 21 | return http.StatusNotFound, fmt.Sprintf("No notify found") 22 | } else { 23 | return http.StatusOK, notifies 24 | } 25 | } 26 | 27 | func (rc RestfulNotifies) Post(ctx context.Context, r *http.Request) (int, interface{}) { 28 | 29 | callback := form.ParamString(r, "callback", "") 30 | 31 | if callback == "" { 32 | return http.StatusBadRequest, "constaint type required" 33 | } 34 | 35 | if _, err := url.ParseRequestURI(callback); err != nil { 36 | return http.StatusBadRequest, fmt.Sprintf("callback url not valid: %s", err) 37 | } 38 | 39 | if err := getEngine(ctx).AddNotify(callback); err != nil { 40 | return http.StatusInternalServerError, err.Error() 41 | } 42 | 43 | urlReverser := getUrlReverser(ctx) 44 | return http.StatusAccepted, map[string]string{ 45 | "message": "notify will be added", 46 | "check_url": urlReverser.Reverse("Get_RestfulNotifies"), 47 | } 48 | } 49 | 50 | func (rc RestfulNotifies) Delete(ctx context.Context, r *http.Request) (int, interface{}) { 51 | callback := form.ParamString(r, "callback", "") 52 | 53 | if callback == "" { 54 | return http.StatusBadRequest, "callback value requird" 55 | } 56 | 57 | if err := getEngine(ctx).DeleteNotify(callback); err != nil { 58 | if err == engine.ErrNotifyNotExists { 59 | return http.StatusNotFound, err.Error() 60 | } 61 | return http.StatusInternalServerError, err.Error() 62 | } 63 | 64 | urlReverser := getUrlReverser(ctx) 65 | return http.StatusAccepted, map[string]string{ 66 | "message": "notify uri will be deleted from the orc engine.", 67 | "check_url": urlReverser.Reverse("Get_RestfulNotifies"), 68 | } 69 | } 70 | 
-------------------------------------------------------------------------------- /fixtures/engine.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "os" 7 | "os/signal" 8 | "time" 9 | 10 | "github.com/laincloud/deployd/cluster/swarm" 11 | "github.com/laincloud/deployd/engine" 12 | "github.com/laincloud/deployd/storage/etcd" 13 | "github.com/mijia/sweb/log" 14 | ) 15 | 16 | func main() { 17 | timeout := time.Duration(30 * time.Second) 18 | cluster, err := swarm.NewCluster("tcp://192.168.51.21:8178", timeout, 30*time.Minute) 19 | if err != nil { 20 | panic(err) 21 | } 22 | store, err := etcd.NewStore("http://192.168.51.21:4001", debug) 23 | if err != nil { 24 | panic(err) 25 | } 26 | 27 | orcEngine, err := engine.New(cluster, store) 28 | if err != nil { 29 | panic(err) 30 | } 31 | 32 | tRschSpec := false 33 | 34 | containerSpec := engine.NewContainerSpec("training/webapp") 35 | containerSpec.Name = "c1" 36 | containerSpec.Command = []string{"python", "app.py"} 37 | if tRschSpec { 38 | containerSpec.Volumes = []string{"/tmp"} 39 | } 40 | containerSpec.MemoryLimit = 15 * 1024 * 1024 41 | containerSpec.Expose = 5000 42 | 43 | pgSpec := engine.NewPodGroupSpec("hello.proc.web.foo", "hello", engine.NewPodSpec(containerSpec), 1) 44 | pgSpec.RestartPolicy = engine.RestartPolicyAlways 45 | 46 | err = orcEngine.NewPodGroup(pgSpec) 47 | if err != nil { 48 | panic(fmt.Sprintf("Fail to create new pod group, %s", err)) 49 | } 50 | 51 | time.Sleep(3 * time.Second) 52 | orcEngine.RescheduleInstance(pgSpec.Name, 2) 53 | 54 | if tRschSpec { 55 | time.Sleep(3 * time.Second) 56 | containerSpec.MemoryLimit = 32 * 1024 * 1024 57 | cSpec2 := containerSpec 58 | cSpec2.Name = "c2" 59 | orcEngine.RescheduleSpec(pgSpec.Name, engine.NewPodSpec(containerSpec, cSpec2)) 60 | } 61 | 62 | sigs := make(chan os.Signal, 1) 63 | signal.Notify(sigs, os.Interrupt, os.Kill) 64 | go func() { 65 | <-sigs 66 
| orcEngine.RemovePodGroup(pgSpec.Name) 67 | time.Sleep(5 * time.Second) 68 | os.Exit(1) 69 | }() 70 | 71 | time.Sleep(20 * time.Minute) 72 | } 73 | 74 | func dddd(title string, v interface{}) { 75 | fmt.Println(title) 76 | data, _ := json.MarshalIndent(v, "", " ") 77 | fmt.Printf("%s\n\n", string(data)) 78 | } 79 | -------------------------------------------------------------------------------- /apiserver/constraint.go: -------------------------------------------------------------------------------- 1 | package apiserver 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/laincloud/deployd/engine" 7 | "github.com/mijia/sweb/form" 8 | "github.com/mijia/sweb/server" 9 | "golang.org/x/net/context" 10 | "net/http" 11 | ) 12 | 13 | type RestfulConstraints struct { 14 | server.BaseResource 15 | } 16 | 17 | func (rc RestfulConstraints) Get(ctx context.Context, r *http.Request) (int, interface{}) { 18 | cstType := form.ParamString(r, "type", "node") 19 | 20 | if constraint, ok := getEngine(ctx).GetConstraints(cstType); !ok { 21 | return http.StatusNotFound, fmt.Sprintf("No constraint found") 22 | } else { 23 | return http.StatusOK, constraint 24 | } 25 | } 26 | 27 | func (rc RestfulConstraints) Patch(ctx context.Context, r *http.Request) (int, interface{}) { 28 | 29 | cstType := form.ParamString(r, "type", "") 30 | cstValue := form.ParamString(r, "value", "") 31 | equal := form.ParamBoolean(r, "equal", false) 32 | soft := form.ParamBoolean(r, "soft", true) 33 | 34 | if cstType == "" { 35 | return http.StatusBadRequest, "constraint type required" 36 | } 37 | if cstValue == "" { 38 | return http.StatusBadRequest, "constraint value required" 39 | } 40 | 41 | constraint := engine.ConstraintSpec{cstType, equal, cstValue, soft} 42 | 43 | if err := getEngine(ctx).UpdateConstraints(constraint); err != nil { 44 | return http.StatusInternalServerError, err.Error() 45 | } 46 | 47 | urlReverser := getUrlReverser(ctx) 48 | return http.StatusAccepted, map[string]string{ 49 | "message": 
"Constraints will be patched", 50 | "check_url": urlReverser.Reverse("Get_RestfulConstraints") + "?type=" + cstType, 51 | } 52 | } 53 | 54 | func (rc RestfulConstraints) Delete(ctx context.Context, r *http.Request) (int, interface{}) { 55 | cstType := form.ParamString(r, "type", "node") 56 | 57 | if err := getEngine(ctx).DeleteConstraints(cstType); err != nil { 58 | if err == engine.ErrConstraintNotExists { 59 | return http.StatusNotFound, err.Error() 60 | } 61 | return http.StatusInternalServerError, err.Error() 62 | } 63 | 64 | urlReverser := getUrlReverser(ctx) 65 | return http.StatusAccepted, map[string]string{ 66 | "message": "Constraint will be deleted from the orc engine.", 67 | "check_url": urlReverser.Reverse("Get_RestfulConstraints") + "?type=" + cstType, 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /engine/pod_test.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/laincloud/deployd/cluster/swarm" 8 | "github.com/laincloud/deployd/storage/etcd" 9 | "github.com/mijia/sweb/log" 10 | ) 11 | 12 | func TestPodController(t *testing.T) { 13 | etcdAddr := "http://127.0.0.1:2379" 14 | swarmAddr := "tcp://127.0.0.1:2376" 15 | isDebug := false 16 | 17 | _, err := etcd.NewStore(etcdAddr, isDebug) 18 | if err != nil { 19 | t.Errorf("Cannot init the etcd storage") 20 | } 21 | 22 | c, err := swarm.NewCluster(swarmAddr, 30*time.Second, 10*time.Minute) 23 | if err != nil { 24 | t.Errorf("Cannot init the swarm cluster manager") 25 | } 26 | 27 | cstController = NewConstraintController() 28 | 29 | cSpec := NewContainerSpec("training/webapp") 30 | cSpec.Command = []string{"python", "app.py"} 31 | cSpec.MemoryLimit = 15 * 1024 * 1024 32 | cSpec.Expose = 5000 33 | podSpec := NewPodSpec(cSpec) 34 | podSpec.Name = "hello.proc.web.foo" 35 | podSpec.Namespace = "hello" 36 | 37 | pc := &podController{ 38 | spec: 
podSpec, 39 | pod: Pod{ 40 | InstanceNo: 1, 41 | }, 42 | } 43 | pc.pod.State = RunStatePending 44 | 45 | pc.Deploy(c, []string{}) 46 | if pc.pod.State != RunStateSuccess { 47 | t.Fatal("Pod should be deployed") 48 | } 49 | 50 | cId := pc.pod.Containers[0].Id 51 | 52 | ev := NewRuntimeEagleView() 53 | if _, err := ev.RefreshPodGroup(c, podSpec.Name); err != nil { 54 | t.Fatal("Failed to refresh the pod group") 55 | } 56 | podContainers, ok := ev.GetRuntimeEaglePods(podSpec.Name) 57 | if !ok || len(podContainers) == 0 { 58 | t.Fatal("Failed to get the runtime eagle pods from swarm") 59 | } 60 | log.Infof("podContainers:%v len:%v", podContainers, len(podContainers)) 61 | if podContainers[0].Container.Id != cId { 62 | t.Fatal("Should have the same container id as we deployed") 63 | } 64 | 65 | pc.Refresh(c) 66 | if pc.pod.State != RunStateSuccess { 67 | t.Fatal("The pod should be in success run state") 68 | } 69 | 70 | pc.Stop(c) 71 | if pc.pod.State != RunStateExit { 72 | t.Fatal("The pod should be stopped and exited") 73 | } 74 | 75 | pc.Start(c) 76 | if pc.pod.State != RunStateSuccess { 77 | t.Fatal("The pod should be restarted and in success run state") 78 | } 79 | 80 | pc.Remove(c) 81 | if err := ev.Refresh(c); err != nil { 82 | t.Fatal("Failed to refresh the pod group") 83 | } 84 | podContainers, ok = ev.GetRuntimeEaglePods(podSpec.Name) 85 | if ok { 86 | t.Fatal("Should not get data for the pods") 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | 3 | language: go 4 | go: 5 | - 1.8.x 6 | 7 | services: 8 | - docker 9 | 10 | install: 11 | - go get -u github.com/golang/dep/cmd/dep 12 | - go get github.com/aliyun/ossutil 13 | 14 | before_script: 15 | - dep ensure 16 | - echo 'DOCKER_OPTS="-H tcp://0.0.0.0:2375 -H unix:///var/run/docker.sock --cluster-store=etcd://127.0.0.1:2379 -s devicemapper"' | sudo 
tee /etc/default/docker > /dev/null 17 | - sudo service docker restart 18 | - ip=$(ip addr show eth0| grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' | grep -Eo '([0-9]*\.){3}[0-9]*') 19 | - curl -L https://github.com/coreos/etcd/releases/download/v2.3.7/etcd-v2.3.7-linux-amd64.tar.gz -o etcd-v2.3.7-linux-amd64.tar.gz 20 | - tar xzvf etcd-v2.3.7-linux-amd64.tar.gz 21 | - nohup ./etcd-v2.3.7-linux-amd64/etcd --listen-client-urls=http://$ip:2379,http://localhost:2379 --advertise-client-urls=http://$ip:2379 & 22 | - cat /lib/systemd/system/docker.service 23 | - docker network create hello 24 | - docker pull swarm:1.2.8 25 | - docker run -d --name swarm-manage.service -p 2376:2375 swarm:1.2.8 manage --replication --addr=$ip:2376 etcd://$ip:2379/lain/swarm 26 | - docker run -d --name swarm-agent.service swarm:1.2.8 join --addr=$ip:2375 etcd://$ip:2379/lain/swarm 27 | - docker ps -a 28 | - docker -H :2376 info 29 | script: 30 | - go build -o bin/deployd main.go 31 | - ./test.sh 32 | after_success: 33 | - bash <(curl -s https://codecov.io/bash) 34 | - xz -9k bin/deployd 35 | - if [ "$TRAVIS_TAG" != "" ]; then 36 | ossutil cp bin/deployd.xz oss://lain/binary/deployd/releases/download/${TRAVIS_TAG}/deployd.xz -f -e ${ALIYUN_OSS_ENDPOINT} -i ${ALIYUN_OSS_ACCESS_KEY_ID} -k ${ALIYUN_OSS_ACCESS_KEY_SECRET}; 37 | fi 38 | deploy: 39 | provider: releases 40 | api_key: 41 | secure: 
NZjqwP3WFYrQSqwdad4HfdJKFGAOBuoRZx5CQQutg94Y92Le5JtgGXpzNSZA8jYoK5PjJCRBE0BQbT/VZv8DDN6U7qMhZkblaqeISxralrZZOh1wVA/bn6TlOcci2SYSMEVSakT/V4dwAJESO3P6KRZQUaOHktY0OlCZWp2n5NpLEQBf7H+r+UwibUoBizATrTfN+K0/2kbbyvx4G6Dcouo9T92N2A1di5ZUWCH6XBwdVOVRfco1Ove11PaEyrylQc08Mspo7KE44xLUDE4kvkjvlDZT34uAQn0cNF/EtWbF2oXsMRpNvr9d7H5iie8WEfbT5NYFb2gAp6fIvTdsCEhOOz6h5GIHHAeJGPtTg13g+ZW5jB+Yux/d/h5FNSYTkwn0ozPqMgBIn90as9qNkl+IQxasDlGb7j+C2PwF6DSsTIVM4yTGz+fzJV1dr4fu2Pb3ux4Gxbb9cXOzJAqBShh3xYS+XX3Vba8WOYI8iA7KMSC3ZJQ3L17HUOhQ7nKlR+5Ph5b5xUZ9TNQa6rdUEMItVPTrI7ZKOWXGY2xhEEc1It0t4np/AoWrLggyTwq8JDEAZwukbn+X1U4OD18gkFW5R0COtKT+r3URFRbgdv015H3KXz/2saIkVxJBIQCrxuJUC5dnCesZmupvYlQ5i9nyDiW1osOB+0zNOD084qU= 42 | file: 43 | - bin/deployd 44 | - bin/deployd.xz 45 | skip_cleanup: true 46 | on: 47 | tags: true 48 | -------------------------------------------------------------------------------- /engine/depends_test.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestDependsPodCtrl(t *testing.T) { 10 | etcdAddr := "http://127.0.0.1:2379" 11 | ConfigPortsManager(etcdAddr) 12 | c, store, err := initClusterAndStore() 13 | if err != nil { 14 | t.Fatalf("Cannot create the cluster and storage, %s", err) 15 | } 16 | 17 | engine, err := New(c, store) 18 | if err != nil { 19 | t.Fatalf("Cannot create the orc engine, %s", err) 20 | } 21 | 22 | publisher := NewPublisher(true) 23 | publisher.AddListener(engine) 24 | 25 | podSpec := createPodSpec("hello", "hello.portal") 26 | if err := engine.NewDependencyPod(podSpec); err != nil { 27 | t.Fatalf("Cannot create dependency pod, %s", err) 28 | } 29 | 30 | event := DependencyEvent{ 31 | Type: "add", 32 | Name: "hello.portal", 33 | NodeName: "node1", 34 | Namespace: "client.proc.foo", 35 | } 36 | publisher.EmitEvent(event) 37 | publisher.EmitEvent(event) 38 | // event.NodeName = "node2" 39 | // publisher.EmitEvent(event) 40 | 41 | time.Sleep(30 
* time.Second) 42 | if pods, err := engine.GetDependencyPod("hello.portal"); err != nil { 43 | t.Errorf("Cannot get the depends pods back, %s", err) 44 | } else { 45 | if nsPods, ok := pods.Pods["client.proc.foo"]; !ok { 46 | t.Errorf("We should get the namespace back") 47 | } else if len(nsPods) != 1 { 48 | t.Errorf("We should have 1 portal pods running on each node") 49 | } 50 | } 51 | 52 | fmt.Println("==========================\n\n") 53 | 54 | podSpec = podSpec.Clone() 55 | podSpec.Containers[0].MemoryLimit = 20 * 1024 * 1024 56 | if err := engine.UpdateDependencyPod(podSpec); err != nil { 57 | t.Errorf("Cannot update the depends pod, %s", err) 58 | } 59 | time.Sleep(20 * time.Second) 60 | if pods, err := engine.GetDependencyPod("hello.portal"); err != nil { 61 | t.Errorf("Cannot get the depends pods back, %s", err) 62 | } else { 63 | if nsPods, ok := pods.Pods["client.proc.foo"]; !ok { 64 | t.Errorf("We should get the namespace back") 65 | } else if len(nsPods) != 1 { 66 | t.Errorf("We should have 1 portal pods running on each node") 67 | } 68 | } 69 | 70 | time.Sleep(20 * time.Second) 71 | 72 | fmt.Println("==========================\n\n") 73 | if err := engine.RemoveDependencyPod("hello.portal", true); err != nil { 74 | t.Errorf("Cannot remove the depends pods, %s", err) 75 | } 76 | time.Sleep(20 * time.Second) 77 | if _, err := engine.GetDependencyPod("hello.portal"); err == nil { 78 | t.Errorf("We should not get the depends pods back, %s", err) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /utils/util/util.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "net" 7 | "net/url" 8 | "strconv" 9 | "strings" 10 | 11 | "github.com/laincloud/deployd/utils/regex" 12 | ) 13 | 14 | var ( 15 | ErrContainerMatchFailed = errors.New("Container Match Failed!") 16 | ) 17 | 18 | func ParseNameInstanceNo(containerName string) 
// IsConnectionError reports whether err looks like a network connectivity
// failure: either a "connection refused" from the dialer, or a timeout
// reported through the net.Error interface (possibly wrapped in url.Error).
func IsConnectionError(err error) bool {
	if err == nil {
		return false
	}
	// The refused-connection text is a fixed literal with no regexp
	// metacharacters, so a substring check replaces the previous
	// per-call regexp compile + match.
	if strings.Contains(err.Error(), "getsockopt: connection refused") {
		return true
	}
	switch err := err.(type) {
	case net.Error:
		return err.Timeout()
	case *url.Error:
		if err, ok := err.Err.(net.Error); ok {
			return err.Timeout()
		}
	}
	return false
}
// Copied from github.com/docker/docker/pkg/units/size.go

// See: http://en.wikipedia.org/wiki/Binary_prefix
const (
	// Decimal

	KB = 1000
	MB = 1000 * KB
	GB = 1000 * MB
	TB = 1000 * GB
	PB = 1000 * TB

	// Binary

	KiB = 1024
	MiB = 1024 * KiB
	GiB = 1024 * MiB
	TiB = 1024 * GiB
	PiB = 1024 * TiB
)

// unitMap maps a lowercase unit prefix ("k", "m", ...) to its multiplier.
type unitMap map[string]int64

var (
	decimalMap = unitMap{"k": KB, "m": MB, "g": GB, "t": TB, "p": PB}
	binaryMap  = unitMap{"k": KiB, "m": MiB, "g": GiB, "t": TiB, "p": PiB}
	// sizeRegex matches "<digits><optional unit prefix><optional b/B>".
	sizeRegex = regexp.MustCompile(`^(\d+)([kKmMgGtTpP])?[bB]?$`)
)

var decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
var binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}

// HumanSize returns a human-readable approximation of a size
// using SI standard (eg. "44 kB", "17 MB").
func HumanSize(size float64) string {
	// size is already a float64; the previous float64(size) cast was a no-op
	return intToString(size, 1000.0, decimapAbbrs)
}

// BytesSize returns a human-readable size in binary units (eg. "44 KiB").
func BytesSize(size float64) string {
	return intToString(size, 1024.0, binaryAbbrs)
}

// intToString repeatedly divides size by unit and renders the result with
// the matching abbreviation, e.g. intToString(1536, 1024, binaryAbbrs)
// yields "1.5 KiB".
func intToString(size, unit float64, _map []string) string {
	i := 0
	for size >= unit {
		size = size / unit
		i++
	}
	return fmt.Sprintf("%.4g %s", size, _map[i])
}

// FromHumanSize returns an integer from a human-readable specification of a
// size using SI standard (eg. "44kB", "17MB").
func FromHumanSize(size string) (int64, error) {
	return parseSize(size, decimalMap)
}

// RAMInBytes parses a human-readable string representing an amount of RAM
// in bytes, kibibytes, mebibytes, gibibytes, or tebibytes and
// returns the number of bytes, or -1 if the string is unparseable.
// Units are case-insensitive, and the 'b' suffix is optional.
func RAMInBytes(size string) (int64, error) {
	return parseSize(size, binaryMap)
}

// parseSize parses the human-readable size string into the amount it
// represents, using uMap to resolve the optional unit prefix.
func parseSize(sizeStr string, uMap unitMap) (int64, error) {
	matches := sizeRegex.FindStringSubmatch(sizeStr)
	if len(matches) != 3 {
		return -1, fmt.Errorf("invalid size: '%s'", sizeStr)
	}

	// bitSize 64 (was 0 == platform int): the result is an int64, so parse
	// the full 64-bit range regardless of platform word size
	size, err := strconv.ParseInt(matches[1], 10, 64)
	if err != nil {
		return -1, err
	}

	// an empty prefix (plain "42" or "42b") simply means bytes
	unitPrefix := strings.ToLower(matches[2])
	if mul, ok := uMap[unitPrefix]; ok {
		size *= mul
	}

	return size, nil
}
`json:"path"` 12 | MaxIops uint64 `json:"iops"` 13 | MaxRate string `json:"rate"` 14 | Ratio uint64 `json:"ratio,omitempty"` 15 | } 16 | 17 | type Resource struct { 18 | Cpu int `json:"cpu"` 19 | Memory string `json:"memory"` 20 | Devices []Device `json:"devices"` 21 | } 22 | 23 | type Guard struct { 24 | Working bool `json:"Working"` 25 | } 26 | 27 | const ( 28 | EtcdResourcesKey = "/lain/config/resources" 29 | EtcdGuardSwitchKey = "/lain/config/guardswitch" 30 | EtcdCloudVolumeRootKey = "/lain/config/cloud_volumes_root" 31 | EtcdVolumeRootKey = "/lain/config/volumes_root" 32 | 33 | EtcdConfigKey = "/lain/deployd/engine/config" 34 | 35 | DefaultLastSpecCacheTTL = 10 * 60 // 10min 36 | DefautDeviceRatio = uint64(70) // 70% so should / 100 37 | ) 38 | 39 | var ( 40 | resource = &Resource{Cpu: 8, Memory: "16G"} 41 | guard = &Guard{Working: true} 42 | ) 43 | 44 | func watchGuard(store storage.Store) { 45 | watcher(store, EtcdGuardSwitchKey, guard) 46 | } 47 | 48 | func watchResource(store storage.Store) { 49 | watcher(store, EtcdResourcesKey, resource) 50 | } 51 | 52 | func WatchEngineConfig(engine *OrcEngine) { 53 | watcher(engine.store, EtcdConfigKey, engine.config) 54 | } 55 | 56 | func watcher(store storage.Store, key string, v interface{}) { 57 | rsCh := store.Watch(key) 58 | store.Get(key, v) 59 | go func() { 60 | for { 61 | select { 62 | case rsc := <-rsCh: 63 | if err := json.Unmarshal([]byte(rsc), v); err == nil { 64 | log.Infof("got value:%v", v) 65 | } else { 66 | log.Warnf("watcher faild with marshall error:%v", err) 67 | } 68 | break 69 | } 70 | } 71 | }() 72 | } 73 | 74 | func FetchResource() *Resource { 75 | return resource 76 | } 77 | 78 | func FetchGuard() *Guard { 79 | return guard 80 | } 81 | 82 | func GuardGotoSleep(store storage.Store) bool { 83 | g := &Guard{Working: false} 84 | if err := store.Set(EtcdGuardSwitchKey, g, true); err != nil { 85 | return false 86 | } 87 | guard = g 88 | return true 89 | } 90 | 91 | func GuardGotoWork(store 
storage.Store) bool { 92 | g := &Guard{Working: true} 93 | if err := store.Set(EtcdGuardSwitchKey, g, true); err != nil { 94 | return false 95 | } 96 | guard = g 97 | return true 98 | } 99 | 100 | func ConfigEngine(engine *OrcEngine) bool { 101 | if err := engine.store.Set(EtcdConfigKey, engine.config, true); err != nil { 102 | return false 103 | } 104 | return true 105 | } 106 | -------------------------------------------------------------------------------- /utils/proxy/proxy.go: -------------------------------------------------------------------------------- 1 | package proxy 2 | 3 | import ( 4 | "github.com/mijia/sweb/log" 5 | "io" 6 | "net" 7 | "net/http" 8 | "strings" 9 | "sync" 10 | ) 11 | 12 | type Proxy struct { 13 | s *http.Server 14 | addr string 15 | dest string 16 | lock *sync.RWMutex 17 | stop chan struct{} 18 | started bool 19 | } 20 | 21 | func New(addr string, dest string) *Proxy { 22 | p := &Proxy{ 23 | addr: addr, 24 | dest: dest, 25 | lock: &sync.RWMutex{}, 26 | started: false, 27 | stop: make(chan struct{}), 28 | } 29 | p.s = &http.Server{Addr: p.addr, Handler: p} 30 | return p 31 | } 32 | 33 | func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request) { 34 | p.lock.RLock() 35 | defer p.lock.RUnlock() 36 | if p.dest == "" { 37 | log.Warnf("Proxy's destination is empty") 38 | return 39 | } 40 | if err := hijack(p.dest, w, r); err != nil { 41 | log.Errorf("Hijack return error: %v", err) 42 | } 43 | } 44 | 45 | func (p *Proxy) Run() error { 46 | log.Infof("Proxy running, working on [%s] => [%s]", p.addr, p.dest) 47 | 48 | p.started = true 49 | defer func() { p.started = false }() 50 | 51 | l, err := net.Listen("tcp", p.addr) 52 | if err != nil { 53 | return err 54 | } 55 | 56 | go func() { 57 | <-p.stop 58 | l.Close() 59 | }() 60 | 61 | return p.s.Serve(l) 62 | } 63 | 64 | func (p *Proxy) Shutdown() { 65 | if p.started { 66 | p.stop <- struct{}{} 67 | } 68 | } 69 | 70 | func (p *Proxy) SetDest(dest string) { 71 | p.lock.Lock() 72 | defer 
p.lock.Unlock() 73 | p.dest = dest 74 | log.Infof("Proxy's destination changed, now working on [%s] => [%s]", p.addr, p.dest) 75 | } 76 | 77 | func (p *Proxy) Dest() string { 78 | p.lock.RLock() 79 | defer p.lock.RUnlock() 80 | return p.dest 81 | } 82 | 83 | func hijack(addr string, w http.ResponseWriter, r *http.Request) error { 84 | var ( 85 | conn net.Conn 86 | err error 87 | ) 88 | 89 | if parts := strings.SplitN(addr, "://", 2); len(parts) == 2 { 90 | addr = parts[1] 91 | } 92 | 93 | conn, err = net.Dial("tcp", addr) 94 | if err != nil { 95 | return err 96 | } 97 | hj, _, err := w.(http.Hijacker).Hijack() 98 | if err != nil { 99 | return err 100 | } 101 | defer hj.Close() 102 | defer conn.Close() 103 | 104 | // write request to 105 | if err := r.Write(conn); err != nil { 106 | return err 107 | } 108 | 109 | // transfer data for this two connection 110 | errc := make(chan error, 2) 111 | cp := func(dst io.Writer, src io.Reader) { 112 | _, err := io.Copy(dst, src) 113 | // io.Writer do not having CloseWrite(), convert. 
114 | if conn, ok := dst.(interface { 115 | CloseWrite() error 116 | }); ok { 117 | conn.CloseWrite() 118 | } 119 | errc <- err 120 | } 121 | go cp(conn, hj) 122 | go cp(hj, conn) 123 | 124 | // wait copy finished 125 | if err := <-errc; err != nil { 126 | return err 127 | } 128 | if err := <-errc; err != nil { 129 | return err 130 | } 131 | return nil 132 | } 133 | -------------------------------------------------------------------------------- /engine/constraint.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | 7 | "github.com/laincloud/deployd/storage" 8 | "github.com/mijia/sweb/log" 9 | ) 10 | 11 | type ConstraintSpec struct { 12 | Type string 13 | Equal bool 14 | Value string 15 | Soft bool 16 | } 17 | 18 | type constraintController struct { 19 | sync.RWMutex 20 | 21 | constraints map[string]ConstraintSpec 22 | } 23 | 24 | func NewConstraintController() *constraintController { 25 | cc := &constraintController{ 26 | constraints: make(map[string]ConstraintSpec), 27 | } 28 | return cc 29 | } 30 | 31 | func (cc *constraintController) LoadConstraints(store storage.Store) error { 32 | constraints := make(map[string]ConstraintSpec) 33 | cstKey := fmt.Sprintf("%s/%s", kLainDeploydRootKey, kLainConstraintKey) 34 | if cstNames, err := store.KeysByPrefix(cstKey); err != nil { 35 | if err != storage.KMissingError { 36 | return err 37 | } 38 | } else { 39 | for _, cstName := range cstNames { 40 | var cstSpec ConstraintSpec 41 | if err := store.Get(cstName, &cstSpec); err != nil { 42 | log.Errorf("Failed to load constraint %s from storage, %s", cstName, err) 43 | return err 44 | } 45 | constraints[cstSpec.Type] = cstSpec 46 | log.Infof("Loaded constraint %s from storage, %s", cstSpec.Type, cstSpec) 47 | } 48 | } 49 | cc.constraints = constraints 50 | return nil 51 | } 52 | 53 | func (cc *constraintController) LoadFilterFromConstrain(cstSpec ConstraintSpec) string { 54 | operator 
:= "==" 55 | if !cstSpec.Equal { 56 | operator = "!=" 57 | } 58 | if cstSpec.Soft { 59 | operator += "~" 60 | } 61 | return fmt.Sprintf("constraint:%s%s%s", cstSpec.Type, operator, cstSpec.Value) 62 | } 63 | 64 | func (cc *constraintController) GetAllConstraints() map[string]ConstraintSpec { 65 | cc.RLock() 66 | defer cc.RUnlock() 67 | return cc.constraints 68 | } 69 | 70 | func (cc *constraintController) GetConstraint(cstType string) (ConstraintSpec, bool) { 71 | cc.RLock() 72 | defer cc.RUnlock() 73 | if cstSpec, ok := cc.constraints[cstType]; !ok { 74 | return ConstraintSpec{}, false 75 | } else { 76 | return cstSpec, true 77 | } 78 | } 79 | 80 | func (cc *constraintController) SetConstraint(cstSpec ConstraintSpec, store storage.Store) error { 81 | cc.Lock() 82 | defer cc.Unlock() 83 | constraintKey := fmt.Sprintf("%s/%s/%s", kLainDeploydRootKey, kLainConstraintKey, cstSpec.Type) 84 | if err := store.Set(constraintKey, cstSpec); err != nil { 85 | log.Warnf("Failed to set constraint key %s, %s", constraintKey, err) 86 | return err 87 | } 88 | cc.constraints[cstSpec.Type] = cstSpec 89 | return nil 90 | } 91 | 92 | func (cc *constraintController) RemoveConstraint(cstType string, store storage.Store) error { 93 | cc.Lock() 94 | defer cc.Unlock() 95 | constraintKey := fmt.Sprintf("%s/%s/%s", kLainDeploydRootKey, kLainConstraintKey, cstType) 96 | if err := store.Remove(constraintKey); err != nil { 97 | log.Warnf("Failed to remove constraint key %s, %s", constraintKey, err) 98 | return err 99 | } 100 | delete(cc.constraints, cstType) 101 | return nil 102 | } 103 | -------------------------------------------------------------------------------- /engine/engine_ops.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | type orcOperation interface { 4 | Do(engine *OrcEngine) 5 | } 6 | 7 | // Depends Operations 8 | 9 | type orcOperDependsAddSpec struct { 10 | depCtrl *dependsController 11 | } 12 | 13 | func (op 
orcOperDependsAddSpec) Do(engine *OrcEngine) { 14 | op.depCtrl.AddSpec() 15 | } 16 | 17 | type orcOperDependsUpdateSpec struct { 18 | depCtrl *dependsController 19 | newSpec PodSpec 20 | } 21 | 22 | func (op orcOperDependsUpdateSpec) Do(engine *OrcEngine) { 23 | op.depCtrl.UpdateSpec(op.newSpec) 24 | } 25 | 26 | type orcOperDependsRemoveSpec struct { 27 | depCtrl *dependsController 28 | force bool 29 | } 30 | 31 | func (op orcOperDependsRemoveSpec) Do(engine *OrcEngine) { 32 | op.depCtrl.RemoveSpec(op.force) 33 | } 34 | 35 | type orcOperDependsRefresh struct { 36 | depCtrl *dependsController 37 | } 38 | 39 | func (op orcOperDependsRefresh) Do(engine *OrcEngine) { 40 | op.depCtrl.Refresh() 41 | } 42 | 43 | type orcOperDependsDispatch struct { 44 | depCtrl *dependsController 45 | event DependencyEvent 46 | } 47 | 48 | func (op orcOperDependsDispatch) Do(engine *OrcEngine) { 49 | event := op.event 50 | switch event.Type { 51 | case "add": 52 | op.depCtrl.AddPod(event.Namespace, event.NodeName) 53 | case "remove": 54 | op.depCtrl.RemovePod(event.Namespace, event.NodeName) 55 | case "verify": 56 | op.depCtrl.VerifyPod(event.Namespace, event.NodeName) 57 | } 58 | } 59 | 60 | type orcOperEventHandler struct { 61 | event OperationEvent 62 | } 63 | 64 | func (op orcOperEventHandler) Do(engine *OrcEngine) { 65 | event := op.event 66 | switch event.Type { 67 | case OperationStart: 68 | engine.PgOpStart(event.PgName) 69 | case OperationOver: 70 | engine.PgOpOver(event.PgName) 71 | } 72 | } 73 | 74 | // PodGroup Operations 75 | 76 | type orcOperDeploy struct { 77 | pgCtrl *podGroupController 78 | } 79 | 80 | func (op orcOperDeploy) Do(engine *OrcEngine) { 81 | op.pgCtrl.Deploy() 82 | } 83 | 84 | type orcOperRefresh struct { 85 | pgCtrl *podGroupController 86 | forceUpdate bool 87 | } 88 | 89 | func (op orcOperRefresh) Do(engine *OrcEngine) { 90 | op.pgCtrl.Refresh(op.forceUpdate) 91 | } 92 | 93 | type orcOperRemove struct { 94 | pgCtrl *podGroupController 95 | } 96 | 97 | func 
(op orcOperRemove) Do(engine *OrcEngine) { 98 | op.pgCtrl.Remove() 99 | } 100 | 101 | type orcOperRescheduleInstance struct { 102 | pgCtrl *podGroupController 103 | numInstances int 104 | restartPolicy []RestartPolicy 105 | } 106 | 107 | func (op orcOperRescheduleInstance) Do(engine *OrcEngine) { 108 | op.pgCtrl.RescheduleInstance(op.numInstances, op.restartPolicy...) 109 | } 110 | 111 | type orcOperRescheduleSpec struct { 112 | pgCtrl *podGroupController 113 | podSpec PodSpec 114 | } 115 | 116 | func (op orcOperRescheduleSpec) Do(engine *OrcEngine) { 117 | op.pgCtrl.RescheduleSpec(op.podSpec) 118 | } 119 | 120 | type orcOperScheduleDrift struct { 121 | pgCtrl *podGroupController 122 | fromNode string 123 | toNode string 124 | instanceNo int 125 | force bool 126 | } 127 | 128 | func (op orcOperScheduleDrift) Do(engine *OrcEngine) { 129 | op.pgCtrl.RescheduleDrift(op.fromNode, op.toNode, op.instanceNo, op.force) 130 | } 131 | 132 | type orcOperChangeState struct { 133 | pgCtrl *podGroupController 134 | op string 135 | instance int 136 | } 137 | 138 | func (op orcOperChangeState) Do(engine *OrcEngine) { 139 | op.pgCtrl.ChangeState(op.op, op.instance) 140 | } 141 | -------------------------------------------------------------------------------- /apiserver/dependency.go: -------------------------------------------------------------------------------- 1 | package apiserver 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | 7 | "github.com/laincloud/deployd/engine" 8 | 9 | "github.com/mijia/sweb/form" 10 | "github.com/mijia/sweb/log" 11 | "github.com/mijia/sweb/server" 12 | "golang.org/x/net/context" 13 | ) 14 | 15 | type RestfulDependPods struct { 16 | server.BaseResource 17 | } 18 | 19 | func (rdp RestfulDependPods) Get(ctx context.Context, r *http.Request) (int, interface{}) { 20 | dpName := form.ParamString(r, "name", "") 21 | if dpName == "" { 22 | return http.StatusBadRequest, fmt.Sprintf("Missing dependency pod name for the request") 23 | } 24 | orcEngine := 
getEngine(ctx) 25 | if podsSpec, err := orcEngine.GetDependencyPod(dpName); err != nil { 26 | if err == engine.ErrDependencyPodNotExists { 27 | return http.StatusNotFound, err.Error() 28 | } 29 | return http.StatusInternalServerError, err.Error() 30 | } else { 31 | return http.StatusOK, podsSpec 32 | } 33 | } 34 | 35 | func (rdp RestfulDependPods) Delete(ctx context.Context, r *http.Request) (int, interface{}) { 36 | dpName := form.ParamString(r, "name", "") 37 | if dpName == "" { 38 | return http.StatusBadRequest, fmt.Sprintf("Missing dependency pod name for the request") 39 | } 40 | force := form.ParamBoolean(r, "force", false) 41 | orcEngine := getEngine(ctx) 42 | if err := orcEngine.RemoveDependencyPod(dpName, force); err != nil { 43 | if err == engine.ErrDependencyPodNotExists { 44 | return http.StatusNotFound, err.Error() 45 | } 46 | return http.StatusInternalServerError, err.Error() 47 | } 48 | urlReverser := getUrlReverser(ctx) 49 | return http.StatusAccepted, map[string]string{ 50 | "message": "Dependency pod will be removed from the orc engine.", 51 | "check_url": urlReverser.Reverse("Get_RestfulDependPods") + "?name=" + dpName, 52 | } 53 | } 54 | 55 | func (rdp RestfulDependPods) Put(ctx context.Context, r *http.Request) (int, interface{}) { 56 | var podSpec engine.PodSpec 57 | if err := form.ParamBodyJson(r, &podSpec); err != nil { 58 | return http.StatusBadRequest, fmt.Sprintf("Bad parameter format for PodSpec, %s", err) 59 | } 60 | if !podSpec.VerifyParams() { 61 | return http.StatusBadRequest, fmt.Sprintf("Missing parameters for PodSpec") 62 | } 63 | 64 | orcEngine := getEngine(ctx) 65 | if err := orcEngine.UpdateDependencyPod(podSpec); err != nil { 66 | if err == engine.ErrDependencyPodNotExists { 67 | return http.StatusNotFound, err.Error() 68 | } 69 | return http.StatusInternalServerError, err.Error() 70 | } 71 | urlReverser := getUrlReverser(ctx) 72 | return http.StatusAccepted, map[string]string{ 73 | "message": "Dependency PodSpec would be 
updated in orc engine.", 74 | "check_url": urlReverser.Reverse("Get_RestfulDependPods") + "?name=" + podSpec.Name, 75 | } 76 | } 77 | 78 | func (rdp RestfulDependPods) Post(ctx context.Context, r *http.Request) (int, interface{}) { 79 | var podSpec engine.PodSpec 80 | if err := form.ParamBodyJson(r, &podSpec); err != nil { 81 | log.Warnf("Failed to decode PodSpec, %s", err) 82 | return http.StatusBadRequest, fmt.Sprintf("Bad parameter format for PodSpec, %s", err) 83 | } 84 | if ok := podSpec.VerifyParams(); !ok { 85 | return http.StatusBadRequest, fmt.Sprintf("Missing paremeters for PodSpec") 86 | } 87 | 88 | orcEngine := getEngine(ctx) 89 | if err := orcEngine.NewDependencyPod(podSpec); err != nil { 90 | if err == engine.ErrDependencyPodExists { 91 | return http.StatusMethodNotAllowed, err.Error() 92 | } 93 | return http.StatusInternalServerError, err.Error() 94 | } 95 | urlReverser := getUrlReverser(ctx) 96 | return http.StatusAccepted, map[string]string{ 97 | "message": "Dependency pod will be added into orc engine.", 98 | "check_url": urlReverser.Reverse("Get_RestfulDependPods") + "?name=" + podSpec.Name, 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /utils/elector/elect.go: -------------------------------------------------------------------------------- 1 | package elector 2 | 3 | import ( 4 | "github.com/docker/libkv" 5 | "github.com/docker/libkv/store" 6 | etcdLibkv "github.com/docker/libkv/store/etcd" 7 | "github.com/mijia/sweb/log" 8 | "strings" 9 | "time" 10 | ) 11 | 12 | const ( 13 | LeaderKey = "/lain/deployd/leader" 14 | defaultLockTTL = 20 * time.Second 15 | ) 16 | 17 | type Elector struct { 18 | store store.Store 19 | key string 20 | value string 21 | ttl time.Duration 22 | leader bool 23 | } 24 | 25 | func init() { 26 | libkv.AddStore(store.ETCD, etcdLibkv.New) 27 | } 28 | 29 | func New(etcds []string, key string, value string) (*Elector, error) { 30 | for i, v := range etcds { 31 | if parts 
:= strings.SplitN(v, "://", 2); len(parts) == 2 { 32 | etcds[i] = parts[1] 33 | } 34 | } 35 | st, err := libkv.NewStore(store.ETCD, etcds, nil) 36 | if err != nil { 37 | return nil, err 38 | } 39 | return &Elector{ 40 | store: st, 41 | key: key, 42 | value: value, 43 | leader: false, 44 | ttl: defaultLockTTL, 45 | }, nil 46 | } 47 | 48 | func (e *Elector) Run(stop chan struct{}) chan string { 49 | stopWatchCh, leaderCh := make(chan struct{}), make(chan string) 50 | 51 | go e.elect(stopWatchCh, stop) 52 | go e.watch(leaderCh, stopWatchCh) 53 | 54 | return leaderCh 55 | } 56 | 57 | func (e *Elector) IsLeader() bool { 58 | return e.leader 59 | } 60 | 61 | func (e *Elector) watch(leaderCh chan string, stop chan struct{}) { 62 | defer close(leaderCh) 63 | 64 | var ( 65 | current string = "" 66 | ch <-chan *store.KVPair 67 | err error 68 | retry int = 0 69 | ) 70 | // watch would be failed if leader key not exist 71 | // sometimes it should wait for elect() to create(lock) leader key 72 | // try 3 times, waiting for the leader key created 73 | for { 74 | ch, err = e.store.Watch(e.key, stop) 75 | if err != nil { 76 | if retry >= 3 { 77 | log.Fatalf("Fail to watch leader key[%s] for 3 times, exit", e.key) 78 | } 79 | log.Warnf("Fail to watch leader key[%s], %s, try again", 80 | e.key, err.Error()) 81 | time.Sleep(time.Second) 82 | retry += 1 83 | continue 84 | } 85 | retry = 0 86 | 87 | // libkv locker refresh ttl every ttl / 3, so will receive data each refresh time. 
88 | for kv := range ch { 89 | value := string(kv.Value) 90 | log.Debugf("Get watch event, leader value changed to %s", value) 91 | if current != value && value != "" { 92 | current = value 93 | leaderCh <- value 94 | } 95 | } 96 | select { 97 | case <-stop: // real stop 98 | return 99 | default: 100 | log.Warnf("elector's watcher stoped for some unkown reason, retry") 101 | time.Sleep(time.Millisecond * 100) 102 | } 103 | } 104 | } 105 | 106 | func (e *Elector) elect(stopWatchCh chan struct{}, stop chan struct{}) { 107 | defer close(stopWatchCh) 108 | lock, err := e.store.NewLock(e.key, &store.LockOptions{[]byte(e.value), defaultLockTTL, nil}) 109 | if err != nil { 110 | log.Fatalf("Fail to create distribution locker, %s", err.Error()) 111 | } 112 | for { 113 | e.leader = false 114 | log.Debug("Try to get the lock for becoming a leader") 115 | // follower will block here waiting for lock 116 | lostCh, err := lock.Lock(nil) 117 | if err != nil { 118 | log.Errorf("Fail to lock %s:%s", e.key, err.Error()) 119 | time.Sleep(time.Second * 3) // sleep for a while to try again 120 | continue 121 | } 122 | 123 | log.Debug("Becomming a leader") 124 | // leader will block here until it stoped or others becoming leader 125 | e.leader = true 126 | select { 127 | case <-stop: 128 | // stop election 129 | log.Debug("Get a stop-signal, stop election routine") 130 | if err := lock.Unlock(); err != nil { 131 | log.Errorf("Fail to give up the leader identity, %s", err.Error()) 132 | } 133 | return 134 | case <-lostCh: 135 | // lost leader key, try to elect again 136 | } 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "os/signal" 8 | "runtime" 9 | "strings" 10 | "syscall" 11 | "time" 12 | 13 | "github.com/laincloud/deployd/apiserver" 14 | 
"github.com/laincloud/deployd/engine" 15 | "github.com/laincloud/deployd/utils/elector" 16 | "github.com/laincloud/deployd/utils/proxy" 17 | "github.com/laincloud/deployd/version" 18 | "github.com/mijia/sweb/log" 19 | ) 20 | 21 | func main() { 22 | var webAddr, swarmAddr, etcdAddr, advertise string 23 | var isDebug, V bool 24 | var refreshInterval, dependsGCTime, maxRestartTimes, restartInfoClearInterval int 25 | 26 | flag.StringVar(&advertise, "advertise", "", "The address advertise to other peers, this will open HA mode") 27 | flag.StringVar(&webAddr, "web", ":9000", "The address which lain-deployd is listenning on") 28 | flag.StringVar(&swarmAddr, "swarm", "", "The tcp://: address that Swarm master is deployed") 29 | flag.StringVar(&etcdAddr, "etcd", "", "The etcd cluster access points, e.g. http://127.0.0.1:4001") 30 | flag.IntVar(&dependsGCTime, "dependsGCTime", 5, "The depends garbage collection time (minutes)") 31 | flag.IntVar(&refreshInterval, "refreshInterval", 90, "The refresh interval time (seconds)") 32 | flag.IntVar(&maxRestartTimes, "maxRestartTimes", 3, "The max restart times for pod") 33 | flag.IntVar(&restartInfoClearInterval, "restartInfoClearInterval", 30, "The interval to clear restart info (minutes)") 34 | flag.BoolVar(&isDebug, "debug", false, "Debug mode switch") 35 | flag.BoolVar(&V, "version", false, "Show version") 36 | flag.Parse() 37 | 38 | if V { 39 | fmt.Printf("deployd version %s\n", version.Version) 40 | fmt.Printf("Git SHA: %s\n", version.GitSHA) 41 | fmt.Printf("Go Version: %s\n", runtime.Version()) 42 | 43 | return 44 | } 45 | 46 | usage(swarmAddr != "", "Please provide the swarm master address!") 47 | usage(etcdAddr != "", "Please provide the etcd access points address!") 48 | 49 | if isDebug { 50 | log.EnableDebug() 51 | } 52 | 53 | engine.DependsGarbageCollectTimeout = time.Duration(dependsGCTime) * time.Minute 54 | engine.RefreshInterval = refreshInterval 55 | engine.RestartMaxCount = maxRestartTimes 56 | 
engine.RestartInfoClearInterval = time.Duration(restartInfoClearInterval) * time.Minute 57 | 58 | server := apiserver.New(swarmAddr, etcdAddr, isDebug) 59 | 60 | engine.ConfigPortsManager(etcdAddr) 61 | 62 | if advertise == "" { 63 | // no advertise, running without election 64 | go server.ListenAndServe(webAddr) 65 | } else { 66 | // running with election, make deploy service HA 67 | elec, err := elector.New(strings.Split(etcdAddr, ","), elector.LeaderKey, advertise) 68 | if err != nil { 69 | log.Fatal(err.Error()) 70 | } 71 | stop := make(chan struct{}) 72 | defer close(stop) 73 | leaderCh := elec.Run(stop) 74 | 75 | p := proxy.New(webAddr, "") 76 | 77 | // run api server 78 | go func() { 79 | for leader := range leaderCh { 80 | if leader == advertise { 81 | log.Infof("Becomming a leader, shutdown proxy server") 82 | p.Shutdown() // stop proxy 83 | log.Infof("Starting api server") 84 | go server.ListenAndServe(webAddr) // start api server 85 | } else { 86 | log.Infof("Becomming a follower, shutdown api server") 87 | server.Shutdown() // stop api server 88 | log.Infof("Starting proxy server") 89 | p.SetDest(leader) 90 | go p.Run() // start proxy 91 | } 92 | } 93 | }() 94 | } 95 | 96 | waitSignal() 97 | } 98 | 99 | func waitSignal() { 100 | ch := make(chan os.Signal) 101 | signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM) 102 | // do some ending job 103 | log.Infof("Get signal %s, exit.", <-ch) 104 | } 105 | 106 | func usage(condition bool, msg string) { 107 | if !condition { 108 | fmt.Println("lain-deployd:") 109 | fmt.Println(" " + msg) 110 | flag.Usage() 111 | os.Exit(1) 112 | } 113 | } 114 | 115 | func init() { 116 | runtime.GOMAXPROCS(runtime.NumCPU()) 117 | } 118 | -------------------------------------------------------------------------------- /utils/units/size_test.go: -------------------------------------------------------------------------------- 1 | package units 2 | 3 | import ( 4 | "reflect" 5 | "runtime" 6 | "strings" 7 | "testing" 8 | ) 9 | 10 | 
func TestBytesSize(t *testing.T) { 11 | assertEquals(t, "1 KiB", BytesSize(1024)) 12 | assertEquals(t, "1 MiB", BytesSize(1024*1024)) 13 | assertEquals(t, "1 MiB", BytesSize(1048576)) 14 | assertEquals(t, "2 MiB", BytesSize(2*MiB)) 15 | assertEquals(t, "3.42 GiB", BytesSize(3.42*GiB)) 16 | assertEquals(t, "5.372 TiB", BytesSize(5.372*TiB)) 17 | assertEquals(t, "2.22 PiB", BytesSize(2.22*PiB)) 18 | } 19 | 20 | func TestHumanSize(t *testing.T) { 21 | assertEquals(t, "1 kB", HumanSize(1000)) 22 | assertEquals(t, "1.024 kB", HumanSize(1024)) 23 | assertEquals(t, "1 MB", HumanSize(1000000)) 24 | assertEquals(t, "1.049 MB", HumanSize(1048576)) 25 | assertEquals(t, "2 MB", HumanSize(2*MB)) 26 | assertEquals(t, "3.42 GB", HumanSize(float64(3.42*GB))) 27 | assertEquals(t, "5.372 TB", HumanSize(float64(5.372*TB))) 28 | assertEquals(t, "2.22 PB", HumanSize(float64(2.22*PB))) 29 | } 30 | 31 | func TestFromHumanSize(t *testing.T) { 32 | assertSuccessEquals(t, 32, FromHumanSize, "32") 33 | assertSuccessEquals(t, 32, FromHumanSize, "32b") 34 | assertSuccessEquals(t, 32, FromHumanSize, "32B") 35 | assertSuccessEquals(t, 32*KB, FromHumanSize, "32k") 36 | assertSuccessEquals(t, 32*KB, FromHumanSize, "32K") 37 | assertSuccessEquals(t, 32*KB, FromHumanSize, "32kb") 38 | assertSuccessEquals(t, 32*KB, FromHumanSize, "32Kb") 39 | assertSuccessEquals(t, 32*MB, FromHumanSize, "32Mb") 40 | assertSuccessEquals(t, 32*GB, FromHumanSize, "32Gb") 41 | assertSuccessEquals(t, 32*TB, FromHumanSize, "32Tb") 42 | assertSuccessEquals(t, 32*PB, FromHumanSize, "32Pb") 43 | 44 | assertError(t, FromHumanSize, "") 45 | assertError(t, FromHumanSize, "hello") 46 | assertError(t, FromHumanSize, "-32") 47 | assertError(t, FromHumanSize, "32.3") 48 | assertError(t, FromHumanSize, " 32 ") 49 | assertError(t, FromHumanSize, "32.3Kb") 50 | assertError(t, FromHumanSize, "32 mb") 51 | assertError(t, FromHumanSize, "32m b") 52 | assertError(t, FromHumanSize, "32bm") 53 | } 54 | 55 | func TestRAMInBytes(t *testing.T) 
{ 56 | assertSuccessEquals(t, 32, RAMInBytes, "32") 57 | assertSuccessEquals(t, 32, RAMInBytes, "32b") 58 | assertSuccessEquals(t, 32, RAMInBytes, "32B") 59 | assertSuccessEquals(t, 32*KiB, RAMInBytes, "32k") 60 | assertSuccessEquals(t, 32*KiB, RAMInBytes, "32K") 61 | assertSuccessEquals(t, 32*KiB, RAMInBytes, "32kb") 62 | assertSuccessEquals(t, 32*KiB, RAMInBytes, "32Kb") 63 | assertSuccessEquals(t, 32*MiB, RAMInBytes, "32Mb") 64 | assertSuccessEquals(t, 32*GiB, RAMInBytes, "32Gb") 65 | assertSuccessEquals(t, 32*TiB, RAMInBytes, "32Tb") 66 | assertSuccessEquals(t, 32*PiB, RAMInBytes, "32Pb") 67 | assertSuccessEquals(t, 32*PiB, RAMInBytes, "32PB") 68 | assertSuccessEquals(t, 32*PiB, RAMInBytes, "32P") 69 | 70 | assertError(t, RAMInBytes, "") 71 | assertError(t, RAMInBytes, "hello") 72 | assertError(t, RAMInBytes, "-32") 73 | assertError(t, RAMInBytes, "32.3") 74 | assertError(t, RAMInBytes, " 32 ") 75 | assertError(t, RAMInBytes, "32.3Kb") 76 | assertError(t, RAMInBytes, "32 mb") 77 | assertError(t, RAMInBytes, "32m b") 78 | assertError(t, RAMInBytes, "32bm") 79 | } 80 | 81 | func assertEquals(t *testing.T, expected, actual interface{}) { 82 | if expected != actual { 83 | t.Errorf("Expected '%v' but got '%v'", expected, actual) 84 | } 85 | } 86 | 87 | // func that maps to the parse function signatures as testing abstraction 88 | type parseFn func(string) (int64, error) 89 | 90 | // Define 'String()' for pretty-print 91 | func (fn parseFn) String() string { 92 | fnName := runtime.FuncForPC(reflect.ValueOf(fn).Pointer()).Name() 93 | return fnName[strings.LastIndex(fnName, ".")+1:] 94 | } 95 | 96 | func assertSuccessEquals(t *testing.T, expected int64, fn parseFn, arg string) { 97 | res, err := fn(arg) 98 | if err != nil || res != expected { 99 | t.Errorf("%s(\"%s\") -> expected '%d' but got '%d' with error '%v'", fn, arg, expected, res, err) 100 | } 101 | } 102 | 103 | func assertError(t *testing.T, fn parseFn, arg string) { 104 | res, err := fn(arg) 105 | if err == 
nil && res != -1 { 106 | t.Errorf("%s(\"%s\") -> expected error but got '%d'", fn, arg, res) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /engine/events.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "strings" 5 | "sync" 6 | "sync/atomic" 7 | 8 | "github.com/laincloud/deployd/utils/util" 9 | "github.com/mijia/adoc" 10 | "github.com/mijia/sweb/log" 11 | ) 12 | 13 | type Listener interface { 14 | ListenerId() string 15 | HandleEvent(payload interface{}) 16 | } 17 | 18 | type Publisher interface { 19 | EmitEvent(payload interface{}) 20 | AddListener(subscriber Listener) 21 | RemoveListener(subscriber Listener) 22 | } 23 | 24 | type _BasePublisher struct { 25 | sync.RWMutex 26 | goRoutine bool 27 | listeners map[string]Listener 28 | } 29 | 30 | func NewPublisher(goRoutine bool) Publisher { 31 | return &_BasePublisher{ 32 | goRoutine: goRoutine, 33 | listeners: make(map[string]Listener), 34 | } 35 | } 36 | 37 | func (pub *_BasePublisher) EmitEvent(payload interface{}) { 38 | pub.RLock() 39 | listeners := make([]Listener, 0, len(pub.listeners)) 40 | for _, listener := range pub.listeners { 41 | listeners = append(listeners, listener) 42 | } 43 | pub.RUnlock() 44 | 45 | emitFn := func() { 46 | for _, listener := range listeners { 47 | listener.HandleEvent(payload) 48 | } 49 | } 50 | if pub.goRoutine { 51 | go emitFn() 52 | } else { 53 | emitFn() 54 | } 55 | } 56 | 57 | func (pub *_BasePublisher) AddListener(listener Listener) { 58 | pub.Lock() 59 | defer pub.Unlock() 60 | pub.listeners[listener.ListenerId()] = listener 61 | } 62 | 63 | func (pub *_BasePublisher) RemoveListener(listener Listener) { 64 | pub.Lock() 65 | defer pub.Unlock() 66 | delete(pub.listeners, listener.ListenerId()) 67 | } 68 | 69 | //*************************container events ****************************// 70 | func handleDieEvent(engine *OrcEngine, event *adoc.Event) 
{ 71 | actor := event.Actor 72 | if name, ok := actor.Attributes["name"]; ok { 73 | if pgname, _, instance, _, err := util.ParseContainerName(name); err == nil { 74 | engine.RLock() 75 | pgCtrl, ok := engine.pgCtrls[pgname] 76 | engine.RUnlock() 77 | if !ok { 78 | return 79 | } 80 | pgCtrl.RLock() 81 | spec := pgCtrl.spec.Clone() 82 | pgCtrl.RUnlock() 83 | if atomic.LoadInt32((*int32)(&pgCtrl.opState)) != PGOpStateUpgrading { 84 | log.Warnf("got %s event from %s, refresh this instance", event.Status, name) 85 | pgCtrl.opsChan <- pgOperRefreshInstance{instance, spec} 86 | } 87 | } 88 | } 89 | } 90 | 91 | func handleContainerEvent(engine *OrcEngine, event *adoc.Event) { 92 | if strings.HasPrefix(event.Status, "health_status") { 93 | id := event.ID 94 | if cont, err := engine.cluster.InspectContainer(id); err == nil { 95 | status := HealthState(HealthStateNone) 96 | switch event.Status { 97 | case "health_status: starting": 98 | status = HealthStateStarting 99 | break 100 | case "health_status: healthy": 101 | status = HealthStateHealthy 102 | break 103 | case "health_status: unhealthy": 104 | status = HealthStateUnHealthy 105 | break 106 | } 107 | containerName := strings.TrimLeft(cont.Name, "/") 108 | if podName, instance, err := util.ParseNameInstanceNo(containerName); err == nil { 109 | pgCtrl, ok := engine.pgCtrls[podName] 110 | if ok { 111 | pgCtrl.Lock() 112 | if len(pgCtrl.podCtrls) >= instance { 113 | podCtrl := pgCtrl.podCtrls[instance-1] 114 | podCtrl.pod.Healthst = status 115 | if status == HealthStateHealthy { 116 | podCtrl.launchEvent(struct{}{}) 117 | } 118 | pgCtrl.opsChan <- pgOperSnapshotGroup{true} 119 | pgCtrl.opsChan <- pgOperSaveStore{true} 120 | } 121 | pgCtrl.Unlock() 122 | } 123 | } 124 | } else { 125 | log.Errorf("ParseNameInstanceNo error:%v", err) 126 | } 127 | } else { 128 | switch event.Status { 129 | case adoc.DockerEventStop: 130 | savePodStaHstry(engine, event) 131 | case adoc.DockerEventStart: 132 | savePodStaHstry(engine, event) 133 
| case adoc.DockerEventDie: 134 | // operations like OOM, Stop, Kill all emit Die Event. 135 | // so we can just handle Die event and skip OOM, Stop and Kill event 136 | handleDieEvent(engine, event) 137 | } 138 | } 139 | } 140 | 141 | func HandleDockerEvent(engine *OrcEngine, event *adoc.Event) { 142 | switch event.Type { 143 | case adoc.ContainerEventType: 144 | handleContainerEvent(engine, event) 145 | break 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /storage/etcd/store.go: -------------------------------------------------------------------------------- 1 | package etcd 2 | 3 | import ( 4 | "encoding/json" 5 | "hash/fnv" 6 | "strings" 7 | "sync" 8 | "time" 9 | 10 | "github.com/coreos/etcd/client" 11 | "github.com/laincloud/deployd/storage" 12 | "golang.org/x/net/context" 13 | ) 14 | 15 | type EtcdStore struct { 16 | keysApi client.KeysAPI 17 | ctx context.Context 18 | 19 | sync.RWMutex 20 | keyHashes map[string]uint64 21 | } 22 | 23 | func (store *EtcdStore) GetRaw(key string) (string, error) { 24 | resp, err := store.keysApi.Get(store.ctx, key, &client.GetOptions{Quorum: true}) 25 | if err != nil { 26 | if cerr, ok := err.(client.Error); ok && cerr.Code == client.ErrorCodeKeyNotFound { 27 | return "", storage.KMissingError 28 | } 29 | return "", err 30 | } 31 | if resp.Node == nil { 32 | return "", storage.KNilNodeError 33 | } 34 | if resp.Node.Dir { 35 | return "", storage.KDirNodeError 36 | } 37 | return resp.Node.Value, nil 38 | } 39 | 40 | func (store *EtcdStore) Get(key string, v interface{}) error { 41 | value, err := store.GetRaw(key) 42 | if err != nil { 43 | return err 44 | } 45 | if err := json.Unmarshal([]byte(value), v); err != nil { 46 | return err 47 | } 48 | return nil 49 | } 50 | 51 | func (store *EtcdStore) Watch(key string) chan string { 52 | resp := make(chan string) 53 | errSleepTime := 10 * time.Second 54 | go func() { 55 | for { 56 | wather := store.keysApi.Watcher(key, 
&client.WatcherOptions{Recursive: true}) 57 | for { 58 | if response, err := wather.Next(store.ctx); err == nil { 59 | if response.Node == nil || response.Node.Dir { 60 | continue 61 | } 62 | resp <- response.Node.Value 63 | } 64 | } 65 | time.Sleep(errSleepTime) 66 | } 67 | }() 68 | return resp 69 | } 70 | 71 | func (store *EtcdStore) KeysByPrefix(prefix string) ([]string, error) { 72 | // Prefix should corresponding to a directory name, and will return all the nodes inside the directory 73 | keys := make([]string, 0) 74 | if resp, err := store.keysApi.Get(store.ctx, prefix, &client.GetOptions{Quorum: true}); err != nil { 75 | if cerr, ok := err.(client.Error); ok && cerr.Code == client.ErrorCodeKeyNotFound { 76 | return keys, storage.KMissingError 77 | } 78 | return keys, err 79 | } else { 80 | if resp.Node == nil { 81 | return keys, storage.KNilNodeError 82 | } 83 | if !resp.Node.Dir { 84 | return keys, storage.KNonDirNodeError 85 | } 86 | for _, node := range resp.Node.Nodes { 87 | if node != nil { 88 | keys = append(keys, node.Key) 89 | } 90 | } 91 | } 92 | return keys, nil 93 | } 94 | 95 | func (store *EtcdStore) Set(key string, v interface{}, force ...bool) error { 96 | return store.SetWithTTL(key, v, -1, force...) 
97 | } 98 | 99 | func (store *EtcdStore) SetWithTTL(key string, v interface{}, ttlSec int, force ...bool) error { 100 | if data, err := json.Marshal(v); err != nil { 101 | return err 102 | } else { 103 | h := fnv.New64a() 104 | h.Write(data) 105 | dataHash := h.Sum64() 106 | forceSave := false 107 | if len(force) > 0 { 108 | forceSave = force[0] 109 | } 110 | 111 | store.Lock() 112 | defer store.Unlock() 113 | if !forceSave { 114 | if lastHash, ok := store.keyHashes[key]; ok && lastHash == dataHash { 115 | return nil 116 | } 117 | } 118 | var setOpts *client.SetOptions 119 | if ttlSec > 0 { 120 | setOpts = &client.SetOptions{TTL: time.Duration(ttlSec) * time.Second} 121 | } 122 | _, err := store.keysApi.Set(store.ctx, key, string(data), setOpts) 123 | if err == nil { 124 | store.keyHashes[key] = dataHash 125 | } 126 | return err 127 | } 128 | } 129 | 130 | func (store *EtcdStore) Remove(key string) error { 131 | _, err := store.keysApi.Delete(store.ctx, key, nil) 132 | if err == nil { 133 | store.Lock() 134 | delete(store.keyHashes, key) 135 | store.Unlock() 136 | } 137 | return err 138 | } 139 | 140 | func (store *EtcdStore) RemoveDir(key string) error { 141 | err := store.deleteDir(key, true) 142 | return err 143 | } 144 | 145 | func (store *EtcdStore) TryRemoveDir(key string) { 146 | store.deleteDir(key, false) 147 | } 148 | 149 | func (store *EtcdStore) deleteDir(key string, recursive bool) error { 150 | opts := client.DeleteOptions{ 151 | Recursive: recursive, 152 | Dir: true, 153 | } 154 | _, err := store.keysApi.Delete(store.ctx, key, &opts) 155 | return err 156 | } 157 | 158 | func NewStore(addr string, isDebug bool) (storage.Store, error) { 159 | c, err := client.New(client.Config{ 160 | Endpoints: strings.Split(addr, ","), 161 | }) 162 | if err != nil { 163 | return nil, err 164 | } 165 | if false && isDebug { 166 | client.EnablecURLDebug() 167 | } 168 | s := &EtcdStore{ 169 | keysApi: client.NewKeysAPI(c), 170 | ctx: context.Background(), 171 | 
keyHashes: make(map[string]uint64), 172 | } 173 | return s, nil 174 | } 175 | -------------------------------------------------------------------------------- /fixtures/requests.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "flag" 7 | "fmt" 8 | "io/ioutil" 9 | "net/http" 10 | "runtime" 11 | ) 12 | 13 | var apiServer string 14 | 15 | func main() { 16 | var command string 17 | flag.StringVar(&apiServer, "api", "http://localhost:9000", "Deployd api server address") 18 | flag.StringVar(&command, "cmd", "create", "Command to test the server") 19 | flag.Parse() 20 | 21 | procName := "hello/procs/web/foo" 22 | 23 | switch command { 24 | case "create": 25 | createProc(procName) 26 | case "rm": 27 | removeProc(procName) 28 | case "get": 29 | getProc(procName) 30 | case "ctrl": 31 | controlProc(procName) 32 | case "r1": 33 | rescheduleProc(procName) 34 | case "r2": 35 | rescheduleProc2(procName) 36 | case "clean": 37 | cleanProc(procName) 38 | default: 39 | fmt.Println("Unknown command", command) 40 | } 41 | } 42 | 43 | func rescheduleProc2(procName string) { 44 | p := map[string]interface{}{ 45 | "num_instances": 1, 46 | } 47 | body, err := json.Marshal(p) 48 | if err != nil { 49 | panic(err) 50 | } 51 | data, err := sendRequest("PATCH", "api/apps/"+procName+"?command=reschedule", body, nil) 52 | if err != nil { 53 | panic(err) 54 | } 55 | fmt.Println("Instance Schedule Example") 56 | fmt.Println(string(data)) 57 | } 58 | 59 | func rescheduleProc(procName string) { 60 | p := map[string]interface{}{ 61 | "cpu": 0, 62 | "num_instances": 2, 63 | "memory": "25m", 64 | "env": []string{"DEBUG=true"}, 65 | } 66 | body, err := json.Marshal(p) 67 | if err != nil { 68 | panic(err) 69 | } 70 | data, err := sendRequest("PATCH", "api/apps/"+procName+"?command=reschedule", body, nil) 71 | if err != nil { 72 | panic(err) 73 | } 74 | fmt.Println("Full Schedule Example") 75 | 
fmt.Println(string(data)) 76 | } 77 | 78 | func cleanProc(procName string) { 79 | data, err := sendRequest("PATCH", "api/apps/"+procName+"?command=clean", nil, nil) 80 | if err != nil { 81 | panic(err) 82 | } 83 | fmt.Println("clean") 84 | fmt.Println(string(data)) 85 | } 86 | 87 | func controlProc(procName string) { 88 | ctrlRequest := func(command string) { 89 | fmt.Println("Running patch command", command) 90 | data, err := sendRequest("PATCH", "api/apps/"+procName+"?command="+command, nil, nil) 91 | if err != nil { 92 | panic(err) 93 | } 94 | fmt.Println(string(data)) 95 | } 96 | ctrlRequest("stop") 97 | ctrlRequest("start") 98 | ctrlRequest("restart") 99 | } 100 | 101 | func getProc(procName string) { 102 | data, err := sendRequest("GET", "api/apps/"+procName+"?force_update=true", nil, nil) 103 | if err != nil { 104 | panic(err) 105 | } 106 | fmt.Println(string(data)) 107 | } 108 | 109 | func removeProc(procName string) { 110 | _, err := sendRequest("DELETE", "api/apps/"+procName+"?force=true", nil, nil) 111 | if err != nil { 112 | panic(err) 113 | } 114 | } 115 | 116 | func createProc(procName string) { 117 | var p struct { 118 | Image string `json:"image"` 119 | Env []string `json:"env"` 120 | User string `json:"user"` 121 | WorkingDir string `json:"working_dir"` 122 | Volumes []string `jsone"volumes"` 123 | Command []string `json:"command"` 124 | NumInstances int `json:"num_instances"` 125 | CpuLimit int `json:"cpu"` 126 | MemoryLimit string `json:"memory"` 127 | Expose int `json:"expose"` 128 | } 129 | p.Image = "training/webapp" 130 | p.Command = []string{"python", "app.py"} 131 | p.Expose = 5000 132 | p.NumInstances = 1 133 | p.CpuLimit = 1 134 | p.MemoryLimit = "10m" 135 | p.Volumes = []string{"tmp"} 136 | 137 | body, err := json.Marshal(p) 138 | if err != nil { 139 | panic(err) 140 | } 141 | data, err := sendRequest("POST", "api/apps/"+procName, body, nil) 142 | if err != nil { 143 | panic(err) 144 | } 145 | fmt.Println(string(data)) 146 | } 147 | 148 
| func sendRequest(method string, path string, body []byte, headers map[string]string) ([]byte, error) { 149 | b := bytes.NewBuffer(body) 150 | urlPath := fmt.Sprintf("%s/%s", apiServer, path) 151 | fmt.Printf("SendRequest %q, [%s]\n", method, urlPath) 152 | req, err := http.NewRequest(method, urlPath, b) 153 | if err != nil { 154 | return nil, err 155 | } 156 | req.Header.Add("Content-Type", "application/json") 157 | if headers != nil { 158 | for key, value := range headers { 159 | req.Header.Add(key, value) 160 | } 161 | } 162 | resp, err := httpClient.Do(req) 163 | if err != nil { 164 | return nil, err 165 | } 166 | if resp.StatusCode >= 400 { 167 | return nil, fmt.Errorf("Response error: %d - %s", resp.StatusCode, resp.Status) 168 | } 169 | 170 | defer resp.Body.Close() 171 | data, err := ioutil.ReadAll(resp.Body) 172 | return data, err 173 | } 174 | 175 | var httpClient *http.Client 176 | 177 | func init() { 178 | runtime.GOMAXPROCS(runtime.NumCPU()) 179 | httpClient = &http.Client{} 180 | } 181 | -------------------------------------------------------------------------------- /fixtures/podgroup.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "flag" 7 | "fmt" 8 | "io/ioutil" 9 | "net/http" 10 | "net/url" 11 | "runtime" 12 | 13 | "github.com/laincloud/deployd/engine" 14 | ) 15 | 16 | var apiServer string 17 | 18 | func main() { 19 | var command string 20 | flag.StringVar(&apiServer, "api", "http://localhost:9000", "Deployd api server address") 21 | flag.StringVar(&command, "cmd", "create", "Command to test the server") 22 | flag.Parse() 23 | 24 | pgName := "hello.proc.web.foo" 25 | depName := "rpcserver.proc.portal" 26 | 27 | switch command { 28 | case "create": 29 | create(pgName) 30 | case "get": 31 | inspect(pgName) 32 | case "rm": 33 | remove(pgName) 34 | case "ri": 35 | patchInstance(pgName) 36 | case "rs": 37 | patchSpec(pgName) 38 | case "dcreate": 
39 | createDependency(depName) 40 | case "dget": 41 | getDependency(depName) 42 | case "drm": 43 | removeDependency(depName) 44 | case "dup": 45 | updateDependency(depName) 46 | default: 47 | fmt.Println("Unknown command", command) 48 | } 49 | } 50 | 51 | func removeDependency(depName string) { 52 | data, err := sendRequest("DELETE", "api/depends?name="+depName, nil, nil) 53 | if err != nil { 54 | panic(err) 55 | } 56 | fmt.Println(string(data)) 57 | } 58 | 59 | func getDependency(depName string) { 60 | data, err := sendRequest("GET", "api/depends?name="+depName, nil, nil) 61 | if err != nil { 62 | panic(err) 63 | } 64 | fmt.Println(string(data)) 65 | } 66 | 67 | func updateDependency(depName string) { 68 | containerSpec := getContainerSpec() 69 | podSpec := engine.NewPodSpec(containerSpec) 70 | podSpec.Name = "rpcserver.proc.portal" 71 | podSpec.Namespace = "rpcserver" 72 | body, err := json.Marshal(podSpec) 73 | if err != nil { 74 | panic(err) 75 | } 76 | data, err := sendRequest("PUT", "api/depends", body, nil) 77 | if err != nil { 78 | panic(err) 79 | } 80 | fmt.Println(string(data)) 81 | } 82 | 83 | func createDependency(depName string) { 84 | containerSpec := getContainerSpec() 85 | podSpec := engine.NewPodSpec(containerSpec) 86 | podSpec.Name = "rpcserver.proc.portal" 87 | podSpec.Namespace = "rpcserver" 88 | body, err := json.Marshal(podSpec) 89 | if err != nil { 90 | panic(err) 91 | } 92 | data, err := sendRequest("POST", "api/depends", body, nil) 93 | if err != nil { 94 | panic(err) 95 | } 96 | fmt.Println(string(data)) 97 | } 98 | 99 | func remove(pgName string) { 100 | data, err := sendRequest("DELETE", "api/podgroups?name="+pgName, nil, nil) 101 | if err != nil { 102 | panic(err) 103 | } 104 | fmt.Println(string(data)) 105 | } 106 | 107 | func inspect(pgName string) { 108 | data, err := sendRequest("GET", "api/podgroups?name="+pgName+"&force_update=false", nil, nil) 109 | if err != nil { 110 | panic(err) 111 | } 112 | fmt.Println(string(data)) 113 | } 
114 | 115 | func patchInstance(pgName string) { 116 | v := url.Values{} 117 | v.Set("name", pgName) 118 | v.Set("cmd", "replica") 119 | v.Set("num_instances", "2") 120 | data, err := sendRequest("PATCH", "api/podgroups?"+v.Encode(), nil, nil) 121 | if err != nil { 122 | panic(err) 123 | } 124 | fmt.Println(string(data)) 125 | } 126 | 127 | func patchSpec(pgName string) { 128 | containerSpec := getContainerSpec() 129 | containerSpec.MemoryLimit = 25 * 1024 * 1024 130 | podSpec := engine.NewPodSpec(containerSpec) 131 | podSpec.Name = pgName 132 | podSpec.Namespace = "hello" 133 | 134 | body, err := json.Marshal(podSpec) 135 | if err != nil { 136 | panic(err) 137 | } 138 | v := url.Values{} 139 | v.Set("name", pgName) 140 | v.Set("cmd", "spec") 141 | data, err := sendRequest("PATCH", "api/podgroups?"+v.Encode(), body, nil) 142 | if err != nil { 143 | panic(err) 144 | } 145 | fmt.Println(string(data)) 146 | } 147 | 148 | func create(pgName string) { 149 | containerSpec := getContainerSpec() 150 | podSpec := engine.NewPodSpec(containerSpec) 151 | //podSpec.Dependencies = []engine.Dependency{ 152 | //engine.Dependency{ 153 | //PodName: "rpcserver.proc.portal", 154 | //}, 155 | //} 156 | pgSpec := engine.NewPodGroupSpec(pgName, "hello", podSpec, 2) 157 | 158 | body, err := json.Marshal(pgSpec) 159 | if err != nil { 160 | panic(err) 161 | } 162 | data, err := sendRequest("POST", "api/podgroups", body, nil) 163 | if err != nil { 164 | panic(err) 165 | } 166 | fmt.Println(string(data)) 167 | } 168 | 169 | func getContainerSpec() engine.ContainerSpec { 170 | containerSpec := engine.NewContainerSpec("busybox") 171 | containerSpec.Entrypoint = []string{"/bin/sh", "-c", "while true; do echo Hello world; sleep 1; done"} 172 | containerSpec.MemoryLimit = 15 * 1024 * 1024 173 | containerSpec.Expose = 5000 174 | return containerSpec 175 | } 176 | 177 | func sendRequest(method string, path string, body []byte, headers map[string]string) ([]byte, error) { 178 | b := 
bytes.NewBuffer(body) 179 | urlPath := fmt.Sprintf("%s/%s", apiServer, path) 180 | fmt.Printf("SendRequest %q, [%s]\n", method, urlPath) 181 | req, err := http.NewRequest(method, urlPath, b) 182 | if err != nil { 183 | return nil, err 184 | } 185 | req.Header.Add("Content-Type", "application/json") 186 | if headers != nil { 187 | for key, value := range headers { 188 | req.Header.Add(key, value) 189 | } 190 | } 191 | resp, err := httpClient.Do(req) 192 | if err != nil { 193 | return nil, err 194 | } 195 | defer resp.Body.Close() 196 | data, err := ioutil.ReadAll(resp.Body) 197 | if resp.StatusCode >= 400 { 198 | fmt.Printf("Response error: %d - %s\n", resp.StatusCode, resp.Status) 199 | } 200 | return data, err 201 | } 202 | 203 | var httpClient *http.Client 204 | 205 | func init() { 206 | runtime.GOMAXPROCS(runtime.NumCPU()) 207 | httpClient = &http.Client{} 208 | } 209 | -------------------------------------------------------------------------------- /apiserver/podgroup.go: -------------------------------------------------------------------------------- 1 | package apiserver 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | 7 | "github.com/laincloud/deployd/engine" 8 | "github.com/mijia/sweb/form" 9 | "github.com/mijia/sweb/log" 10 | "github.com/mijia/sweb/server" 11 | "golang.org/x/net/context" 12 | ) 13 | 14 | type RestfulPodGroups struct { 15 | server.BaseResource 16 | } 17 | 18 | func (rpg RestfulPodGroups) Post(ctx context.Context, r *http.Request) (int, interface{}) { 19 | var pgSpec engine.PodGroupSpec 20 | if err := form.ParamBodyJson(r, &pgSpec); err != nil { 21 | log.Warnf("Failed to decode PodGroupSpec, %s", err) 22 | return http.StatusBadRequest, fmt.Sprintf("Invalid PodGroupSpec params format: %s", err) 23 | } 24 | if ok := pgSpec.VerifyParams(); !ok { 25 | return http.StatusBadRequest, fmt.Sprintf("Missing paremeters for PodGroupSpec") 26 | } 27 | 28 | orcEngine := getEngine(ctx) 29 | if err := orcEngine.NewPodGroup(pgSpec); err != nil { 30 | switch err 
{ 31 | case engine.ErrNotEnoughResources, engine.ErrPodGroupExists, engine.ErrDependencyPodNotExists: 32 | return http.StatusMethodNotAllowed, err.Error() 33 | default: 34 | return http.StatusInternalServerError, err.Error() 35 | } 36 | } 37 | 38 | urlReverser := getUrlReverser(ctx) 39 | return http.StatusAccepted, map[string]string{ 40 | "message": "PodGroupSpec added into the orc engine.", 41 | "check_url": urlReverser.Reverse("Get_RestfulPodGroups") + "?name=" + pgSpec.Name, 42 | } 43 | } 44 | 45 | func (rpg RestfulPodGroups) Delete(ctx context.Context, r *http.Request) (int, interface{}) { 46 | pgName := form.ParamString(r, "name", "") 47 | if pgName == "" { 48 | return http.StatusBadRequest, fmt.Sprintf("No pod group name provided.") 49 | } 50 | orcEngine := getEngine(ctx) 51 | if err := orcEngine.RemovePodGroup(pgName); err != nil { 52 | if err == engine.ErrPodGroupNotExists { 53 | return http.StatusNotFound, err.Error() 54 | } 55 | if _, ok := err.(engine.OperLockedError); ok { 56 | return http.StatusLocked, err.Error() 57 | } 58 | return http.StatusInternalServerError, err.Error() 59 | } 60 | 61 | urlReverser := getUrlReverser(ctx) 62 | return http.StatusAccepted, map[string]string{ 63 | "message": "PodGroupSpec will be deleted from the orc engine.", 64 | "check_url": urlReverser.Reverse("Get_RestfulPodGroups") + "?name=" + pgName, 65 | } 66 | } 67 | 68 | func (rpg RestfulPodGroups) Get(ctx context.Context, r *http.Request) (int, interface{}) { 69 | pgName := form.ParamString(r, "name", "") 70 | if pgName == "" { 71 | return http.StatusBadRequest, fmt.Sprintf("No pod group name provided.") 72 | } 73 | forceUpdate := form.ParamBoolean(r, "force_update", false) 74 | 75 | orcEngine := getEngine(ctx) 76 | if forceUpdate { 77 | if err := orcEngine.RefreshPodGroup(pgName, forceUpdate); err != nil { 78 | if err == engine.ErrPodGroupNotExists { 79 | return http.StatusNotFound, err.Error() 80 | } 81 | return http.StatusInternalServerError, err.Error() 82 | } 83 | } 
84 | podGroup, ok := orcEngine.InspectPodGroup(pgName) 85 | if !ok { 86 | return http.StatusNotFound, fmt.Sprintf("No such pod group name=%s", pgName) 87 | } 88 | return http.StatusOK, podGroup 89 | } 90 | 91 | func (rpg RestfulPodGroups) Patch(ctx context.Context, r *http.Request) (int, interface{}) { 92 | pgName := form.ParamString(r, "name", "") 93 | if pgName == "" { 94 | return http.StatusBadRequest, fmt.Sprintf("No pod group name provided.") 95 | } 96 | 97 | orcEngine := getEngine(ctx) 98 | options := []string{"replica", "spec", "operation"} 99 | cmd := form.ParamStringOptions(r, "cmd", options, "noop") 100 | var err error 101 | switch cmd { 102 | case "replica": 103 | numInstance := form.ParamInt(r, "num_instances", -1) 104 | restartOption := form.ParamStringOptions(r, "restart_policy", []string{"never, always, onfail"}, "na") 105 | restartPolicy := -1 106 | switch restartOption { 107 | case "never": 108 | restartPolicy = engine.RestartPolicyNever 109 | case "always": 110 | restartPolicy = engine.RestartPolicyAlways 111 | case "onfail": 112 | restartPolicy = engine.RestartPolicyOnFail 113 | } 114 | if numInstance < 0 { 115 | return http.StatusBadRequest, fmt.Sprintf("Bad parameter for num_instances, should be > 0 but %d", numInstance) 116 | } 117 | if restartPolicy != -1 { 118 | err = orcEngine.RescheduleInstance(pgName, numInstance, engine.RestartPolicy(restartPolicy)) 119 | } else { 120 | err = orcEngine.RescheduleInstance(pgName, numInstance) 121 | } 122 | case "spec": 123 | var podSpec engine.PodSpec 124 | if bodyErr := form.ParamBodyJson(r, &podSpec); bodyErr != nil { 125 | return http.StatusBadRequest, fmt.Sprintf("Bad parameter format for PodSpec, %s", bodyErr) 126 | } 127 | if !podSpec.VerifyParams() { 128 | return http.StatusBadRequest, fmt.Sprintf("Missing parameter for PodSpec") 129 | } 130 | err = orcEngine.RescheduleSpec(pgName, podSpec) 131 | case "operation": 132 | instance := form.ParamInt(r, "instance", 0) 133 | opTypeOptions := 
[]string{"start", "stop", "restart"} 134 | opType := form.ParamStringOptions(r, "optype", opTypeOptions, "noop") 135 | err = orcEngine.ChangeState(pgName, opType, instance) 136 | } 137 | 138 | if err != nil { 139 | if _, ok := err.(engine.OperLockedError); ok { 140 | return http.StatusLocked, err.Error() 141 | } 142 | switch err { 143 | case engine.ErrPodGroupNotExists: 144 | return http.StatusNotFound, err.Error() 145 | case engine.ErrNotEnoughResources, engine.ErrDependencyPodNotExists: 146 | return http.StatusMethodNotAllowed, err.Error() 147 | default: 148 | return http.StatusInternalServerError, err.Error() 149 | } 150 | } 151 | 152 | urlReverser := getUrlReverser(ctx) 153 | return http.StatusAccepted, map[string]string{ 154 | "message": "PodGroupSpec will be patched and rescheduled.", 155 | "check_url": urlReverser.Reverse("Get_RestfulPodGroups") + "?name=" + pgName, 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /engine/podgroup_test.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "testing" 7 | "time" 8 | 9 | "github.com/laincloud/deployd/cluster" 10 | "github.com/laincloud/deployd/cluster/swarm" 11 | "github.com/laincloud/deployd/storage" 12 | "github.com/laincloud/deployd/storage/etcd" 13 | ) 14 | 15 | func TestContainerSorter(t *testing.T) { 16 | containers := []*container{ 17 | &container{version: 1, instance: 1, driftCount: 1}, 18 | &container{version: 2, instance: 1, driftCount: 1}, 19 | &container{version: 1, instance: 1, driftCount: 2}, 20 | &container{version: 3, instance: 1, driftCount: 1}} 21 | By(ByVersionAndDriftCounter).Sort(containers) 22 | for _, container := range containers { 23 | fmt.Println(container) 24 | } 25 | } 26 | 27 | func TestPodGroupRefresh(t *testing.T) { 28 | etcdAddr := "http://127.0.0.1:2379" 29 | ConfigPortsManager(etcdAddr) 30 | c, store, err := 
initClusterAndStore() 31 | if err != nil { 32 | t.Fatalf("Cannot create the cluster and storage, %s", err) 33 | } 34 | 35 | engine, err := New(c, store) 36 | if err != nil { 37 | t.Fatalf("Cannot create the orc engine, %s", err) 38 | } 39 | 40 | namespace := "hello" 41 | name := "hello.proc.web.web" 42 | pgSpec := createPodGroupSpec(namespace, name, 2) 43 | pgSpec.RestartPolicy = RestartPolicyAlways 44 | if err := engine.NewPodGroup(pgSpec); err != nil { 45 | t.Fatalf("Should not return error, %s", err) 46 | } 47 | 48 | time.Sleep(20 * time.Second) 49 | if pg, ok := engine.InspectPodGroup(name); !ok { 50 | t.Errorf("We should have the pod group, but we don't get it") 51 | } else if pg.State != RunStateSuccess { 52 | t.Errorf("We should have the pod deployed and running, %#v", pg.State) 53 | } else { 54 | containerIds := pg.Pods[0].ContainerIds() 55 | for _, cId := range containerIds { 56 | c.RemoveContainer(cId, true, false) 57 | } 58 | } 59 | 60 | time.Sleep(20 * time.Second) 61 | 62 | if pg, ok := engine.InspectPodGroup(name); !ok { 63 | t.Errorf("We should have the pod group, but we don't get it") 64 | } else if pg.State != RunStateSuccess { 65 | t.Errorf("We should have the pod deployed and running, %#v", pg.State) 66 | } 67 | 68 | if err := engine.RemovePodGroup(name); err != nil { 69 | t.Errorf("We should be able to remove the pod group, %s", err) 70 | } 71 | 72 | time.Sleep(20 * time.Second) 73 | } 74 | 75 | func TestEnginePodGroup(t *testing.T) { 76 | etcdAddr := "http://127.0.0.1:2379" 77 | ConfigPortsManager(etcdAddr) 78 | c, store, err := initClusterAndStore() 79 | if err != nil { 80 | t.Fatalf("Cannot create the cluster and storage, %s", err) 81 | } 82 | 83 | engine, err := New(c, store) 84 | if err != nil { 85 | t.Fatalf("Cannot create the orc engine, %s", err) 86 | } 87 | 88 | namespace := "hello" 89 | name := "hello.proc.web.web" 90 | pgSpec := createPodGroupSpec(namespace, name, 1) 91 | if err := engine.NewPodGroup(pgSpec); err != nil { 92 | 
t.Fatalf("Should not return error, %s", err) 93 | } 94 | if err := engine.NewPodGroup(pgSpec); err == nil { 95 | t.Errorf("Should return exists error, but we got no problem") 96 | } 97 | 98 | time.Sleep(20 * time.Second) 99 | if pg, ok := engine.InspectPodGroup(name); !ok { 100 | t.Errorf("We should have the pod group, but we don't get it") 101 | } else if pg.State != RunStateSuccess { 102 | t.Errorf("We should have the pod deployed and running") 103 | } 104 | 105 | engine.RescheduleInstance(name, 3) 106 | time.Sleep(20 * time.Second) 107 | if pg, ok := engine.InspectPodGroup(name); !ok { 108 | t.Errorf("We should have the pod group, but we don't get it") 109 | } else if len(pg.Pods) != 3 { 110 | t.Errorf("We should have 3 instance of the pods") 111 | } 112 | 113 | engine.RescheduleInstance(name, 1) 114 | time.Sleep(30 * time.Second) 115 | if pg, ok := engine.InspectPodGroup(name); !ok { 116 | t.Errorf("We should have the pod group, but we don't get it") 117 | } else if len(pg.Pods) != 1 { 118 | bytes, err := json.Marshal(pg.Pods) 119 | pods := "" 120 | if err == nil { 121 | pods = string(bytes) 122 | } 123 | t.Errorf("We should have 1 instance of the pods : %v", pods) 124 | } 125 | 126 | podSpec := createPodSpec(namespace, name) 127 | podSpec.Containers[0].MemoryLimit = 24 * 1024 * 1024 128 | engine.RescheduleSpec(name, podSpec) 129 | time.Sleep(40 * time.Second) 130 | if pg, ok := engine.InspectPodGroup(name); !ok { 131 | t.Errorf("We should have the pod group, but we don't get it") 132 | } else if pg.Spec.Version != 2 { 133 | t.Errorf("We should have version 2 of the pods") 134 | } 135 | 136 | if err := engine.RemovePodGroup(name); err != nil { 137 | t.Errorf("We should be able to remove the pod group, %s", err) 138 | } else if err := engine.NewPodGroup(pgSpec); err == nil { 139 | t.Errorf("We should not be able to deploy pod group again in short time we remove it") 140 | } 141 | time.Sleep(20 * time.Second) 142 | } 143 | 144 | func initClusterAndStore() 
(cluster.Cluster, storage.Store, error) { 145 | etcdAddr := "http://127.0.0.1:2379" 146 | swarmAddr := "tcp://127.0.0.1:2376" 147 | 148 | store, err := etcd.NewStore(etcdAddr, false) 149 | if err != nil { 150 | return nil, nil, err 151 | } 152 | 153 | c, err := swarm.NewCluster(swarmAddr, 30*time.Second, 10*time.Minute) 154 | if err != nil { 155 | return nil, nil, err 156 | } 157 | 158 | return c, store, nil 159 | } 160 | 161 | func createPodGroupSpec(namespace, name string, numInstance int) PodGroupSpec { 162 | podSpec := createPodSpec(namespace, name) 163 | return NewPodGroupSpec(name, namespace, podSpec, numInstance) 164 | } 165 | 166 | func createPodSpec(namespace, name string) PodSpec { 167 | cSpec := NewContainerSpec("busybox") 168 | cSpec.Command = []string{"/bin/sh", "-c", "while true; do echo Hello world; sleep 1; done"} 169 | cSpec.MemoryLimit = 15 * 1024 * 1024 170 | cSpec.Expose = 5000 171 | podSpec := NewPodSpec(cSpec) 172 | podSpec.Name = name 173 | podSpec.Namespace = namespace 174 | podSpec.Annotation = fmt.Sprintf("{\"test\":\"Unit test for %s\"}", name) 175 | return podSpec 176 | } 177 | -------------------------------------------------------------------------------- /engine/notify.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "errors" 7 | "fmt" 8 | "io/ioutil" 9 | "net/http" 10 | "strings" 11 | "sync" 12 | "time" 13 | 14 | "github.com/laincloud/deployd/storage" 15 | "github.com/mijia/sweb/log" 16 | ) 17 | 18 | var ( 19 | NotifyPodMissing = "LAIN found pod missing, ready to redeployd it" 20 | NotifyPodDown = "LAIN found pod down, ready to restart it" 21 | NotifyPodDownOOM = "LAIN found pod down with OOM, ready to restart it" 22 | NotifyLetPodGo = "LAIN found pod restart too many times in a short period, will let it go" 23 | NotifyPodIPLost = "LAIN found pod lost IP, please inform the SA team" 24 | NotifyPodUnHealthy = "LAIN found pod 
Unhealthy, please check your service" 25 | 26 | NotifyClusterUnHealthy = "LAIN found Cluster Manager Unhealthy, please check your cluster" 27 | NotifyClusterAbnormal = "LAIN found too many cluster nodes stoped in a short period, need stop the engine, please check your cluster" 28 | 29 | NotifyUpgradeFailedTmplt = "LAIN found Last version:%d upgrade is terrrible, please check your code carefully!!" 30 | ) 31 | 32 | type notifyController struct { 33 | sync.RWMutex 34 | 35 | callbacks map[string]string 36 | 37 | callbackChan chan NotifySpec 38 | } 39 | 40 | type NotifySpec struct { 41 | Level string 42 | Namespace string 43 | PodName string 44 | InstanceNo int 45 | Timestamp time.Time 46 | Message string 47 | } 48 | 49 | func NewNotifySpec(namespace string, podName string, instanceNo int, timestamp time.Time, message string) NotifySpec { 50 | notifySpec := NotifySpec{ 51 | Level: "Error", 52 | Namespace: namespace, 53 | PodName: podName, 54 | InstanceNo: instanceNo, 55 | Timestamp: timestamp, 56 | Message: message, 57 | } 58 | return notifySpec 59 | } 60 | 61 | func NewNotifyController(stop chan struct{}) *notifyController { 62 | nc := ¬ifyController{ 63 | callbacks: make(map[string]string), 64 | callbackChan: make(chan NotifySpec, 500), 65 | } 66 | nc.Activate(stop) 67 | return nc 68 | } 69 | 70 | func (nc *notifyController) LoadNotifies(store storage.Store) error { 71 | nc.Lock() 72 | defer nc.Unlock() 73 | callbacks := []string{} 74 | notifyKey := fmt.Sprintf("%s/%s", kLainDeploydRootKey, kLainNotifyKey) 75 | if err := store.Get(notifyKey, &callbacks); err != nil { 76 | if err != storage.KMissingError { 77 | log.Errorf("Failed to load nofities from storage, %s", err) 78 | return err 79 | } 80 | } 81 | for i := 0; i < len(callbacks); i++ { 82 | nc.callbacks[callbacks[i]] = "" 83 | } 84 | return nil 85 | } 86 | 87 | func (nc *notifyController) GetAllNotifies() map[string]string { 88 | nc.RLock() 89 | defer nc.RUnlock() 90 | return nc.callbacks 91 | } 92 | 93 | func 
(nc *notifyController) AddNotify(callback string, store storage.Store) error { 94 | nc.Lock() 95 | defer nc.Unlock() 96 | notifyKey := fmt.Sprintf("%s/%s", kLainDeploydRootKey, kLainNotifyKey) 97 | notifyMap := make(map[string]string) 98 | for k, v := range nc.callbacks { 99 | notifyMap[k] = v 100 | } 101 | notifyMap[callback] = "" 102 | log.Infof("ready to set Notify val %s", notifyMap) 103 | if err := store.Set(notifyKey, nc.CallbackList(notifyMap)); err != nil { 104 | log.Warnf("Failed to set Notify val %s, %s", callback, err) 105 | return err 106 | } else { 107 | log.Infof("Success set Notify val %s", callback) 108 | } 109 | nc.callbacks[callback] = "" 110 | return nil 111 | } 112 | 113 | func (nc *notifyController) RemoveNotify(callback string, store storage.Store) error { 114 | nc.Lock() 115 | defer nc.Unlock() 116 | NotifyKey := fmt.Sprintf("%s/%s", kLainDeploydRootKey, kLainNotifyKey) 117 | notifyMap := make(map[string]string) 118 | for k, v := range nc.callbacks { 119 | notifyMap[k] = v 120 | } 121 | delete(notifyMap, callback) 122 | if err := store.Set(NotifyKey, nc.CallbackList(notifyMap)); err != nil { 123 | log.Warnf("Failed to remove Notify %s, %s", callback, err) 124 | return err 125 | } else { 126 | log.Infof("Success remove Notify %s", callback) 127 | } 128 | delete(nc.callbacks, callback) 129 | return nil 130 | } 131 | 132 | func (nc *notifyController) CallbackList(callbackMap map[string]string) []string { 133 | notifyValue := []string{} 134 | for callback, _ := range callbackMap { 135 | notifyValue = append(notifyValue, callback) 136 | } 137 | return notifyValue 138 | } 139 | 140 | func (nc *notifyController) Send(notifySpec NotifySpec) { 141 | nc.callbackChan <- notifySpec 142 | } 143 | 144 | func (nc *notifyController) Notify(notifySpec NotifySpec) { 145 | nc.Lock() 146 | defer nc.Unlock() 147 | callbackList := nc.CallbackList(nc.callbacks) 148 | for i := 0; i < len(callbackList); i++ { 149 | uri := callbackList[i] 150 | if err := 
nc.Callback(uri, notifySpec); err != nil { 151 | log.Errorf("Fail notify spec %s to %s: %s", notifySpec, uri, err) 152 | } 153 | } 154 | } 155 | 156 | func (nc *notifyController) Callback(uri string, notifySpec NotifySpec) error { 157 | if body, err := json.Marshal(notifySpec); err != nil { 158 | return err 159 | } else { 160 | req, err := http.NewRequest("POST", uri, bytes.NewBuffer(body)) 161 | if err != nil { 162 | return err 163 | } 164 | req.Header.Add("Content-Type", "application/json") 165 | resp, err := http.DefaultClient.Do(req) 166 | if err != nil { 167 | return err 168 | } 169 | if resp.StatusCode >= 300 { 170 | log.Infof("Error response from %s: status %s", resp.StatusCode) 171 | var errMsg []byte 172 | var cbErr error 173 | defer resp.Body.Close() 174 | if errMsg, cbErr = ioutil.ReadAll(resp.Body); cbErr != nil { 175 | return cbErr 176 | } 177 | return errors.New(strings.TrimSpace(string(errMsg))) 178 | } 179 | return nil 180 | } 181 | } 182 | 183 | func (nc *notifyController) Activate(stop chan struct{}) { 184 | log.Infof("Ready listen notify request...") 185 | go func() { 186 | for { 187 | select { 188 | case notifySpec := <-nc.callbackChan: 189 | nc.Notify(notifySpec) 190 | case <-stop: 191 | if len(nc.callbackChan) == 0 { 192 | log.Infof("Stop listen notify request...") 193 | return 194 | } 195 | } 196 | } 197 | }() 198 | } 199 | -------------------------------------------------------------------------------- /engine/eagleview.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "regexp" 7 | "strconv" 8 | "strings" 9 | "sync" 10 | "time" 11 | 12 | "github.com/laincloud/deployd/cluster" 13 | "github.com/mijia/adoc" 14 | "github.com/mijia/sweb/log" 15 | ) 16 | 17 | type RuntimeEaglePod struct { 18 | ContainerLabel 19 | Container adoc.Container 20 | ParseSource string 21 | } 22 | 23 | func (pod RuntimeEaglePod) String() string { 24 | return 
fmt.Sprintf("", 25 | pod.Name, pod.InstanceNo, pod.Version, pod.DriftCount, pod.ContainerIndex, 26 | pod.Container.Id[:12], pod.ParseSource) 27 | } 28 | 29 | type RuntimeEagleView struct { 30 | sync.RWMutex 31 | podGroups map[string][]RuntimeEaglePod // pgname => podGroups 32 | } 33 | 34 | func (ev *RuntimeEagleView) GetRuntimeEaglePods(name string) ([]RuntimeEaglePod, bool) { 35 | ev.RLock() 36 | defer ev.RUnlock() 37 | pods, ok := ev.podGroups[name] 38 | return pods, ok 39 | } 40 | 41 | func (ev *RuntimeEagleView) Refresh(c cluster.Cluster) error { 42 | totalContainers, totalPodGroups := 0, 0 43 | start := time.Now() 44 | defer func() { 45 | log.Infof(" refreshed, podContainers=%d, podGroups=%d, duration=%s", 46 | totalContainers, totalPodGroups, time.Now().Sub(start)) 47 | }() 48 | 49 | labelFilter := []string{"com.docker.swarm.id"} 50 | filters := map[string][]string{ 51 | "label": labelFilter, 52 | } 53 | podGroups := make(map[string][]RuntimeEaglePod) 54 | err := ev.refreshCallback(c, filters, func(pod RuntimeEaglePod) { 55 | name := pod.Name 56 | podGroups[name] = append(podGroups[name], pod) 57 | totalContainers += 1 58 | }) 59 | if err == nil { 60 | ev.Lock() 61 | ev.podGroups = podGroups 62 | ev.Unlock() 63 | totalPodGroups = len(podGroups) 64 | } 65 | return err 66 | } 67 | 68 | func (ev *RuntimeEagleView) RefreshPodGroup(c cluster.Cluster, pgName string) ([]RuntimeEaglePod, error) { 69 | totalContainers := 0 70 | start := time.Now() 71 | defer func() { 72 | log.Infof(" pod group %s refreshed, podContainers=%d, duration=%s", 73 | pgName, totalContainers, time.Now().Sub(start)) 74 | }() 75 | 76 | labelFilters := []string{ 77 | fmt.Sprintf("%s.pg_name=%s", kLainLabelPrefix, pgName), 78 | } 79 | if pods, err := ev.filterByLabels(c, labelFilters); err == nil { 80 | ev.Lock() 81 | ev.podGroups[pgName] = pods 82 | ev.Unlock() 83 | totalContainers = len(pods) 84 | return pods, nil 85 | } else { 86 | log.Errorf("refresh by filter failed :%v ", err) 87 | return 
nil, err 88 | } 89 | 90 | } 91 | 92 | func (ev *RuntimeEagleView) RefreshPodsByNamespace(c cluster.Cluster, namespace string) ([]RuntimeEaglePod, error) { 93 | totalContainers := 0 94 | start := time.Now() 95 | defer func() { 96 | log.Infof(" pods by namespace %s refreshed, #containers=%d, duration=%s", 97 | namespace, totalContainers, time.Now().Sub(start)) 98 | }() 99 | 100 | labelFilters := []string{ 101 | fmt.Sprintf("%s.pg_namespace=%s", kLainLabelPrefix, namespace), 102 | "com.docker.swarm.id", 103 | } 104 | pods, err := ev.filterByLabels(c, labelFilters) 105 | totalContainers = len(pods) 106 | return pods, err 107 | } 108 | 109 | func (ev *RuntimeEagleView) filterByLabels(c cluster.Cluster, labelFilters []string) ([]RuntimeEaglePod, error) { 110 | filters := map[string][]string{ 111 | "label": labelFilters, 112 | } 113 | return ev.refreshByFilters(c, filters) 114 | } 115 | 116 | func (ev *RuntimeEagleView) refreshByFilters(c cluster.Cluster, filters map[string][]string) ([]RuntimeEaglePod, error) { 117 | pods := make([]RuntimeEaglePod, 0, 10) 118 | err := ev.refreshCallback(c, filters, func(pod RuntimeEaglePod) { 119 | pods = append(pods, pod) 120 | }) 121 | return pods, err 122 | } 123 | 124 | func (ev *RuntimeEagleView) refreshCallback(c cluster.Cluster, filters map[string][]string, callback func(RuntimeEaglePod)) error { 125 | filterJson, err := json.Marshal(filters) 126 | if err != nil { 127 | log.Warnf(" Failed to encode the filter json, %s", err) 128 | return err 129 | } 130 | if containers, err := c.ListContainers(true, false, string(filterJson)); err != nil { 131 | log.Warnf(" Failed to list all containers from swarm, %s", err) 132 | return err 133 | } else { 134 | for _, container := range containers { 135 | var pod *RuntimeEaglePod 136 | if _pod, ok := ev.extractFromLabel(container); ok { 137 | pod = &_pod 138 | } else if _pod, ok := ev.extractFromName(container); ok { 139 | pod = &_pod 140 | } 141 | if pod != nil { 142 | log.Debugf("Found runtime 
eagle pod container, %s", pod) 143 | callback(*pod) 144 | } 145 | } 146 | return nil 147 | } 148 | } 149 | 150 | func (ev *RuntimeEagleView) Activate(c cluster.Cluster) { 151 | // FIXME do nothing for now, don't know if we need to refresh this yet 152 | // or should we monitor the cluster event 153 | // go ev.startEventMonitor(c) 154 | } 155 | 156 | func (ev *RuntimeEagleView) startEventMonitor(c cluster.Cluster) { 157 | // FIXME nothing here yet 158 | } 159 | 160 | func (ev *RuntimeEagleView) extractFromLabel(container adoc.Container) (RuntimeEaglePod, bool) { 161 | var pod RuntimeEaglePod 162 | if ok := pod.ContainerLabel.FromMaps(container.Labels); !ok { 163 | return pod, false 164 | } 165 | pod.Container = container 166 | pod.ParseSource = "label" 167 | return pod, true 168 | } 169 | 170 | var ( 171 | // name is like "node1/deploy.web.web.v0-i1-d0" or "/deploy.web.web.v0-i1-d0-c0" 172 | // also we may have "node1/deploy.web.web.v0-i1-d0-lain_did_it_ddddddddd" to rename the conflict one which we should not create those anymore 173 | lainContainerNamePattern = regexp.MustCompile("\\.v([0-9]+)-i([0-9]+)-d([0-9]+)(-c([0-9]+))*$") 174 | ) 175 | 176 | func (ev *RuntimeEagleView) extractFromName(container adoc.Container) (RuntimeEaglePod, bool) { 177 | var pod RuntimeEaglePod 178 | if len(container.Names) == 0 { 179 | return pod, false 180 | } 181 | parts := strings.Split(container.Names[0], "/") 182 | name := parts[len(parts)-1] 183 | matches := lainContainerNamePattern.FindStringSubmatch(name) 184 | if len(matches) != 6 { 185 | return pod, false 186 | } 187 | var err error 188 | pod.Name = strings.TrimSuffix(name, matches[0]) 189 | hasError := pod.Name == "" 190 | pod.Version, err = strconv.Atoi(matches[1]) 191 | hasError = hasError || err != nil 192 | pod.InstanceNo, err = strconv.Atoi(matches[2]) 193 | hasError = hasError || err != nil 194 | pod.DriftCount, err = strconv.Atoi(matches[3]) 195 | hasError = hasError || err != nil 196 | if matches[5] != "" { 197 | 
pod.ContainerIndex, err = strconv.Atoi(matches[5]) 198 | hasError = hasError || err != nil 199 | } 200 | pod.Container = container 201 | pod.ParseSource = "name" 202 | return pod, !hasError 203 | } 204 | 205 | func NewRuntimeEagleView() *RuntimeEagleView { 206 | ev := &RuntimeEagleView{ 207 | podGroups: make(map[string][]RuntimeEaglePod), 208 | } 209 | return ev 210 | } 211 | -------------------------------------------------------------------------------- /apiserver/server.go: -------------------------------------------------------------------------------- 1 | package apiserver 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | "time" 8 | 9 | "github.com/laincloud/deployd/cluster/swarm" 10 | "github.com/laincloud/deployd/engine" 11 | "github.com/laincloud/deployd/network" 12 | setcd "github.com/laincloud/deployd/storage/etcd" 13 | "github.com/mijia/sweb/log" 14 | "github.com/mijia/sweb/server" 15 | "golang.org/x/net/context" 16 | ) 17 | 18 | type UrlReverser interface { 19 | Reverse(name string, params ...interface{}) string 20 | Assets(path string) string 21 | } 22 | 23 | type Server struct { 24 | *server.Server 25 | 26 | swarmAddress string 27 | etcdAddress string 28 | isDebug bool 29 | started bool 30 | engine *engine.OrcEngine 31 | runtime *server.RuntimeWare 32 | } 33 | 34 | func (s *Server) ListenAndServe(addr string) error { 35 | orcEngine, err := initOrcEngine(s.swarmAddress, s.etcdAddress, s.isDebug) 36 | if err != nil { 37 | return err 38 | } 39 | s.engine = orcEngine 40 | 41 | // init network manager for net recover 42 | initNetwWorkMgr(s.etcdAddress) 43 | 44 | ctx := context.Background() 45 | ctx = context.WithValue(ctx, "engine", orcEngine) 46 | ctx = context.WithValue(ctx, "urlReverser", s) 47 | s.Server = server.New(ctx, s.isDebug) 48 | 49 | ignoredUrls := []string{"/debug/vars"} 50 | s.Middleware(server.NewRecoveryWare(s.isDebug)) 51 | s.Middleware(server.NewStatWare(ignoredUrls...)) 52 | if s.runtime == nil { 53 | s.runtime = 
server.NewRuntimeWare(ignoredUrls, true, 15*time.Minute).(*server.RuntimeWare) 54 | } 55 | s.Middleware(s.runtime) 56 | s.Middleware(NewReadOnlySwitch()) 57 | 58 | s.RestfulHandlerAdapter(s.adaptResourceHandler) 59 | s.AddRestfulResource("/api/podgroups", "RestfulPodGroups", RestfulPodGroups{}) 60 | s.AddRestfulResource("/api/depends", "RestfulDependPods", RestfulDependPods{}) 61 | s.AddRestfulResource("/api/nodes", "RestfulNodes", RestfulNodes{}) 62 | s.AddRestfulResource("/api/engine/config", "EngineConfig", EngineConfigApi{}) 63 | s.AddRestfulResource("/api/engine/maintenance", "EngineMaintenance", EngineMaintenanceApi{}) 64 | s.AddRestfulResource("/api/status", "RestfulStatus", RestfulStatus{}) 65 | s.AddRestfulResource("/api/constraints", "RestfulConstraints", RestfulConstraints{}) 66 | s.AddRestfulResource("/api/notifies", "RestfulNotifies", RestfulNotifies{}) 67 | s.AddRestfulResource("/api/ports", "RestfulPorts", RestfulPorts{}) 68 | s.AddRestfulResource("/api/guard", "RestfulGuard", RestfulGuard{}) 69 | s.AddRestfulResource("/api/cntstatushistory", "RestfulCntStatusHstry", RestfulCntStatusHstry{}) 70 | 71 | s.Get("/debug/vars", "RuntimeStat", s.getRuntimeStat) 72 | s.NotFound(func(ctx context.Context, w http.ResponseWriter, r *http.Request) context.Context { 73 | s.renderError(w, http.StatusNotFound, "Page not found", "") 74 | return ctx 75 | }) 76 | s.MethodNotAllowed(func(ctx context.Context, w http.ResponseWriter, r *http.Request) context.Context { 77 | s.renderError(w, http.StatusMethodNotAllowed, "Method is not allowed", "") 78 | return ctx 79 | }) 80 | 81 | s.started = true 82 | defer func() { s.started = false }() 83 | 84 | return s.Run(addr) 85 | } 86 | 87 | func (s *Server) getRuntimeStat(ctx context.Context, w http.ResponseWriter, r *http.Request) context.Context { 88 | http.DefaultServeMux.ServeHTTP(w, r) 89 | return ctx 90 | } 91 | 92 | func (s *Server) adaptResourceHandler(handler server.ResourceHandler) server.Handler { 93 | return func(ctx 
context.Context, w http.ResponseWriter, r *http.Request) context.Context { 94 | code, data := handler(ctx, r) 95 | if code < 400 { 96 | s.renderJsonOr500(w, code, data) 97 | } else { 98 | errMessage := "" 99 | if msg, ok := data.(string); ok { 100 | errMessage = msg 101 | } 102 | switch code { 103 | case http.StatusMethodNotAllowed: 104 | if errMessage == "" { 105 | errMessage = fmt.Sprintf("Method %q is not allowed", r.Method) 106 | } 107 | s.renderError(w, code, errMessage, data) 108 | case http.StatusNotFound: 109 | if errMessage == "" { 110 | errMessage = "Cannot find the resource" 111 | } 112 | s.renderError(w, code, errMessage, data) 113 | case http.StatusBadRequest: 114 | if errMessage == "" { 115 | errMessage = "Invalid request get or post params" 116 | } 117 | s.renderError(w, code, errMessage, data) 118 | default: 119 | if errMessage == "" { 120 | errMessage = fmt.Sprintf("HTTP Error Code: %d", code) 121 | } 122 | s.renderError(w, code, errMessage, data) 123 | } 124 | } 125 | return ctx 126 | } 127 | } 128 | 129 | const ( 130 | kContentCharset = "; charset=UTF-8" 131 | kContentJson = "application/json" 132 | ) 133 | 134 | func (s *Server) renderJson(w http.ResponseWriter, status int, v interface{}) error { 135 | data, err := json.MarshalIndent(v, "", " ") 136 | data = append(data, '\n') 137 | if err != nil { 138 | return err 139 | } 140 | w.Header().Set("Content-Type", kContentJson+kContentCharset) 141 | w.WriteHeader(status) 142 | if status != http.StatusNoContent { 143 | _, err = w.Write(data) 144 | } 145 | return err 146 | } 147 | 148 | func (s *Server) renderJsonOr500(w http.ResponseWriter, status int, v interface{}) { 149 | if err := s.renderJson(w, status, v); err != nil { 150 | s.renderError(w, http.StatusInternalServerError, err.Error(), "") 151 | } 152 | } 153 | 154 | func (s *Server) renderError(w http.ResponseWriter, status int, msg string, data interface{}) { 155 | apiError := ApiError{msg, data} 156 | if err := s.renderJson(w, status, 
apiError); err != nil { 157 | log.Errorf("Server got a json rendering error, %s", err) 158 | // we fallback to the http.Error instead return a json formatted error 159 | http.Error(w, err.Error(), http.StatusInternalServerError) 160 | } 161 | } 162 | 163 | func (s *Server) Shutdown() { 164 | if s.started { 165 | s.Stop(time.Second) 166 | } 167 | if s.engine != nil { 168 | s.engine.Stop() 169 | s.engine = nil 170 | } 171 | } 172 | 173 | type ApiError struct { 174 | Message string `json:"message"` 175 | Data interface{} `json:"data"` 176 | } 177 | 178 | func initOrcEngine(swarmAddr string, etcdAddr string, isDebug bool) (*engine.OrcEngine, error) { 179 | store, err := setcd.NewStore(etcdAddr, isDebug) 180 | if err != nil { 181 | return nil, err 182 | } 183 | 184 | cluster, err := swarm.NewCluster(swarmAddr, 10*time.Second, 20*time.Second) 185 | if err != nil { 186 | return nil, err 187 | } 188 | 189 | return engine.New(cluster, store) 190 | } 191 | 192 | func initNetwWorkMgr(endpoint string) { 193 | network.InitNetWorkManager("calico", endpoint) 194 | } 195 | 196 | func New(swarmAddr, etcdAddr string, isDebug bool) *Server { 197 | srv := &Server{ 198 | swarmAddress: swarmAddr, 199 | etcdAddress: etcdAddr, 200 | isDebug: isDebug, 201 | started: false, 202 | engine: nil, 203 | runtime: nil, 204 | } 205 | return srv 206 | } 207 | 208 | func getEngine(ctx context.Context) *engine.OrcEngine { 209 | return ctx.Value("engine").(*engine.OrcEngine) 210 | } 211 | 212 | func getUrlReverser(ctx context.Context) UrlReverser { 213 | return ctx.Value("urlReverser").(UrlReverser) 214 | } 215 | -------------------------------------------------------------------------------- /engine/runtimes.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "github.com/mijia/adoc" 5 | "github.com/mijia/sweb/log" 6 | "time" 7 | ) 8 | 9 | type RunState int 10 | type HealthState int 11 | type ExpectState int 12 | type PGOpState 
int32 13 | 14 | var RestartMaxCount int 15 | 16 | const ( 17 | RunStatePending = iota // waiting for operation 18 | RunStateDrift // drifting from one node to another 19 | RunStateSuccess // ok 20 | RunStateExit // exited 21 | RunStateFail // start failed with error 22 | RunStateInconsistent // container's state is different between deployd and swarm 23 | RunStateMissing // container is missing and need create it. happened when node down .etc 24 | RunStateRemoved // removed 25 | RunStatePaused // paused 26 | RunStateError // call docker interface with error 27 | ) 28 | 29 | const ( 30 | HealthStateNone = iota 31 | HealthStateStarting 32 | HealthStateHealthy 33 | HealthStateUnHealthy 34 | ) 35 | 36 | const ( 37 | ExpectStateRun = iota 38 | ExpectStateStop 39 | ) 40 | 41 | const ( 42 | PGOpStateIdle = iota 43 | PGOpStateUpgrading 44 | PGOpStateScheduling 45 | PGOpStateDrifting 46 | PGOpStateRemoving 47 | PGOpStateStarting 48 | PGOpStateStoping 49 | PGOpStateRestarting 50 | ) 51 | 52 | func (rs RunState) String() string { 53 | switch rs { 54 | case RunStatePending: 55 | return "RunStatePending" 56 | case RunStateDrift: 57 | return "RunStateDrift" 58 | case RunStateSuccess: 59 | return "RunStateSuccess" 60 | case RunStateExit: 61 | return "RunStateExit" 62 | case RunStateFail: 63 | return "RunStateFail" 64 | case RunStateMissing: 65 | return "RunStateMissing" 66 | case RunStateInconsistent: 67 | return "RunStateInconsistent" 68 | case RunStateRemoved: 69 | return "RunStateRemoved" 70 | case RunStatePaused: 71 | return "RunStatePaused" 72 | case RunStateError: 73 | return "RunStateError" 74 | default: 75 | return "Unknown RunState" 76 | } 77 | } 78 | 79 | func (hs HealthState) String() string { 80 | switch hs { 81 | case HealthStateNone: 82 | return "none" 83 | case HealthStateStarting: 84 | return "starting" 85 | case HealthStateHealthy: 86 | return "healthy" 87 | case HealthStateUnHealthy: 88 | return "unhealthy" 89 | default: 90 | return "none" 91 | } 92 | } 93 | 94 
| func (es ExpectState) String() string { 95 | switch es { 96 | case ExpectStateRun: 97 | return "Run" 98 | case ExpectStateStop: 99 | return "Stop" 100 | default: 101 | return "error" 102 | } 103 | } 104 | 105 | func (pgos PGOpState) String() string { 106 | switch pgos { 107 | case PGOpStateIdle: 108 | return "Idle" 109 | case PGOpStateUpgrading: 110 | return "Upgrading" 111 | case PGOpStateScheduling: 112 | return "Scheduling" 113 | case PGOpStateDrifting: 114 | return "Drifting" 115 | case PGOpStateRemoving: 116 | return "Removing" 117 | case PGOpStateStarting: 118 | return "Starting" 119 | case PGOpStateStoping: 120 | return "Stoping" 121 | case PGOpStateRestarting: 122 | return "Restarting" 123 | default: 124 | return "error" 125 | } 126 | } 127 | 128 | type ImRuntime struct { 129 | BaseRuntime 130 | TargetState ExpectState 131 | DriftCount int 132 | RestartCount int 133 | RestartAt time.Time 134 | } 135 | 136 | type BaseRuntime struct { 137 | Healthst HealthState 138 | State RunState 139 | OOMkilled bool 140 | LastError string 141 | UpdatedAt time.Time 142 | } 143 | 144 | type Container struct { 145 | // FIXME(mijia): multiple ports supporing, will have multiple entries of 146 | Id string 147 | Runtime adoc.ContainerDetail 148 | NodeName string 149 | NodeIp string 150 | ContainerIp string 151 | NodePort int 152 | ContainerPort int 153 | Protocol string 154 | } 155 | 156 | func (c Container) Clone() Container { 157 | // So far we maybe only care about the basic information like in the Equals 158 | return c 159 | } 160 | 161 | func (c Container) Equals(o Container) bool { 162 | // The ContainerDetail from adoc change would reflect to the Pod runtime changes 163 | return c.Id == o.Id && 164 | c.NodeName == o.NodeName && 165 | c.NodeIp == o.NodeIp && 166 | c.ContainerIp == o.ContainerIp && 167 | c.NodePort == o.NodePort && 168 | c.ContainerPort == o.ContainerPort && 169 | c.Protocol == o.Protocol 170 | } 171 | 172 | type Pod struct { 173 | InstanceNo int 174 | 
Containers []Container 175 | ImRuntime 176 | } 177 | 178 | func (p Pod) Clone() Pod { 179 | n := p 180 | n.Containers = make([]Container, len(p.Containers)) 181 | for i := range p.Containers { 182 | n.Containers[i] = p.Containers[i].Clone() 183 | } 184 | return n 185 | } 186 | 187 | func (p Pod) Equals(o Pod) bool { 188 | if len(p.Containers) != len(o.Containers) { 189 | return false 190 | } 191 | for i := range p.Containers { 192 | if !p.Containers[i].Equals(o.Containers[i]) { 193 | return false 194 | } 195 | } 196 | return p.InstanceNo == o.InstanceNo && 197 | p.State == o.State && 198 | p.LastError == o.LastError && 199 | p.DriftCount == o.DriftCount 200 | } 201 | 202 | func (pod Pod) ContainerIds() []string { 203 | ids := make([]string, len(pod.Containers)) 204 | for i, container := range pod.Containers { 205 | ids[i] = container.Id 206 | } 207 | return ids 208 | } 209 | 210 | func (pod Pod) NeedRestart(policy RestartPolicy) bool { 211 | if pod.TargetState == ExpectStateStop { 212 | return false 213 | } 214 | state := pod.State 215 | if policy == RestartPolicyAlways { 216 | return state == RunStateExit || state == RunStateFail 217 | } 218 | if policy == RestartPolicyOnFail { 219 | return state == RunStateFail 220 | } 221 | return false 222 | } 223 | 224 | func (pod Pod) RestartEnoughTimes() bool { 225 | if len(pod.Containers) > 0 && pod.RestartAt.Add(2*RestartInfoClearInterval).Before(pod.Containers[0].Runtime.State.FinishedAt) { 226 | return false 227 | } 228 | return pod.RestartCount >= RestartMaxCount 229 | } 230 | 231 | func (pod Pod) NodeName() string { 232 | if len(pod.Containers) > 0 { 233 | return pod.Containers[0].NodeName 234 | } 235 | return "" 236 | } 237 | 238 | func (pod Pod) NodeIp() string { 239 | if len(pod.Containers) > 0 { 240 | return pod.Containers[0].NodeIp 241 | } 242 | return "" 243 | } 244 | 245 | func (pod Pod) PodIp() string { 246 | if len(pod.Containers) > 0 { 247 | return pod.Containers[0].ContainerIp 248 | } 249 | return "" 250 | } 
251 | 252 | func (pod *Pod) ChangeTargetState(state ExpectState) { 253 | pod.TargetState = state 254 | log.Infof("target state:::%v", state) 255 | } 256 | 257 | type PodGroup struct { 258 | Pods []Pod 259 | BaseRuntime 260 | } 261 | 262 | func (pg PodGroup) Clone() PodGroup { 263 | n := pg 264 | n.Pods = make([]Pod, len(pg.Pods)) 265 | for i := range pg.Pods { 266 | n.Pods[i] = pg.Pods[i].Clone() 267 | } 268 | return n 269 | } 270 | 271 | func (pg PodGroup) Equals(o PodGroup) bool { 272 | if len(pg.Pods) != len(o.Pods) { 273 | return false 274 | } 275 | for i := range pg.Pods { 276 | if !pg.Pods[i].Equals(o.Pods[i]) { 277 | return false 278 | } 279 | } 280 | return pg.State == o.State && 281 | pg.LastError == o.LastError 282 | } 283 | 284 | func (group PodGroup) collectNodes() map[string]string { 285 | nodes := make(map[string]string) 286 | for _, pod := range group.Pods { 287 | name := pod.NodeName() 288 | ip := pod.NodeIp() 289 | if name != "" && ip != "" { 290 | nodes[name] = ip 291 | } 292 | } 293 | return nodes 294 | } 295 | 296 | type DependencyEvent struct { 297 | Type string // add, remove, verify 298 | Name string 299 | NodeName string 300 | Namespace string 301 | } 302 | 303 | type OperationEvent struct { 304 | Type string // start, over 305 | PgName string 306 | } 307 | -------------------------------------------------------------------------------- /engine/depends.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "sync" 7 | "time" 8 | 9 | "github.com/laincloud/deployd/cluster" 10 | "github.com/laincloud/deployd/storage" 11 | "github.com/laincloud/deployd/utils/util" 12 | ) 13 | 14 | // set GarbageCollectTimeout long enough. 
15 | // sometimes, podgroup refresh goroutine may having some problem(such as swarm exception), 16 | // and do not verify it's portal for a long time 17 | var DependsGarbageCollectTimeout time.Duration 18 | 19 | type NamespacePodsWithSpec struct { 20 | Spec PodSpec 21 | Pods map[string][]Pod 22 | } 23 | 24 | type SharedPodWithSpec struct { 25 | RefCount int 26 | VerifyTime time.Time 27 | Spec PodSpec 28 | Pod Pod 29 | } 30 | 31 | type sharedPodController struct { 32 | podController 33 | refCount int 34 | verifyTime time.Time 35 | } 36 | 37 | func (podCtrl *sharedPodController) String() string { 38 | return fmt.Sprintf("SharedPodCtrl %s, refCount=%d, verify=%s", 39 | podCtrl.spec, podCtrl.refCount, podCtrl.verifyTime) 40 | } 41 | 42 | type dependsController struct { 43 | sync.RWMutex 44 | spec PodSpec 45 | podCtrls map[string]map[string]*sharedPodController // [node][namespace]podCtrl 46 | removeStatus int 47 | 48 | Publisher 49 | evSnapshot map[string]RuntimeEaglePod // id => RuntimeEaglePod 50 | opsChan chan depOperation 51 | startedAt time.Time 52 | specStoredKey string 53 | podsStoredKey string 54 | } 55 | 56 | func (depCtrl *dependsController) String() string { 57 | return fmt.Sprintf("DependsCtrl %s", depCtrl.spec) 58 | } 59 | 60 | func (depCtrl *dependsController) RemoveStatus() int { 61 | depCtrl.RLock() 62 | defer depCtrl.RUnlock() 63 | return depCtrl.removeStatus 64 | } 65 | 66 | func (depCtrl *dependsController) Inspect() NamespacePodsWithSpec { 67 | depCtrl.RLock() 68 | defer depCtrl.RUnlock() 69 | 70 | podsWithSpec := NamespacePodsWithSpec{ 71 | Spec: depCtrl.spec, 72 | Pods: make(map[string][]Pod), 73 | } 74 | for _, nsPodCtrls := range depCtrl.podCtrls { 75 | for namespace, podCtrl := range nsPodCtrls { 76 | pods, ok := podsWithSpec.Pods[namespace] 77 | if !ok { 78 | pods = make([]Pod, 0, 10) 79 | } 80 | pods = append(pods, podCtrl.pod) 81 | podsWithSpec.Pods[namespace] = pods 82 | } 83 | } 84 | return podsWithSpec 85 | } 86 | 87 | func (depCtrl 
*dependsController) Refresh() { 88 | depCtrl.RLock() 89 | spec := depCtrl.spec.Clone() 90 | depCtrl.RUnlock() 91 | depCtrl.opsChan <- depOperSnapshotEagleView{spec} 92 | depCtrl.opsChan <- depOperRefresh{spec} 93 | depCtrl.opsChan <- depOperStoreSavePods{spec} 94 | } 95 | 96 | func (depCtrl *dependsController) AddSpec() { 97 | depCtrl.RLock() 98 | spec := depCtrl.spec.Clone() 99 | depCtrl.RUnlock() 100 | depCtrl.opsChan <- depOperStoreSaveSpec{spec, true} 101 | } 102 | 103 | func (depCtrl *dependsController) UpdateSpec(newSpec PodSpec) { 104 | toUpdate := false 105 | var ( 106 | oldSpec PodSpec 107 | mergeSpec PodSpec 108 | ) 109 | depCtrl.Lock() 110 | if !depCtrl.spec.Equals(newSpec) { 111 | toUpdate = true 112 | oldSpec = depCtrl.spec.Clone() 113 | depCtrl.spec = depCtrl.spec.Merge(newSpec) 114 | mergeSpec = depCtrl.spec.Clone() 115 | } 116 | depCtrl.Unlock() 117 | 118 | if !toUpdate { 119 | return 120 | } 121 | depCtrl.opsChan <- depOperSnapshotEagleView{mergeSpec} 122 | depCtrl.opsChan <- depOperStoreSaveSpec{mergeSpec, true} 123 | depCtrl.opsChan <- depOperUpgrade{mergeSpec, oldSpec} 124 | depCtrl.opsChan <- depOperStoreSavePods{mergeSpec} 125 | } 126 | 127 | func (depCtrl *dependsController) RemoveSpec(force bool) { 128 | depCtrl.Lock() 129 | depCtrl.removeStatus = 0 130 | toRemove := force 131 | if !force { 132 | for _, nsPodCtrls := range depCtrl.podCtrls { 133 | for _, podCtrl := range nsPodCtrls { 134 | if podCtrl.refCount > 0 { 135 | toRemove = false 136 | break 137 | } 138 | } 139 | if !toRemove { 140 | break 141 | } 142 | } 143 | } 144 | if !toRemove { 145 | depCtrl.removeStatus = 2 146 | } 147 | spec := depCtrl.spec.Clone() 148 | depCtrl.Unlock() 149 | 150 | if !toRemove { 151 | return 152 | } 153 | depCtrl.opsChan <- depOperStoreRemove{spec} 154 | depCtrl.opsChan <- depOperRemove{spec} 155 | depCtrl.opsChan <- depOperSnapshotEagleView{spec} 156 | depCtrl.opsChan <- depOperPurge{spec} 157 | } 158 | 159 | func (depCtrl *dependsController) 
AddPod(namespace, nodeName string) { 160 | depCtrl.RLock() 161 | spec := depCtrl.spec.Clone() 162 | depCtrl.RUnlock() 163 | 164 | depCtrl.opsChan <- depOperSnapshotEagleView{spec} 165 | depCtrl.opsChan <- depOperDeployPod{spec, namespace, nodeName} 166 | depCtrl.opsChan <- depOperStoreSavePods{spec} 167 | } 168 | 169 | func (depCtrl *dependsController) RemovePod(namespace, nodeName string) { 170 | depCtrl.RLock() 171 | spec := depCtrl.spec.Clone() 172 | depCtrl.RUnlock() 173 | depCtrl.opsChan <- depOperRemovePod{spec, namespace, nodeName} 174 | depCtrl.opsChan <- depOperStoreSavePods{spec} 175 | } 176 | 177 | func (depCtrl *dependsController) VerifyPod(namespace, nodeName string) { 178 | depCtrl.RLock() 179 | spec := depCtrl.spec.Clone() 180 | depCtrl.RUnlock() 181 | depCtrl.opsChan <- depOperSnapshotEagleView{spec} 182 | depCtrl.opsChan <- depOperVerifyPod{spec, namespace, nodeName} 183 | depCtrl.opsChan <- depOperStoreSavePods{spec} 184 | } 185 | 186 | func (depCtrl *dependsController) Activate(c cluster.Cluster, store storage.Store, eagle *RuntimeEagleView, stop chan struct{}) { 187 | go func() { 188 | for { 189 | select { 190 | case op := <-depCtrl.opsChan: 191 | if op.Do(depCtrl, c, store, eagle) { 192 | return 193 | } 194 | case <-stop: 195 | if len(depCtrl.opsChan) == 0 { 196 | return 197 | } 198 | } 199 | } 200 | }() 201 | } 202 | 203 | func (depCtrl *dependsController) getOrAddPodCtrl(nodeName string, namespace string, spec PodSpec, pod Pod) (*sharedPodController, bool) { 204 | if _, ok := depCtrl.podCtrls[nodeName]; !ok { 205 | depCtrl.podCtrls[nodeName] = make(map[string]*sharedPodController) 206 | } 207 | if podCtrl, ok := depCtrl.podCtrls[nodeName][namespace]; ok { 208 | return podCtrl, false 209 | } else { 210 | podCtrl = &sharedPodController{ 211 | podController: podController{ 212 | spec: spec, 213 | pod: pod, 214 | }, 215 | } 216 | podCtrl.podController.spec.PrevState = NewPodPrevState(1) // new pod controller, create new empty prev state 217 | 
depCtrl.podCtrls[nodeName][namespace] = podCtrl 218 | return podCtrl, true 219 | } 220 | } 221 | 222 | func (depCtrl *dependsController) specifyPodSpec(spec PodSpec, nodeName, namespace string) PodSpec { 223 | newContainers := make([]ContainerSpec, 0, len(spec.Containers)) 224 | for _, container := range spec.Containers { 225 | newEnv := make([]string, 0, len(container.Env)) 226 | for _, env := range container.Env { 227 | newEnv = append(newEnv, env) 228 | } 229 | container.Env = newEnv 230 | newContainers = append(newContainers, container) 231 | } 232 | spec.Containers = newContainers 233 | spec.Namespace = spec.Name 234 | spec.Network = fmt.Sprintf("%s_%s", spec.Name, namespace) 235 | spec.PrevState = NewPodPrevState(1) 236 | spec.Name = fmt.Sprintf("%s-%s-%s", spec.Name, nodeName, namespace) 237 | spec.Filters = util.AddNodeConstraint(spec.Filters, nodeName) 238 | return spec 239 | } 240 | 241 | func (depCtrl *dependsController) emitChangeEvent(changeType string, spec PodSpec, pod Pod) { 242 | } 243 | 244 | func newDependsController(spec PodSpec, pods map[string]map[string]SharedPodWithSpec) *dependsController { 245 | depCtrl := &dependsController{ 246 | Publisher: NewPublisher(true), 247 | spec: spec, 248 | startedAt: time.Now(), 249 | podCtrls: make(map[string]map[string]*sharedPodController), 250 | opsChan: make(chan depOperation, 100), 251 | 252 | specStoredKey: strings.Join([]string{kLainDeploydRootKey, kLainDependencyKey, kLainSpecKey, spec.Name}, "/"), 253 | podsStoredKey: strings.Join([]string{kLainDeploydRootKey, kLainDependencyKey, kLainPodKey, spec.Name}, "/"), 254 | } 255 | 256 | for node, nsPods := range pods { 257 | for namespace, pod := range nsPods { 258 | podCtrl, _ := depCtrl.getOrAddPodCtrl(node, namespace, pod.Spec, pod.Pod) 259 | podCtrl.refCount = pod.RefCount 260 | podCtrl.verifyTime = pod.VerifyTime 261 | } 262 | } 263 | 264 | return depCtrl 265 | } 266 | -------------------------------------------------------------------------------- 
/engine/histories.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | "sync" 8 | "time" 9 | 10 | "github.com/laincloud/deployd/storage" 11 | "github.com/laincloud/deployd/utils/util" 12 | "github.com/mijia/adoc" 13 | "github.com/mijia/sweb/log" 14 | ) 15 | 16 | // cnt means container 17 | const ( 18 | KCntStatus = "/lain/deployd/histroy" 19 | FmtKCntStatusLstPos = "/lain/deployd/histroy/%s/%d/lastpos" 20 | FmtKCntStatusInfo = "/lain/deployd/histroy/%s/%d/%d" 21 | 22 | DefaultStatusSize = 20 23 | ) 24 | 25 | type StatusLastPos struct { 26 | Pos int 27 | Size int 28 | } 29 | 30 | type StatusMessage struct { 31 | Status string `json:"status,omitempty"` 32 | From string `json:"from,omitempty"` 33 | Time int64 `json:"time,omitempty"` 34 | Action string `json:"action,omitempty"` 35 | } 36 | 37 | type podStatusHistory struct { 38 | pos *StatusLastPos 39 | podname string 40 | instance int 41 | dirty bool 42 | lock *sync.Mutex 43 | dirtys []bool 44 | locks []*sync.Mutex 45 | statuses []*StatusMessage 46 | } 47 | 48 | func (podSta *podStatusHistory) Save(engine *OrcEngine) { 49 | pos := podSta.pos.Pos 50 | podSta.dirtys[pos] = true 51 | podSta.dirty = true 52 | 53 | if err := podSta.saveStatus(engine, pos); err == nil { 54 | podSta.dirtys[pos] = false 55 | if err := podSta.saveLastPos(engine); err == nil { 56 | podSta.dirty = false 57 | } 58 | } 59 | } 60 | 61 | func (podSta *podStatusHistory) Check(engine *OrcEngine) { 62 | statusClean := true 63 | for pos, dirty := range podSta.dirtys { 64 | if dirty { 65 | if err := podSta.saveStatus(engine, pos); err == nil { 66 | podSta.dirtys[pos] = false 67 | } else { 68 | statusClean = false 69 | } 70 | } 71 | } 72 | if statusClean && podSta.dirty { 73 | if err := podSta.saveLastPos(engine); err == nil { 74 | podSta.dirty = false 75 | } 76 | } 77 | } 78 | 79 | func (podSta *podStatusHistory) saveStatus(engine *OrcEngine, pos 
int) error { 80 | lock := podSta.locks[pos] 81 | lock.Lock() 82 | defer lock.Unlock() 83 | if err := engine.store.Set(fmt.Sprintf(FmtKCntStatusInfo, podSta.podname, podSta.instance, pos), 84 | podSta.statuses[pos], true); err != nil { 85 | log.Error("save container %s status info failed by error %s ", podSta.podname, err) 86 | return err 87 | } 88 | return nil 89 | } 90 | 91 | func (podSta *podStatusHistory) saveLastPos(engine *OrcEngine) error { 92 | lock := podSta.lock 93 | lock.Lock() 94 | defer lock.Unlock() 95 | return engine.store.Set(fmt.Sprintf(FmtKCntStatusLstPos, podSta.podname, podSta.instance), podSta.pos) 96 | } 97 | 98 | type PodGroupStatusHistory struct { 99 | podStatuses map[int]*podStatusHistory // instance: podStatusHistory 100 | } 101 | 102 | type engineStatusHistory struct { 103 | pgStatuses map[string]*PodGroupStatusHistory // podname: PodGroupStatusHistory 104 | } 105 | 106 | func (esh *engineStatusHistory) checkStatus(engine *OrcEngine) { 107 | for _, pgsh := range esh.pgStatuses { 108 | for _, podsh := range pgsh.podStatuses { 109 | podsh.Check(engine) 110 | } 111 | } 112 | } 113 | 114 | // egStatuses(engine statuses):map[] 115 | // pgStatuses(podgroup statuses): map[] 116 | // podStatusHistory(pod status histories):map[] 117 | // DETAILDATA 118 | var ( 119 | egStatuses *engineStatusHistory 120 | egLock *sync.Mutex 121 | ) 122 | 123 | func init() { 124 | egStatuses = &engineStatusHistory{ 125 | pgStatuses: make(map[string]*PodGroupStatusHistory), 126 | } 127 | egLock = &sync.Mutex{} 128 | } 129 | 130 | func MaintainEngineStatusHistory(engine *OrcEngine) { 131 | tick := time.Tick(1 * time.Hour) 132 | for { 133 | select { 134 | case <-tick: 135 | egStatuses.checkStatus(engine) 136 | case <-engine.stop: 137 | return 138 | } 139 | } 140 | } 141 | 142 | // Sync with etcd data when start deployd 143 | // ugly finished! 
should change with allKeysByPrefix(return all non-dir node) 144 | func SyncEventsDataFromStorage(engine *OrcEngine) bool { 145 | egStatuses.pgStatuses = make(map[string]*PodGroupStatusHistory) 146 | store := engine.store 147 | podgroups, err := store.KeysByPrefix(KCntStatus) 148 | log.Debugf("events:%v, %v ", podgroups, err) 149 | if err != nil { 150 | return err == storage.KMissingError 151 | } 152 | for _, podgroup := range podgroups { // /lain/deployd/histroy 153 | pgsh := &PodGroupStatusHistory{ 154 | podStatuses: make(map[int]*podStatusHistory), 155 | } 156 | podname := strings.TrimPrefix(podgroup, KCntStatus+"/") 157 | egStatuses.pgStatuses[podname] = pgsh 158 | instances, err := store.KeysByPrefix(podgroup) 159 | if err != nil { 160 | return false 161 | } 162 | for _, instanceKey := range instances { // /lain/deployd/histroy/$podgroup 163 | instanceK := strings.TrimPrefix(instanceKey, podgroup+"/") 164 | if instance, err := strconv.Atoi(instanceK); err == nil { 165 | podSH := newPodStatusHistory(podname, instance) 166 | pgsh.podStatuses[instance] = podSH 167 | var pos StatusLastPos 168 | if err := store.Get(fmt.Sprintf(FmtKCntStatusLstPos, podname, instance), &pos); err != nil { 169 | if err != storage.KMissingError { 170 | return false 171 | } 172 | } 173 | podSH.pos = &pos 174 | statusKeys, err := store.KeysByPrefix(instanceKey) 175 | if err != nil { 176 | return false 177 | } 178 | for _, statusKey := range statusKeys { // /lain/deployd/histroy/$podgroup/$instance 179 | indexKey := strings.TrimPrefix(statusKey, instanceKey+"/") 180 | if index, err := strconv.Atoi(indexKey); err == nil { 181 | var status StatusMessage 182 | if err := store.Get(statusKey, &status); err == nil { 183 | podSH.statuses[index] = &status 184 | } else { 185 | return false 186 | } 187 | } 188 | } 189 | } 190 | } 191 | } 192 | return true 193 | } 194 | 195 | func newPodStatusHistory(podname string, instance int) *podStatusHistory { 196 | dirtys := make([]bool, DefaultStatusSize) 197 
| locks := make([]*sync.Mutex, DefaultStatusSize) 198 | statuses := make([]*StatusMessage, DefaultStatusSize) 199 | for i := 0; i < DefaultStatusSize; i++ { 200 | dirtys[i] = false 201 | locks[i] = &sync.Mutex{} 202 | } 203 | psh := &podStatusHistory{ 204 | pos: &StatusLastPos{0, DefaultStatusSize}, 205 | podname: podname, 206 | instance: instance, 207 | dirty: true, 208 | lock: &sync.Mutex{}, 209 | dirtys: dirtys, 210 | locks: locks, 211 | statuses: statuses, 212 | } 213 | return psh 214 | } 215 | 216 | func NewPodStatusHistory(podname string, instance int, status *StatusMessage) *podStatusHistory { 217 | psh := newPodStatusHistory(podname, instance) 218 | psh.statuses[0] = status 219 | return psh 220 | } 221 | 222 | func savePodStaHstry(engine *OrcEngine, event *adoc.Event) { 223 | actor := event.Actor 224 | if name, ok := actor.Attributes["name"]; ok { 225 | if podname, instance, err := util.ParseNameInstanceNo(name); err == nil { 226 | status := &StatusMessage{ 227 | Status: event.Status, 228 | From: event.From, 229 | Time: event.Time, 230 | Action: event.Action, 231 | } 232 | nextPos := 0 233 | egLock.Lock() 234 | defer egLock.Unlock() 235 | if pgStatus, ok := egStatuses.pgStatuses[podname]; ok { 236 | if podStatus, ok := pgStatus.podStatuses[instance]; ok { 237 | nextPos = (podStatus.pos.Pos + 1) % podStatus.pos.Size 238 | podStatus.statuses[nextPos] = status 239 | podStatus.pos.Pos = nextPos 240 | } else { 241 | pgStatus.podStatuses[instance] = NewPodStatusHistory(podname, instance, status) 242 | } 243 | } else { 244 | podStatuses := make(map[int]*podStatusHistory) 245 | psh := NewPodStatusHistory(podname, instance, status) 246 | podStatuses[instance] = psh 247 | egStatuses.pgStatuses[podname] = &PodGroupStatusHistory{podStatuses} 248 | } 249 | egStatuses.pgStatuses[podname].podStatuses[instance].Save(engine) 250 | } 251 | } 252 | } 253 | 254 | func FetchPodStaHstry(engine *OrcEngine, podname string, instance int) []*StatusMessage { 255 | stmsgs := 
make([]*StatusMessage, 0) 256 | if pgSH, ok := egStatuses.pgStatuses[podname]; ok { 257 | if podSH, ok := pgSH.podStatuses[instance]; ok { 258 | pos := podSH.pos 259 | start := (pos.Pos + 1) % pos.Size 260 | if podSH.statuses[start] == nil { 261 | for i := 0; i <= pos.Pos; i++ { 262 | stmsgs = append(stmsgs, podSH.statuses[i]) 263 | } 264 | } else { 265 | for i := start; i < pos.Size; i++ { 266 | stmsgs = append(stmsgs, podSH.statuses[i]) 267 | } 268 | for i := 0; i < start; i++ { 269 | stmsgs = append(stmsgs, podSH.statuses[i]) 270 | } 271 | } 272 | return stmsgs 273 | } 274 | } 275 | return stmsgs 276 | } 277 | -------------------------------------------------------------------------------- /engine/ports.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "strconv" 7 | "strings" 8 | "sync" 9 | 10 | etcd "github.com/coreos/etcd/client" 11 | "github.com/mijia/sweb/log" 12 | "golang.org/x/net/context" 13 | ) 14 | 15 | const ( 16 | KeyPrefixStreamPorts = "/lain/deployd/stream/ports" 17 | ) 18 | 19 | var ( 20 | once sync.Once 21 | pm *PortsManager 22 | ) 23 | 24 | func ConfigPortsManager(endpoint string) { 25 | onceBody := func() { 26 | pm = NewPortsManager(endpoint) 27 | } 28 | once.Do(onceBody) 29 | } 30 | 31 | type StreamPort struct { 32 | SrcPort int `json:"srcport"` 33 | DstPort int `json:"dstport"` 34 | Proto string `json:"proto"` 35 | } 36 | 37 | func (sp StreamPort) Equals(osp StreamPort) bool { 38 | return sp.SrcPort == osp.SrcPort && 39 | sp.DstPort == osp.DstPort && 40 | sp.Proto == osp.Proto 41 | } 42 | 43 | type StreamPorts struct { 44 | Ports []StreamPort `json:"ports"` 45 | } 46 | 47 | func (sp StreamPorts) Equals(osp StreamPorts) bool { 48 | if len(sp.Ports) != len(osp.Ports) { 49 | return false 50 | } 51 | for i, _ := range sp.Ports { 52 | if !sp.Ports[i].Equals(osp.Ports[i]) { 53 | return false 54 | } 55 | } 56 | return true 57 | } 58 | 59 | type 
StreamProc struct { 60 | StreamPort 61 | NameSpace string 62 | ProcName string 63 | } 64 | 65 | type PortsManager struct { 66 | etcd *etcd.Client 67 | lock *sync.Mutex 68 | } 69 | 70 | func NewPortsManager(endpoint string) *PortsManager { 71 | cfg := etcd.Config{ 72 | Endpoints: []string{endpoint}, 73 | Transport: etcd.DefaultTransport, 74 | } 75 | c, err := etcd.New(cfg) 76 | if err != nil { 77 | log.Errorf("NewPortsManager with error:%v\n", err) 78 | return nil 79 | } 80 | return &PortsManager{ 81 | etcd: &c, 82 | lock: &sync.Mutex{}, 83 | } 84 | } 85 | 86 | func (pm PortsManager) occupiedProcs(spArr ...*StreamProc) []int { 87 | occs := make([]int, 0) 88 | for _, sp := range spArr { 89 | key := fmt.Sprintf(KeyPrefixStreamPorts+"/%d", sp.SrcPort) 90 | if keyExists(pm.etcd, key) { 91 | occs = append(occs, sp.SrcPort) 92 | } 93 | } 94 | return occs 95 | } 96 | 97 | func (pm PortsManager) occupiedPorts(ports ...int) []int { 98 | occs := make([]int, 0) 99 | for _, port := range ports { 100 | key := fmt.Sprintf(KeyPrefixStreamPorts+"/%d", port) 101 | if keyExists(pm.etcd, key) { 102 | occs = append(occs, port) 103 | } 104 | } 105 | return occs 106 | } 107 | 108 | func (pm PortsManager) Refresh(pgCtrls map[string]*podGroupController) { 109 | pm.lock.Lock() 110 | defer pm.lock.Unlock() 111 | occs := make([]*StreamProc, 0) 112 | for _, pgCtrl := range pgCtrls { 113 | annotation := pgCtrl.spec.Pod.Annotation 114 | var sps StreamPorts 115 | if err := json.Unmarshal([]byte(annotation), &sps); err != nil { 116 | continue 117 | } 118 | for _, sp := range sps.Ports { 119 | occs = append(occs, &StreamProc{ 120 | StreamPort: sp, 121 | NameSpace: pgCtrl.spec.Namespace, 122 | ProcName: pgCtrl.spec.Name, 123 | }) 124 | } 125 | } 126 | 127 | for _, sp := range occs { 128 | key := fmt.Sprintf(KeyPrefixStreamPorts+"/%d", sp.SrcPort) 129 | putValue(pm.etcd, key, sp, true) 130 | } 131 | 132 | markedPorts, err := fetchAll(pm.etcd, KeyPrefixStreamPorts) 133 | if err != nil { 134 | return 
135 | } 136 | portsSets := make(map[int]struct{}) 137 | for _, port := range occs { 138 | portsSets[port.SrcPort] = struct{}{} 139 | } 140 | garbagePorts := make([]int, 0) 141 | for _, port := range markedPorts { 142 | if _, ok := portsSets[port]; !ok { 143 | garbagePorts = append(garbagePorts, port) 144 | } 145 | } 146 | for _, port := range garbagePorts { 147 | key := fmt.Sprintf(KeyPrefixStreamPorts+"/%d", port) 148 | delValue(pm.etcd, key) 149 | } 150 | } 151 | 152 | func (pm PortsManager) RegisterStreamPorts(spArr ...*StreamProc) (bool, []int) { 153 | pm.lock.Lock() 154 | defer pm.lock.Unlock() 155 | succeedArr := make([]*StreamProc, 0, len(spArr)) 156 | for _, sp := range spArr { 157 | if !pm.RegisterStreamPort(sp) { 158 | for _, succeed := range succeedArr { 159 | pm.CancelStreamPort(succeed) 160 | } 161 | return false, pm.occupiedProcs(spArr...) 162 | } 163 | succeedArr = append(succeedArr, sp) 164 | } 165 | return true, nil 166 | } 167 | 168 | func (pm PortsManager) UpdateStreamPorts(spArr ...*StreamProc) { 169 | pm.lock.Lock() 170 | defer pm.lock.Unlock() 171 | for _, sp := range spArr { 172 | pm.UpdateStreamPort(sp) 173 | } 174 | } 175 | 176 | func (pm PortsManager) CancelStreamPorts(spArr ...*StreamProc) { 177 | pm.lock.Lock() 178 | defer pm.lock.Unlock() 179 | for _, sp := range spArr { 180 | pm.CancelStreamPort(sp) 181 | } 182 | } 183 | 184 | func (pm PortsManager) FetchAllStreamPortsInfo() []StreamProc { 185 | pm.lock.Lock() 186 | defer pm.lock.Unlock() 187 | ports, err := fetchAllInfo(pm.etcd, KeyPrefixStreamPorts) 188 | if err != nil { 189 | return nil 190 | } 191 | return ports 192 | } 193 | 194 | func (pm PortsManager) RegisterStreamPort(sp *StreamProc) bool { 195 | key := fmt.Sprintf(KeyPrefixStreamPorts+"/%d", sp.SrcPort) 196 | return putValue(pm.etcd, key, sp, false) 197 | } 198 | 199 | func (pm *PortsManager) UpdateStreamPort(sp *StreamProc) bool { 200 | key := fmt.Sprintf(KeyPrefixStreamPorts+"/%d", sp.SrcPort) 201 | return putValue(pm.etcd, 
key, sp, true) 202 | } 203 | 204 | func (pm *PortsManager) CancelStreamPort(sp *StreamProc) bool { 205 | key := fmt.Sprintf(KeyPrefixStreamPorts+"/%d", sp.SrcPort) 206 | return delValue(pm.etcd, key) 207 | } 208 | 209 | func RegisterPorts(sps ...*StreamProc) (bool, []int) { 210 | return pm.RegisterStreamPorts(sps...) 211 | } 212 | 213 | func UpdatePorts(sps ...*StreamProc) { 214 | pm.UpdateStreamPorts(sps...) 215 | } 216 | 217 | func CancelPorts(sps ...*StreamProc) { 218 | pm.CancelStreamPorts(sps...) 219 | } 220 | 221 | func FetchAllPortsInfo() []StreamProc { 222 | return pm.FetchAllStreamPortsInfo() 223 | } 224 | 225 | func OccupiedPorts(ports ...int) []int { 226 | return pm.occupiedPorts(ports...) 227 | } 228 | 229 | func RefreshPorts(pgCtrls map[string]*podGroupController) { 230 | pm.Refresh(pgCtrls) 231 | } 232 | 233 | func keyExists(e *etcd.Client, key string) bool { 234 | kapi := etcd.NewKeysAPI(*e) 235 | return storeOp(func() (bool, error) { 236 | resp, err := kapi.Get(context.Background(), key, &etcd.GetOptions{Quorum: true}) 237 | if err != nil { 238 | if etcdErr, ok := err.(etcd.Error); ok { 239 | switch etcdErr.Code { 240 | case etcd.ErrorCodeKeyNotFound: 241 | return false, nil 242 | default: 243 | return false, err 244 | } 245 | } 246 | return false, err 247 | } 248 | if resp == nil || resp.Node == nil || resp.Node.Value == "" { 249 | return false, nil 250 | } 251 | return true, nil 252 | }) 253 | } 254 | 255 | func fetchAllInfo(e *etcd.Client, key string) ([]StreamProc, error) { 256 | kapi := etcd.NewKeysAPI(*e) 257 | resp, err := kapi.Get(context.Background(), key, &etcd.GetOptions{Recursive: true, Quorum: true}) 258 | if err != nil { 259 | return nil, err 260 | } 261 | portsInfo := make([]StreamProc, 0, len(resp.Node.Nodes)) 262 | 263 | var sp StreamProc 264 | for _, node := range resp.Node.Nodes { 265 | json.Unmarshal([]byte(node.Value), &sp) 266 | portsInfo = append(portsInfo, sp) 267 | } 268 | return portsInfo, nil 269 | } 270 | 271 | func 
fetchAll(e *etcd.Client, key string) ([]int, error) { 272 | kapi := etcd.NewKeysAPI(*e) 273 | resp, err := kapi.Get(context.Background(), key, &etcd.GetOptions{Recursive: true, Quorum: true}) 274 | if err != nil { 275 | return nil, err 276 | } 277 | ports := make([]int, 0, len(resp.Node.Nodes)) 278 | for _, node := range resp.Node.Nodes { 279 | key := node.Key 280 | infos := strings.Split(key, "/") 281 | if len(infos) > 0 { 282 | if port, err := strconv.Atoi(strings.Split(key, "/")[len(infos)-1]); err == nil { 283 | ports = append(ports, port) 284 | } 285 | } 286 | } 287 | return ports, nil 288 | } 289 | 290 | func putValue(e *etcd.Client, key string, value interface{}, force bool) bool { 291 | kapi := etcd.NewKeysAPI(*e) 292 | bytes, err := json.Marshal(value) 293 | if err != nil { 294 | return false 295 | } 296 | return storeOp(func() (bool, error) { 297 | var opts *etcd.SetOptions = nil 298 | if !force { 299 | opts = &etcd.SetOptions{PrevExist: etcd.PrevNoExist} 300 | } 301 | _, err := kapi.Set( 302 | context.Background(), 303 | key, string(bytes), 304 | opts, 305 | ) 306 | if err != nil { 307 | if etcdErr, ok := err.(etcd.Error); ok { 308 | switch etcdErr.Code { 309 | case etcd.ErrorCodeNodeExist: 310 | return false, nil 311 | default: 312 | log.Errorf("set key %v failed with error:%v", key, err) 313 | } 314 | } 315 | return false, err 316 | } 317 | return true, nil 318 | }) 319 | } 320 | 321 | func delValue(e *etcd.Client, key string) bool { 322 | kapi := etcd.NewKeysAPI(*e) 323 | return storeOp(func() (bool, error) { 324 | _, err := kapi.Delete(context.Background(), key, nil) 325 | if err != nil { 326 | return false, err 327 | } 328 | return true, nil 329 | }) 330 | } 331 | 332 | func storeOp(op func() (bool, error)) bool { 333 | succ, err := op() 334 | if err != nil { 335 | log.Errorf("store op err:%v", err) 336 | return false 337 | } else { 338 | return succ 339 | } 340 | } 341 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lain Deployd 2 | 3 | [![Build Status](https://travis-ci.org/laincloud/deployd.svg?branch=master)](https://travis-ci.org/laincloud/deployd) [![codecov](https://codecov.io/gh/laincloud/deployd/branch/master/graph/badge.svg)](https://codecov.io/gh/laincloud/deployd) [![MIT license](https://img.shields.io/github/license/mashape/apistatus.svg)](https://opensource.org/licenses/MIT) 4 | 5 | 6 | ## 基本介绍 7 | 8 | Deployd是负责Lain中底层容器编排的组件。将用户层对proc和portal等概念操作映射为实际的container操作,包括对container进行启停,升级和迁移等。Deployd会对所有的container定时巡检,自动修复异常的container,以确保服务的正常。 9 | 10 | ## 整体设计 11 | 12 | 1. 主要包括apiserver、engine、cluster、storage四个组成部分 13 | 1. 使用Pod做为调度单元,Pod中包含需要运行的Container描述,即目标Container需要达到的状态,还包括对Pod进行调度的预设条件Filter和依赖关系Dependency 14 | 1. 使用PodGroup来提供对Pod的Replication控制,以及重启设定 15 | 1. 提供Pod的依赖关系,一个Pod可以指定他需要的依赖Pod同时运行,还包括依赖Scope的设定,例如Namespace级别的Scope,或者Node级别的Scope 16 | 1. 
总结来说,Deployd使用Pod来描述Container的运行参数、资源预约(包括内存和CPU)、依赖关系、预设调度条件;PodGroup来描述一个Pod需要多少实例,发生不同状况之后重启策略是什么,例如不重启、失败后重启、永远重启等;Dependency用来描述被依赖的Pod基本运行参数、资源等 17 | 18 | ![deployd design](deployd-design.png) 19 | 20 | ### OrcEngine 21 | OrcEngine采用单独的Worker Queue来对来自Api Server的各种操作请求进行排队派发,分发给对应的dependsController和podGroupController,并且统一安排定时的刷新操作,进行自检工作。默认会从Etcd读取所有PodGroup和DependPod的相关Spec和已知的运行时信息。 22 | 23 | 还包括RuntimeEagleView利用Swarm Api提供的筛选API进行实时信息校正,因为在Etcd中存储的相关运行时信息可能是不准确的,这时候会通过RuntimeEagleView来获取Swarm实际运行时数据进行校准。 24 | 25 | 所有的操作请求进入OrcEngine的Worker Queue,Queue出队的Operation会将相关请求派发到对应的Controller进入该Controller的Worker Queue,进行调度安排。(具体操作实现可以参考engine/engine_ops.go代码) 26 | 27 | ### podGroupController 28 | 29 | podGroupController提供对于PodGroup的控制和自检工作,负责所有相关PodGroup调度工作,并定时自检,根据当前集群内PodGroup工作状态和配置进行相关调整,每个podGroupController都使用单独的Goroutine来进行所有调度工作的安排,所以,OrcEngine本身提供的异步操作接口。podGroupController会调用对应的podController进行底层的实际Container控制操作(具体操作实现可以参考engine/podgroup_ops.go),所有的API都会被拆分成若干底层Operation的Functor推送到Worker Queue中排队,从而重用大部分代码: 30 | 31 | 1. Deploy操作:每个Instance的Deploy首先会从RuntimeEagleView中尝试获取当前是否有相关Container被部署,如果发现已经被部署的Pod,Deploy操作不会重新调度Container,只是重新获取Container状态,恢复PodGroup的运行时数据。在Deploy时,会尽量带上Affinity的调度标记,例如`affinity:cc.bdp.lain.deployd.pg_name!=~hello.web.web`,可以使Instance在集群中部署时能被分散开。 32 | 1. 实例数量调度(RescheduleInstance):会根据Instance数量变化的Delta来选择是Deploy新的Instance还是Remove Instance,如果是Deploy的话,相关执行同Deploy操作;如果是删除Instance,是从InstanceNo大的一端开始删除 33 | 1. Spec更新调度(RescheduleSpec):每个Instance串行的进行更新,更新过程中先会删除该Instance,并且等待`10s`,然后调用上面的Deploy Instance操作,同样会使用RuntimeEagleView来进行校准 34 | 1. Drift漂移操作:每个Instance来判断自己是否需要漂移,如果漂移的话,也是先Remove Instance,然后再Deploy Instance到指定节点或者由Swarm来选择被调度的节点 35 | 1. Remove操作:每个Instance会通过podController来进行Remove操作,然后再次调用RuntimeEagleView刷新相关Container运行列表,如果发现有残留的Container,会直接Remove Container,避免podController操作失败造成数据和运行时污染 36 | 1. 
Refresh操作:先是通过RuntimeEagleView更新运行时Contrainer相关列表,每个Instance自己刷新,如果和RuntimePod匹配,那么就没有问题,此外,有一种情况目前是考虑的: 37 | * 发现Container Missing:会重新调用上面的Deploy Instance操作,从新部署新的实例 38 | * (Deployd数据格式升级):发现老版本Container还在运行,会使用UpgradeInstance操作对应Instance,将Container本身升级到新版本,例如添加或者更新Container的配置Labels等 39 | * 如果发现RuntimePod对应版本和当前Spec版本不一致,会调用UpgradeInstance来更新Instance 40 | * 如果发现Container没有正常运行,会根据PodGroupSpec中的重启策略来选择是否重新启动Container 41 | 42 | ### dependsController 43 | 44 | dependsController提供对于Dependency Pod的控制和自检工作,OrcEngine新建Dependency Pod的时候只是记录相关Spec信息,并生成对应dependsController,但是并不会实际部署任何Pod,dependency需要跟相关PodGroup的Instance运行在同一个集群Node上,所以会在有实际PodGroup Instance运行之后才会启动部署相关的Dependency Pod。而具体部署的细节是通过DependencyPolicy来控制的,目前有两种策略,一种是Node级别的,一种是Namespace级别的,例如具有相同Namespace的PodGroup Instance他们在同一台主机节点上会使用同一个Namespace级别的Dependency Pod,如果是Node级别的,那该Node主机上只会有一个Dependency Pod被部署然后被大家共享。 45 | 46 | dependsController也会调用相应的podController进行底册的实际Container控制操作,所有的API也被拆分成若干底层Operation Functor推送到单独的Goroutine Worker Queue中,不同的时,dependsController里面使用了带有引用计数和VerifyTime的podController,从而实现不同DependencyPolicy级别的共享功能。 47 | 48 | dependsController是通过Event的方式接收系统中正在发生的Dependency变化的,具体包括添加、删除和Verify,对应的会AddPod、RemovePod和VerifyPod,具体操作实现可以参考`engine/depends_ops.go`: 49 | 50 | 1. AddSpec: 添加Spec配置,只是简单的存储到Storage中 51 | 1. UpdateSpec:更新Spec配置,并对当前所有正在运行的Pod进行Upgrade,具体操作跟上面podGroupController的更新操作比较类似 52 | 1. RemoveSpec:删除配置,默认如果当前对应的所有级别podController的引用计数大于0的话,是不允许执行的,需要先移除依赖他的PodGroup之后才可以进行,如果指定force的话,会强制停掉相关Pod 53 | 1. AddPod:接收到DependencyEvent添加Pod事件,会根据事件中的Namespace和Node标记找到podController,如果没有相关部署,会进行部署,否则仅仅是增加引用计数,并且修改VerifyTime 54 | 1. RemovePod:接收到DependencyEvent删除Pod事件,会根据事件中的Namespace和Node标记找到podController,不会立即就移除Pod,修改引用计数,修改VerifyTime,具体的移除操作实际上是在自检过程中如果发现很长时间没有PodGroup来Verify还在使用的话,该Pod就会被删除了,目前设定的垃圾回收时间为`5m` 55 | 1. VerifyPod: 接收到DependencyEvent的Verify事件,会根据事件中的Namespace和Node标记找到podController,更新他的VerifyTime 56 | 1. 
Refresh自检:会先刷新RuntimeEagleView中Dependency Pods的运行时列表,然后对于每个Node上每个Namespace对应的podController进行自检,如果发现距离VerifyTime已经超过Deploy启动时间并且超过垃圾回收时间,就会对该Pod进行回收,如果确定还不是垃圾之后,会有几种情况: 57 | * 运行时正常,并且跟RuntimeEagleView中的列表匹配成功,说明一切正常 58 | * 有Container Missing,会尝试从RuntimeEagleView中发现丢失的Container,如果找到,只需要重新登记,如果没有找到,会进行重新部署 59 | * 如果发现是老版本的Container,会调用相应的UpgradePod操作对该Pod进行升级操作,从而满足Deployd自身数据和Container更新的要求,例如升级了Container配置Labels等 60 | * 发现实际运行版本不同于Spec中定义版本,也会调用相应的UpgradePod动作进行升级 61 | * 发现Container挂了,会尝试重新启动 62 | 63 | ### constraintController 64 | 65 | constraintController用于在部署pod时添加相应限制规则。目前主要用途是在进行集群维护时将某些节点设置为不可部署状态,这样deployd在部署时则不会允许pod部署到相应限制节点。 66 | constraint机制主要来自于swarm,属于node filter中的一种,具体可参见swarm filter相关文档。 67 | 68 | ### notifyController 69 | 70 | notifyController用于管理deployd的callback列表及给相应callback列表发送通知。当deployd发现容器状态出现问题时,会给已注册的callback url发送通知。 71 | 目前当出现如下情况时notifyController会发送通知: 72 | * 某个pod处于exit状态 73 | * 找不到某个pod 74 | * 某个pod启动后不包含IP 75 | * 某个pod在一定时间内被重启了多次 76 | 77 | ## 编译和安装 78 | 79 | ### 编译 80 | **依赖环境:go1.5+**, 81 | 82 | ```sh 83 | go build -o deployd 84 | ``` 85 | 86 | ### 运行 87 | **依赖环境: swarm, etcd** 88 | 89 | ```sh 90 | ./deployd -h # 查看启动参数 91 | 92 | # 例子 93 | ./deployd -web :9000 -swarm http://127.0.0.1:2376 -etcd http://127.0.0.1:2379 # 监听9000端口 94 | ``` 95 | 96 | ## API Reference 97 | 98 | Deployd的内部编排引擎OrcEngine为异步执行模型,所以,基本上调度API返回的结果只是预约结果,而非真实操作的最后结果,可以继续通过相关GET Api来获取实际的运行信息,任务接受后,会进入OrcEngine的异步执行队列中。 99 | 100 | ### Engine Api 101 | 102 | ``` 103 | GET /api/engine/config 104 | # 获取engine 配置信息 105 | # 返回: 106 | # OK: EngineConfig JSON 数据 107 | 108 | PATCH /api/engine/config 109 | # 修改engine配置信息 110 | # 参数: 111 | # Body: EngineConfig的JSON数据 112 | # 返回: 113 | # OK: EngineConfig JSON 数据 114 | # 错误信息: 115 | # BadRequest: PodGroupSpec JSON格式错误,或者缺少必需的参数 116 | 117 | PATCH /api/engine/maintenance&on=false 118 | # 维护模式设置 119 | # 参数: 120 | # on: 是否打开维护模式 121 | # 返回: 122 | # OK: EngineConfig JSON 数据 123 | ``` 124 | 125 | ### PodGroup Api 126 | 127 | ``` 
128 | GET /api/podgroups?name={string}&force_update={true|false} 129 | # 获取PodGroup运行Spec和Runtime数据 130 | # 参数: 131 | # name: PodGroup名称 132 | # force_update: 是否强制更新,使用true或者false 133 | # 返回: 134 | # OK: PodGroupWithSpec JSON 数据 135 | # 错误信息: 136 | # BadRequest: 缺少name参数 137 | # NotFound: 没有找到对应名称的PodGroup 138 | 139 | POST /api/podgroups 140 | # 新建要被调度的PodGroup,并且马上部署 141 | # 参数: 142 | # Body: PodGroupSpec的JSON数据 143 | # 返回: 144 | # Accepted: 任务被接受 145 | # 错误信息: 146 | # BadRequest: PodGroupSpec JSON格式错误,或者缺少必需的参数 147 | # NotAllowed: 集群缺少相关资源可被调度、PodGroup已经存在(请使用Patch相关接口) 148 | 149 | DELETE /api/podgroups?name={string} 150 | # 删除PodGroup部署 151 | # 参数: 152 | # name: PodGroup名称 153 | # 返回: 154 | # Accepted: 任务被接受 155 | # 错误信息: 156 | # BadRequest: 缺少name参数 157 | # NotFound: 没有找到对应名称的PodGroup 158 | 159 | PATCH /api/podgroups?name={string}&cmd=replica&num_instances={int}&restart_policy={string} 160 | # 更改PodGroup运行时的Instance数量以及重启策略 161 | # 参数: 162 | # name: PodGroup名称 163 | # num_instances: 需要的实例数量 164 | # restart_policy(optional): 重启策略,值包括:never, always, onfail 165 | # 返回: 166 | # Accepted: 任务被接受 167 | # 错误信息: 168 | # BadRequest: 缺少必需的参数 169 | # NotAllowed: 集群缺少相关资源可被调度 170 | # NotFound: 没有找到对应名称的PodGroup 171 | 172 | PATCH /api/podgroups?name={string}&cmd=spec 173 | # 更改PodGroup运行时的具体Spec配置信息 174 | # 参数: 175 | # name: PodGroup名称 176 | # Body: 新的PodSpec 177 | # 返回: 178 | # Accepted: 任务被接受 179 | # 错误信息: 180 | # BadRequest: 缺少必需的参数 181 | # NotAllowed: 集群缺少相关资源可被调度 182 | # NotFound: 没有找到对应名称的PodGroup 183 | 184 | PATCH /api/podgroups?name={string}&cmd=operation&optype={start/stop/restart}[&instance={int}] 185 | # 更改PodGroup运行时的具体Spec配置信息 186 | # 参数: 187 | # name: PodGroup名称 188 | # optype: 操作类型 停止或重启 189 | # instance: 操作的pg instance,不传时为整个pod group 190 | # 返回: 191 | # Accepted: 任务被接受 192 | # 错误信息: 193 | # BadRequest: 缺少必需的参数 194 | # NotAllowed: 集群缺少相关资源可被调度 195 | # NotFound: 没有找到对应名称的PodGroup 196 | ``` 197 | 198 | ### Dependency Api 199 | 200 | ``` 201 | GET 
/api/depends?name={string} 202 | # 获取Dependency Pod的Spec和Runtime数据 203 | # 参数: 204 | # name: Dependency Pod名称 205 | # 返回: 206 | # OK: PodSpec以及Runtime JSON 数据 207 | # 错误信息: 208 | # BadRequest: 缺少name参数 209 | # NotFound: 没有找到对应依赖Pod定义 210 | 211 | POST /api/depends 212 | # 新建依赖Dependency Pod,但是并不会马上部署,按需部署的 213 | # 参数: 214 | # Body: PodSpec的JSON数据 215 | # 返回: 216 | # Accepted: 任务被接受 217 | # 错误信息: 218 | # BadRequest: PodSpec JSON格式错误,或者缺少必需的参数 219 | # NotAllowed: 集群缺少相关资源可被调度、Dependency已经存在(请使用PUT相关接口) 220 | 221 | DELETE /api/depends?name={string}&force={true|false} 222 | # 删除Dependency部署 223 | # 参数: 224 | # name: Dependency Pod名称 225 | # force(optional): 是否强制删除,如果force=false,当前Dependency Pod被其他PodGroup依赖的话,是不会被删除的 226 | # 返回: 227 | # Accepted: 任务被接受 228 | # 错误信息: 229 | # BadRequest: 缺少name参数 230 | # NotFound: 没有找到对应名称的Dependency 231 | 232 | PUT /api/depends 233 | # 更新依赖Dependency Pod,会逐步更新所有目前运行的实例 234 | # 参数: 235 | # Body: PodSpec的JSON数据 236 | # 返回: 237 | # Accepted: 任务被接受 238 | # 错误信息: 239 | # BadRequest: PodSpec JSON格式错误,或者缺少必需的参数 240 | # NotFound: 没有找到对应的Dependency 241 | ``` 242 | 243 | ### Node Api 244 | 245 | ``` 246 | GET /api/nodes 247 | # 获取集群当前节点数据 248 | 249 | PATCH /api/nodes?cmd=drift&from={string}&to={string}&pg={string}&pg_instance={int}&force={true|false} 250 | # 漂移相关的Pod 251 | # 参数: 252 | # from: 漂移出去的节点名称 253 | # to(optional): 漂移的目标节点名称,如果等于from的话,会报BadRequest 254 | # pg(optional): 特定漂移的PodGroup名称 255 | # pg_instance(optional): 特定漂移的PodGroup InstanceNo,需要同时指定pg参数 256 | # force(optional): 是否忽略PodGroup Stateful的标记,如果为false,具有Stateful标记的PodGroup不会被飘走 257 | # 返回: 258 | # Accepted: 任务被接受 259 | # 错误信息: 260 | # BadRequest: 缺少必需的参数 261 | ``` 262 | 263 | ### Constraint Api 264 | 265 | ``` 266 | GET /api/contraints 267 | # 获取集群当前constraints数据 268 | 269 | PATCH /api/constraints?type={string}&value={string}&equal={true|false}&soft={true|false} 270 | # 漂移相关的contraint 271 | # 参数: 272 | # type: 需要修改的constraint类型,比如node 273 | # value: constraint类型对应的值 274 | # 
equal(optional): 在应用constraint的值时,是使用==还是!=,如果为true,则使用== 275 | # soft(optional): 是否强制实施此constraint,如果为true,如果不能满足条件则不能部署相应容器 276 | # 返回: 277 | # Accepted: constraint被添加 278 | # 错误信息: 279 | # BadRequest: 缺少必需的参数 280 | 281 | DELETE /api/constraints?type={string} 282 | # 删除某种类型的constraint 283 | # 参数: 284 | # type: Constraint 名称 285 | # 返回: 286 | # Accepted: constraint被删除 287 | # 错误信息: 288 | # BadRequest: 缺少必需的参数 289 | # NotFound: 没有找到对应类型的constraint 290 | ``` 291 | 292 | ### Notify Api 293 | 294 | ``` 295 | GET /api/notifies 296 | # 获取集群当前notify列表 297 | 298 | POST /api/notifies?callback={string} 299 | # 添加一个callback url 300 | # 参数: 301 | # callback: 需要添加的callback url 302 | # 返回: 303 | # Accepted: callback url被添加 304 | # 错误信息: 305 | # BadRequest: 缺少必需的参数或url格式存在问题 306 | 307 | DELETE /api/notifies?callback={string} 308 | # 删除某个callback url 309 | # 参数: 310 | # callback: callback url 311 | # 返回: 312 | # Accepted: callback url被删除 313 | # 错误信息: 314 | # BadRequest: 缺少相关参数 315 | # NotFound: 没有找到对应的callback url 316 | ``` 317 | 318 | ### Status API 319 | 320 | ``` 321 | GET /api/status 322 | # 获取deployd engine的启停状态 323 | 324 | PATCH -XPATCH /api/status -H "Content-Type: application/json" -d '{"status": "start"}' 325 | # start 或 stop deployd engine 326 | ``` 327 | 328 | ## Cluster 管理接口 329 | 目前Cluster部分使用Docker Swarm来提供集群管理功能,并且设计了NetworkManager接口(还不成熟)接入Calico(已废弃删除)或者Noop的网络管理器,基本接口包括: 330 | 331 | ``` 332 | type NetworkManager interface { 333 | GetContainerNetInfo(nodeName string, containerId string) (ContainerNetInfo, error) 334 | PatchEnv(envlist []string, key string, value string) 335 | } 336 | 337 | type Cluster interface { 338 | NetworkManager 339 | GetResources() ([]Node, error) 340 | ListContainers(showAll bool, showSize bool, filters ...string) ([]adoc.Container, error) 341 | CreateContainer(cc adoc.ContainerConfig, hc adoc.HostConfig, name ...string) (string, error) 342 | StartContainer(id string) error 343 | StopContainer(id string, timeout ...int) error 344 | 
InspectContainer(id string) (adoc.ContainerDetail, error) 345 | RemoveContainer(id string, force bool, volumes bool) error 346 | RenameContainer(id string, name string) error 347 | MonitorEvents(filter string, callback adoc.EventCallback) int64 348 | StopMonitor(monitorId int64) 349 | } 350 | ``` 351 | 352 | ## Storage 接口 353 | 目前存储部分使用Etcd集群来提供KV存储功能,主要接口包括: 354 | 355 | ``` 356 | type Store interface { 357 | Get(key string, v interface{}) error 358 | Set(key string, v interface{}, force ...bool) error 359 | KeysByPrefix(prefix string) ([]string, error) 360 | Remove(key string) error 361 | TryRemoveDir(key string) 362 | RemoveDir(key string) error 363 | } 364 | ``` 365 | 366 | ## 已知问题 367 | 1. Swarm本身对于写一类的操作是要进行加锁的,例如pull image、create container、start container,即便操作对象不在同一个node上,也会有这个全局锁问题,所以算是个瓶颈吧,如果有大规模的重新部署或者更新之类的话,整个编排系统的吞吐量和并发程度受限于Swarm 368 | 369 | ## License 370 | 371 | Deployd is released under the [MIT license](LICENSE). 372 | -------------------------------------------------------------------------------- /Gopkg.lock: -------------------------------------------------------------------------------- 1 | # This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. 
2 | 3 | 4 | [[projects]] 5 | name = "cloud.google.com/go" 6 | packages = [ 7 | "compute/metadata", 8 | "internal" 9 | ] 10 | revision = "3b1ae45394a234c385be014e9a488f2bb6eef821" 11 | 12 | [[projects]] 13 | name = "github.com/PuerkitoBio/purell" 14 | packages = ["."] 15 | revision = "8a290539e2e8629dbc4e6bad948158f790ec31f4" 16 | version = "v1.0.0" 17 | 18 | [[projects]] 19 | name = "github.com/PuerkitoBio/urlesc" 20 | packages = ["."] 21 | revision = "5bd2802263f21d8788851d5305584c82a5c75d7e" 22 | 23 | [[projects]] 24 | name = "github.com/coreos/etcd" 25 | packages = [ 26 | "client", 27 | "pkg/fileutil", 28 | "pkg/pathutil", 29 | "pkg/tlsutil", 30 | "pkg/transport", 31 | "pkg/types" 32 | ] 33 | revision = "d267ca9c184e953554257d0acdd1dc9c47d38229" 34 | version = "v3.1.8" 35 | 36 | [[projects]] 37 | name = "github.com/coreos/go-oidc" 38 | packages = [ 39 | "http", 40 | "jose", 41 | "key", 42 | "oauth2", 43 | "oidc" 44 | ] 45 | revision = "be73733bb8cc830d0205609b95d125215f8e9c70" 46 | 47 | [[projects]] 48 | name = "github.com/coreos/go-semver" 49 | packages = ["semver"] 50 | revision = "8ab6407b697782a06568d4b7f1db25550ec2e4c6" 51 | version = "v0.2.0" 52 | 53 | [[projects]] 54 | name = "github.com/coreos/go-systemd" 55 | packages = ["journal"] 56 | revision = "48702e0da86bd25e76cfef347e2adeb434a0d0a6" 57 | version = "v14" 58 | 59 | [[projects]] 60 | name = "github.com/coreos/pkg" 61 | packages = [ 62 | "capnslog", 63 | "health", 64 | "httputil", 65 | "timeutil" 66 | ] 67 | revision = "3ac0863d7acf3bc44daf49afef8919af12f704ef" 68 | version = "v3" 69 | 70 | [[projects]] 71 | name = "github.com/davecgh/go-spew" 72 | packages = ["spew"] 73 | revision = "5215b55f46b2b919f50a1df0eaa5886afe4e3b3d" 74 | 75 | [[projects]] 76 | name = "github.com/docker/distribution" 77 | packages = [ 78 | "digest", 79 | "reference" 80 | ] 81 | revision = "cd27f179f2c10c5d300e6d09025b538c475b0d51" 82 | 83 | [[projects]] 84 | name = "github.com/docker/libkv" 85 | packages = [ 86 | ".", 87 | 
"store", 88 | "store/etcd" 89 | ] 90 | revision = "aabc039ad04deb721e234f99cd1b4aa28ac71a40" 91 | version = "v0.2.1" 92 | 93 | [[projects]] 94 | name = "github.com/emicklei/go-restful" 95 | packages = [ 96 | ".", 97 | "log", 98 | "swagger" 99 | ] 100 | revision = "09691a3b6378b740595c1002f40c34dd5f218a22" 101 | 102 | [[projects]] 103 | name = "github.com/ghodss/yaml" 104 | packages = ["."] 105 | revision = "73d445a93680fa1a78ae23a5839bad48f32ba1ee" 106 | 107 | [[projects]] 108 | name = "github.com/go-openapi/jsonpointer" 109 | packages = ["."] 110 | revision = "46af16f9f7b149af66e5d1bd010e3574dc06de98" 111 | 112 | [[projects]] 113 | name = "github.com/go-openapi/jsonreference" 114 | packages = ["."] 115 | revision = "13c6e3589ad90f49bd3e3bbe2c2cb3d7a4142272" 116 | 117 | [[projects]] 118 | name = "github.com/go-openapi/spec" 119 | packages = ["."] 120 | revision = "6aced65f8501fe1217321abf0749d354824ba2ff" 121 | 122 | [[projects]] 123 | name = "github.com/go-openapi/swag" 124 | packages = ["."] 125 | revision = "1d0bd113de87027671077d3c71eb3ac5d7dbba72" 126 | 127 | [[projects]] 128 | name = "github.com/gogo/protobuf" 129 | packages = [ 130 | "proto", 131 | "sortkeys" 132 | ] 133 | revision = "909568be09de550ed094403c2bf8a261b5bb730a" 134 | version = "v0.3" 135 | 136 | [[projects]] 137 | name = "github.com/golang/glog" 138 | packages = ["."] 139 | revision = "44145f04b68cf362d9c4df2182967c2275eaefed" 140 | 141 | [[projects]] 142 | name = "github.com/golang/protobuf" 143 | packages = ["proto"] 144 | revision = "4bd1920723d7b7c925de087aa32e2187708897f7" 145 | 146 | [[projects]] 147 | name = "github.com/google/gofuzz" 148 | packages = ["."] 149 | revision = "44d81051d367757e1c7c6a5a86423ece9afcf63c" 150 | 151 | [[projects]] 152 | name = "github.com/howeyc/gopass" 153 | packages = ["."] 154 | revision = "3ca23474a7c7203e0a0a070fd33508f6efdb9b3d" 155 | 156 | [[projects]] 157 | name = "github.com/imdario/mergo" 158 | packages = ["."] 159 | revision = 
"6633656539c1639d9d78127b7d47c622b5d7b6dc" 160 | 161 | [[projects]] 162 | name = "github.com/jonboulle/clockwork" 163 | packages = ["."] 164 | revision = "2eee05ed794112d45db504eb05aa693efd2b8b09" 165 | version = "v0.1.0" 166 | 167 | [[projects]] 168 | name = "github.com/juju/ratelimit" 169 | packages = ["."] 170 | revision = "77ed1c8a01217656d2080ad51981f6e99adaa177" 171 | 172 | [[projects]] 173 | name = "github.com/julienschmidt/httprouter" 174 | packages = ["."] 175 | revision = "975b5c4c7c21c0e3d2764200bf2aa8e34657ae6e" 176 | 177 | [[projects]] 178 | name = "github.com/kelseyhightower/envconfig" 179 | packages = ["."] 180 | revision = "f611eb38b3875cc3bd991ca91c51d06446afa14c" 181 | version = "v1.3.0" 182 | 183 | [[projects]] 184 | name = "github.com/mailru/easyjson" 185 | packages = [ 186 | "buffer", 187 | "jlexer", 188 | "jwriter" 189 | ] 190 | revision = "d5b7844b561a7bc640052f1b935f7b800330d7e0" 191 | 192 | [[projects]] 193 | name = "github.com/mijia/adoc" 194 | packages = ["."] 195 | revision = "1ef227e439ebbac803b4b9ec6f9a111edd4d6831" 196 | 197 | [[projects]] 198 | branch = "master" 199 | name = "github.com/mijia/go-generics" 200 | packages = ["."] 201 | revision = "2278a5f0de143e1d17ea16d56e7f85391bdb85a3" 202 | 203 | [[projects]] 204 | name = "github.com/mijia/sweb" 205 | packages = [ 206 | "form", 207 | "log", 208 | "server" 209 | ] 210 | revision = "4919e73674ef98e133d7f5e1d87c3963653c12b1" 211 | 212 | [[projects]] 213 | name = "github.com/paulbellamy/ratecounter" 214 | packages = ["."] 215 | revision = "66b206d2a2d00245e34b1494fcaeef5e99041ea7" 216 | 217 | [[projects]] 218 | name = "github.com/pmezard/go-difflib" 219 | packages = ["difflib"] 220 | revision = "d8ed2627bdf02c080bf22230dbb337003b7aba2d" 221 | 222 | [[projects]] 223 | branch = "calico" 224 | name = "github.com/projectcalico/go-json" 225 | packages = ["json"] 226 | revision = "6219dc7339ba20ee4c57df0a8baac62317d19cb1" 227 | 228 | [[projects]] 229 | branch = "calico" 230 | name = 
"github.com/projectcalico/go-yaml" 231 | packages = ["."] 232 | revision = "955bc3e451ef0c9df8b9113bf2e341139cdafab2" 233 | 234 | [[projects]] 235 | branch = "calico" 236 | name = "github.com/projectcalico/go-yaml-wrapper" 237 | packages = ["."] 238 | revision = "598e54215bee41a19677faa4f0c32acd2a87eb56" 239 | 240 | [[projects]] 241 | name = "github.com/projectcalico/libcalico-go" 242 | packages = [ 243 | "lib/api", 244 | "lib/api/unversioned", 245 | "lib/backend", 246 | "lib/backend/api", 247 | "lib/backend/compat", 248 | "lib/backend/etcd", 249 | "lib/backend/extensions", 250 | "lib/backend/k8s", 251 | "lib/backend/k8s/custom", 252 | "lib/backend/k8s/resources", 253 | "lib/backend/model", 254 | "lib/client", 255 | "lib/converter", 256 | "lib/errors", 257 | "lib/hash", 258 | "lib/hwm", 259 | "lib/ipip", 260 | "lib/net", 261 | "lib/numorstring", 262 | "lib/scope", 263 | "lib/selector", 264 | "lib/selector/parser", 265 | "lib/selector/tokenizer", 266 | "lib/validator" 267 | ] 268 | revision = "aab828ef9fd5405040c36368f866e5ec9ea314a6" 269 | version = "v1.7.3" 270 | 271 | [[projects]] 272 | name = "github.com/satori/go.uuid" 273 | packages = ["."] 274 | revision = "879c5887cd475cd7864858769793b2ceb0d44feb" 275 | version = "v1.1.0" 276 | 277 | [[projects]] 278 | name = "github.com/sirupsen/logrus" 279 | packages = ["."] 280 | revision = "ba1b36c82c5e05c4f912a88eab0dcd91a171688f" 281 | version = "v0.11.5" 282 | 283 | [[projects]] 284 | name = "github.com/spf13/pflag" 285 | packages = ["."] 286 | revision = "08b1a584251b5b62f458943640fc8ebd4d50aaa5" 287 | 288 | [[projects]] 289 | branch = "master" 290 | name = "github.com/stretchr/graceful" 291 | packages = ["."] 292 | revision = "d72b0151351a13d0421b763b88f791469c4f5dc7" 293 | 294 | [[projects]] 295 | name = "github.com/stretchr/testify" 296 | packages = ["assert"] 297 | revision = "976c720a22c8eb4eb6a0b4348ad85ad12491a506" 298 | 299 | [[projects]] 300 | name = "github.com/ugorji/go" 301 | packages = ["codec"] 302 | 
revision = "ded73eae5db7e7a0ef6f55aace87a2873c5d2b74" 303 | 304 | [[projects]] 305 | name = "golang.org/x/crypto" 306 | packages = ["ssh/terminal"] 307 | revision = "1351f936d976c60a0a48d728281922cf63eafb8d" 308 | 309 | [[projects]] 310 | name = "golang.org/x/net" 311 | packages = [ 312 | "context", 313 | "context/ctxhttp", 314 | "http2", 315 | "http2/hpack", 316 | "idna", 317 | "lex/httplex" 318 | ] 319 | revision = "f2499483f923065a842d38eb4c7f1927e6fc6e6d" 320 | 321 | [[projects]] 322 | name = "golang.org/x/oauth2" 323 | packages = [ 324 | ".", 325 | "google", 326 | "internal", 327 | "jws", 328 | "jwt" 329 | ] 330 | revision = "3c3a985cb79f52a3190fbc056984415ca6763d01" 331 | 332 | [[projects]] 333 | name = "golang.org/x/sys" 334 | packages = ["unix"] 335 | revision = "833a04a10549a95dc34458c195cbad61bbb6cb4d" 336 | 337 | [[projects]] 338 | name = "golang.org/x/text" 339 | packages = [ 340 | "cases", 341 | "internal", 342 | "internal/gen", 343 | "internal/tag", 344 | "internal/triegen", 345 | "internal/ucd", 346 | "language", 347 | "runes", 348 | "secure/bidirule", 349 | "secure/precis", 350 | "transform", 351 | "unicode/bidi", 352 | "unicode/cldr", 353 | "unicode/norm", 354 | "unicode/rangetable", 355 | "width" 356 | ] 357 | revision = "19e51611da83d6be54ddafce4a4af510cb3e9ea4" 358 | 359 | [[projects]] 360 | name = "google.golang.org/appengine" 361 | packages = [ 362 | ".", 363 | "internal", 364 | "internal/app_identity", 365 | "internal/base", 366 | "internal/datastore", 367 | "internal/log", 368 | "internal/modules", 369 | "internal/remote_api", 370 | "internal/urlfetch", 371 | "urlfetch" 372 | ] 373 | revision = "4f7eeb5305a4ba1966344836ba4af9996b7b4e05" 374 | 375 | [[projects]] 376 | name = "gopkg.in/go-playground/validator.v8" 377 | packages = ["."] 378 | revision = "5f57d2222ad794d0dffb07e664ea05e2ee07d60c" 379 | version = "v8.18.1" 380 | 381 | [[projects]] 382 | name = "gopkg.in/inf.v0" 383 | packages = ["."] 384 | revision = 
"3887ee99ecf07df5b447e9b00d9c0b2adaa9f3e4" 385 | version = "v0.9.0" 386 | 387 | [[projects]] 388 | name = "gopkg.in/tchap/go-patricia.v2" 389 | packages = ["patricia"] 390 | revision = "666120de432aea38ab06bd5c818f04f4129882c9" 391 | version = "v2.2.6" 392 | 393 | [[projects]] 394 | name = "gopkg.in/yaml.v2" 395 | packages = ["."] 396 | revision = "53feefa2559fb8dfa8d81baad31be332c97d6c77" 397 | 398 | [[projects]] 399 | name = "k8s.io/apimachinery" 400 | packages = [ 401 | "pkg/api/errors", 402 | "pkg/api/meta", 403 | "pkg/api/resource", 404 | "pkg/apimachinery", 405 | "pkg/apimachinery/announced", 406 | "pkg/apimachinery/registered", 407 | "pkg/apis/meta/v1", 408 | "pkg/apis/meta/v1/unstructured", 409 | "pkg/conversion", 410 | "pkg/conversion/queryparams", 411 | "pkg/fields", 412 | "pkg/labels", 413 | "pkg/openapi", 414 | "pkg/runtime", 415 | "pkg/runtime/schema", 416 | "pkg/runtime/serializer", 417 | "pkg/runtime/serializer/json", 418 | "pkg/runtime/serializer/protobuf", 419 | "pkg/runtime/serializer/recognizer", 420 | "pkg/runtime/serializer/streaming", 421 | "pkg/runtime/serializer/versioning", 422 | "pkg/selection", 423 | "pkg/types", 424 | "pkg/util/diff", 425 | "pkg/util/errors", 426 | "pkg/util/framer", 427 | "pkg/util/intstr", 428 | "pkg/util/json", 429 | "pkg/util/net", 430 | "pkg/util/rand", 431 | "pkg/util/runtime", 432 | "pkg/util/sets", 433 | "pkg/util/validation", 434 | "pkg/util/validation/field", 435 | "pkg/util/wait", 436 | "pkg/util/yaml", 437 | "pkg/version", 438 | "pkg/watch", 439 | "third_party/forked/golang/reflect" 440 | ] 441 | revision = "b317fa7ec8e0e7d1f77ac63bf8c3ec7b29a2a215" 442 | 443 | [[projects]] 444 | name = "k8s.io/client-go" 445 | packages = [ 446 | "discovery", 447 | "kubernetes", 448 | "kubernetes/scheme", 449 | "kubernetes/typed/apps/v1beta1", 450 | "kubernetes/typed/authentication/v1", 451 | "kubernetes/typed/authentication/v1beta1", 452 | "kubernetes/typed/authorization/v1", 453 | "kubernetes/typed/authorization/v1beta1", 
454 | "kubernetes/typed/autoscaling/v1", 455 | "kubernetes/typed/autoscaling/v2alpha1", 456 | "kubernetes/typed/batch/v1", 457 | "kubernetes/typed/batch/v2alpha1", 458 | "kubernetes/typed/certificates/v1beta1", 459 | "kubernetes/typed/core/v1", 460 | "kubernetes/typed/extensions/v1beta1", 461 | "kubernetes/typed/policy/v1beta1", 462 | "kubernetes/typed/rbac/v1alpha1", 463 | "kubernetes/typed/rbac/v1beta1", 464 | "kubernetes/typed/settings/v1alpha1", 465 | "kubernetes/typed/storage/v1", 466 | "kubernetes/typed/storage/v1beta1", 467 | "pkg/api", 468 | "pkg/api/install", 469 | "pkg/api/v1", 470 | "pkg/apis/apps", 471 | "pkg/apis/apps/install", 472 | "pkg/apis/apps/v1beta1", 473 | "pkg/apis/authentication", 474 | "pkg/apis/authentication/install", 475 | "pkg/apis/authentication/v1", 476 | "pkg/apis/authentication/v1beta1", 477 | "pkg/apis/authorization", 478 | "pkg/apis/authorization/install", 479 | "pkg/apis/authorization/v1", 480 | "pkg/apis/authorization/v1beta1", 481 | "pkg/apis/autoscaling", 482 | "pkg/apis/autoscaling/install", 483 | "pkg/apis/autoscaling/v1", 484 | "pkg/apis/autoscaling/v2alpha1", 485 | "pkg/apis/batch", 486 | "pkg/apis/batch/install", 487 | "pkg/apis/batch/v1", 488 | "pkg/apis/batch/v2alpha1", 489 | "pkg/apis/certificates", 490 | "pkg/apis/certificates/install", 491 | "pkg/apis/certificates/v1beta1", 492 | "pkg/apis/extensions", 493 | "pkg/apis/extensions/install", 494 | "pkg/apis/extensions/v1beta1", 495 | "pkg/apis/policy", 496 | "pkg/apis/policy/install", 497 | "pkg/apis/policy/v1beta1", 498 | "pkg/apis/rbac", 499 | "pkg/apis/rbac/install", 500 | "pkg/apis/rbac/v1alpha1", 501 | "pkg/apis/rbac/v1beta1", 502 | "pkg/apis/settings", 503 | "pkg/apis/settings/install", 504 | "pkg/apis/settings/v1alpha1", 505 | "pkg/apis/storage", 506 | "pkg/apis/storage/install", 507 | "pkg/apis/storage/v1", 508 | "pkg/apis/storage/v1beta1", 509 | "pkg/util", 510 | "pkg/util/parsers", 511 | "pkg/version", 512 | "plugin/pkg/client/auth", 513 | 
"plugin/pkg/client/auth/gcp", 514 | "plugin/pkg/client/auth/oidc", 515 | "rest", 516 | "rest/watch", 517 | "third_party/forked/golang/template", 518 | "tools/auth", 519 | "tools/cache", 520 | "tools/clientcmd", 521 | "tools/clientcmd/api", 522 | "tools/clientcmd/api/latest", 523 | "tools/clientcmd/api/v1", 524 | "tools/metrics", 525 | "transport", 526 | "util/cert", 527 | "util/clock", 528 | "util/flowcontrol", 529 | "util/homedir", 530 | "util/integer", 531 | "util/jsonpath" 532 | ] 533 | revision = "4a3ab2f5be5177366f8206fd79ce55ca80e417fa" 534 | 535 | [solve-meta] 536 | analyzer-name = "dep" 537 | analyzer-version = 1 538 | inputs-digest = "12dec0c24e3b4f118d64bdf0caacf72342cbeb3877213dec2d074a935274defe" 539 | solver-name = "gps-cdcl" 540 | solver-version = 1 541 | -------------------------------------------------------------------------------- /engine/specs.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "time" 7 | 8 | "github.com/laincloud/deployd/storage" 9 | "github.com/mijia/adoc" 10 | "github.com/mijia/go-generics" 11 | "github.com/mijia/sweb/log" 12 | ) 13 | 14 | const ( 15 | kLainDeploydRootKey = "/lain/deployd" 16 | kLainConstraintKey = "constraints" 17 | kLainNotifyKey = "notifies" 18 | kLainPodGroupKey = "pod_groups" 19 | kLainDependencyKey = "depends" 20 | kLainSpecKey = "specs" 21 | kLainPodKey = "pods" 22 | kLainNodesKey = "nodes" 23 | kLainLastPodSpecKey = "last_spec" 24 | kLainPgOpingKey = "operating" 25 | 26 | kLainLabelPrefix = "cc.bdp.lain.deployd" 27 | kLainLogVolumePath = "/lain/logs" 28 | 29 | MinPodSetupTime = 0 30 | MaxPodSetupTime = 300 31 | 32 | MinPodKillTimeout = 10 33 | MaxPodKillTimeout = 120 34 | 35 | ) 36 | 37 | var ( 38 | kLainVolumeRoot = "/data/lain/volumes" 39 | kLainCloudVolumeRoot = "/data/lain/cloud-volumes" 40 | ) 41 | 42 | type ImSpec struct { 43 | Name string 44 | Namespace string 45 | Version int 46 | CreatedAt 
time.Time 47 | UpdatedAt time.Time 48 | } 49 | 50 | type ContainerLabel struct { 51 | Name string 52 | Namespace string 53 | InstanceNo int 54 | Version int 55 | DriftCount int 56 | ContainerIndex int 57 | Annotation string 58 | } 59 | 60 | func configSpecsVars(store storage.Store) error { 61 | if v, err := store.GetRaw(EtcdCloudVolumeRootKey); err == nil { 62 | kLainCloudVolumeRoot = v 63 | } 64 | 65 | if v, err := store.GetRaw(EtcdVolumeRootKey); err == nil { 66 | kLainVolumeRoot = v 67 | } 68 | log.Debugf("cloud_volume_root: %s, volumes_root: %s", kLainCloudVolumeRoot, kLainVolumeRoot) 69 | return nil 70 | } 71 | 72 | func (label ContainerLabel) NameAffinity() string { 73 | return fmt.Sprintf("affinity:%s.pg_name!=~%s", kLainLabelPrefix, label.Name) 74 | } 75 | 76 | func (label ContainerLabel) Label2Maps() map[string]string { 77 | labelMaps := make(map[string]string) 78 | labelMaps[kLainLabelPrefix+".pg_name"] = label.Name 79 | labelMaps[kLainLabelPrefix+".pg_namespace"] = label.Namespace 80 | labelMaps[kLainLabelPrefix+".instance_no"] = fmt.Sprintf("%d", label.InstanceNo) 81 | labelMaps[kLainLabelPrefix+".version"] = fmt.Sprintf("%d", label.Version) 82 | labelMaps[kLainLabelPrefix+".drift_count"] = fmt.Sprintf("%d", label.DriftCount) 83 | labelMaps[kLainLabelPrefix+".container_index"] = fmt.Sprintf("%d", label.ContainerIndex) 84 | labelMaps[kLainLabelPrefix+".annotation"] = label.Annotation 85 | return labelMaps 86 | } 87 | 88 | func (label *ContainerLabel) FromMaps(m map[string]string) bool { 89 | var err error 90 | hasError := false 91 | label.Name = m[kLainLabelPrefix+".pg_name"] 92 | hasError = hasError || label.Name == "" 93 | label.Namespace = m[kLainLabelPrefix+".pg_namespace"] 94 | label.InstanceNo, err = strconv.Atoi(m[kLainLabelPrefix+".instance_no"]) 95 | hasError = hasError || err != nil 96 | label.Version, err = strconv.Atoi(m[kLainLabelPrefix+".version"]) 97 | hasError = hasError || err != nil 98 | label.DriftCount, err = 
strconv.Atoi(m[kLainLabelPrefix+".drift_count"]) 99 | hasError = hasError || err != nil 100 | label.ContainerIndex, err = strconv.Atoi(m[kLainLabelPrefix+".container_index"]) 101 | hasError = hasError || err != nil 102 | label.Annotation = m[kLainLabelPrefix+".annotation"] 103 | return !hasError 104 | } 105 | 106 | const ( 107 | CloudVolumeSingleMode = "single" 108 | CloudVolumeMultiMode = "multi" 109 | ) 110 | 111 | type CloudVolumeSpec struct { 112 | Type string 113 | Dirs []string 114 | } 115 | 116 | func (s CloudVolumeSpec) VerifyParams() bool { 117 | verify := s.Type == CloudVolumeMultiMode || 118 | s.Type == CloudVolumeSingleMode 119 | 120 | return verify 121 | } 122 | 123 | func (s CloudVolumeSpec) Clone() CloudVolumeSpec { 124 | newSpec := s 125 | newSpec.Type = s.Type 126 | newSpec.Dirs = generics.Clone_StringSlice(s.Dirs) 127 | return newSpec 128 | } 129 | 130 | func (s CloudVolumeSpec) Equals(o CloudVolumeSpec) bool { 131 | return s.Type == o.Type && 132 | generics.Equal_StringSlice(s.Dirs, o.Dirs) 133 | } 134 | 135 | type HealthCnfOptions struct { 136 | Interval int `json:"interval"` 137 | Timeout int `json:"timeout"` 138 | Retries int `json:"retries"` 139 | } 140 | 141 | func (hco HealthCnfOptions) Equals(cp HealthCnfOptions) bool { 142 | return hco.Interval == cp.Interval && 143 | hco.Timeout == cp.Timeout && 144 | hco.Retries == cp.Retries 145 | } 146 | 147 | type HealthConfig struct { 148 | Cmd string `json:"cmd"` 149 | Options HealthCnfOptions `json:"options"` 150 | } 151 | 152 | func (hc HealthConfig) Equals(cp HealthConfig) bool { 153 | return hc.Cmd == cp.Cmd && 154 | hc.Options.Equals(cp.Options) 155 | } 156 | 157 | func (hc HealthConfig) FetchOption() HealthCnfOptions { 158 | interval := DefaultHealthInterval 159 | timeout := DefaultHealthTimeout 160 | retries := DefaultHealthRetries 161 | options := hc.Options 162 | if options.Interval > interval { 163 | interval = options.Interval 164 | } 165 | if options.Timeout > timeout { 166 | timeout = 
options.Timeout 167 | } 168 | if options.Retries > retries { 169 | retries = options.Retries 170 | } 171 | return HealthCnfOptions{ 172 | Interval: interval, 173 | Timeout: timeout, 174 | Retries: retries, 175 | } 176 | } 177 | 178 | type ContainerSpec struct { 179 | ImSpec 180 | Image string 181 | Env []string 182 | User string 183 | WorkingDir string 184 | DnsSearch []string 185 | Volumes []string // a stateful flag 186 | SystemVolumes []string // not a stateful flag, every node has system volumes 187 | CloudVolumes []CloudVolumeSpec 188 | Command []string 189 | Entrypoint []string 190 | CpuLimit int 191 | MemoryLimit int64 192 | Expose int 193 | LogConfig adoc.LogConfig 194 | } 195 | 196 | func (s ContainerSpec) Clone() ContainerSpec { 197 | newSpec := s 198 | newSpec.Env = generics.Clone_StringSlice(s.Env) 199 | newSpec.Volumes = generics.Clone_StringSlice(s.Volumes) 200 | newSpec.SystemVolumes = generics.Clone_StringSlice(s.SystemVolumes) 201 | newSpec.Command = generics.Clone_StringSlice(s.Command) 202 | newSpec.DnsSearch = generics.Clone_StringSlice(s.DnsSearch) 203 | if s.Entrypoint == nil { 204 | newSpec.Entrypoint = nil 205 | } else { 206 | newSpec.Entrypoint = generics.Clone_StringSlice(s.Entrypoint) 207 | } 208 | newSpec.LogConfig.Type = s.LogConfig.Type 209 | newSpec.LogConfig.Config = generics.Clone_StringStringMap(s.LogConfig.Config) 210 | 211 | for i := range s.CloudVolumes { 212 | newSpec.CloudVolumes[i] = s.CloudVolumes[i].Clone() 213 | } 214 | return newSpec 215 | } 216 | 217 | func (s ContainerSpec) VerifyParams() bool { 218 | verify := s.Image != "" && 219 | s.CpuLimit >= 0 && 220 | s.MemoryLimit >= 0 && 221 | s.Expose >= 0 222 | if !verify { 223 | return false 224 | } 225 | for _, cvSpec := range s.CloudVolumes { 226 | if !cvSpec.VerifyParams() { 227 | return false 228 | } 229 | } 230 | return true 231 | } 232 | 233 | func (s ContainerSpec) Equals(o ContainerSpec) bool { 234 | if (s.Entrypoint == nil && o.Entrypoint != nil) || (s.Entrypoint != 
nil && o.Entrypoint == nil) { 235 | return false 236 | } 237 | 238 | return s.Name == o.Name && 239 | s.Image == o.Image && 240 | generics.Equal_StringSlice(s.Env, o.Env) && 241 | generics.Equal_StringSlice(s.Command, o.Command) && 242 | generics.Equal_StringSlice(s.DnsSearch, o.DnsSearch) && 243 | s.CpuLimit == o.CpuLimit && 244 | s.MemoryLimit == o.MemoryLimit && 245 | s.Expose == o.Expose && 246 | s.User == o.User && 247 | s.WorkingDir == o.WorkingDir && 248 | generics.Equal_StringSlice(s.Volumes, o.Volumes) && 249 | generics.Equal_StringSlice(s.SystemVolumes, o.SystemVolumes) && 250 | generics.Equal_StringSlice(s.Entrypoint, o.Entrypoint) && 251 | s.LogConfig.Type == o.LogConfig.Type && 252 | generics.Equal_StringStringMap(s.LogConfig.Config, o.LogConfig.Config) 253 | } 254 | 255 | func NewContainerSpec(image string) ContainerSpec { 256 | spec := ContainerSpec{ 257 | Image: image, 258 | } 259 | spec.Version = 1 260 | spec.CreatedAt = time.Now() 261 | spec.UpdatedAt = spec.CreatedAt 262 | return spec 263 | } 264 | 265 | type DependencyPolicy int 266 | 267 | const ( 268 | DependencyNamespaceLevel = iota 269 | DependencyNodeLevel 270 | ) 271 | 272 | type Dependency struct { 273 | PodName string 274 | Policy DependencyPolicy 275 | } 276 | 277 | func (d Dependency) Clone() Dependency { 278 | return d 279 | } 280 | 281 | type PodPrevState struct { 282 | NodeName string 283 | IPs []string 284 | } 285 | 286 | func NewPodPrevState(length int) PodPrevState { 287 | return PodPrevState{ 288 | NodeName: "", 289 | IPs: make([]string, length), 290 | } 291 | } 292 | 293 | func (pps PodPrevState) Clone() PodPrevState { 294 | newState := pps 295 | newState.IPs = make([]string, len(pps.IPs)) 296 | copy(newState.IPs, pps.IPs) 297 | return newState 298 | } 299 | 300 | type PodSpec struct { 301 | ImSpec 302 | Network string 303 | Containers []ContainerSpec 304 | Filters []string // for cluster scheduling 305 | Labels map[string]string 306 | Dependencies []Dependency 307 | Annotation 
string 308 | Stateful bool 309 | SetupTime int 310 | KillTimeout int 311 | PrevState PodPrevState 312 | HealthConfig HealthConfig 313 | } 314 | 315 | func (s PodSpec) GetSetupTime() int { 316 | if s.SetupTime < MinPodSetupTime { 317 | return MinPodSetupTime 318 | } else if s.SetupTime > MaxPodSetupTime { 319 | return MaxPodSetupTime 320 | } 321 | return s.SetupTime 322 | } 323 | 324 | func (s PodSpec) GetKillTimeout() int { 325 | if s.KillTimeout < MinPodKillTimeout { 326 | return MinPodKillTimeout 327 | } else if s.KillTimeout > MaxPodKillTimeout { 328 | return MaxPodKillTimeout 329 | } 330 | return s.KillTimeout 331 | } 332 | 333 | func (s PodSpec) String() string { 334 | return fmt.Sprintf("Pod[name=%s, version=%d, depends=%+v, stateful=%v, #containers=%d]", 335 | s.Name, s.Version, s.Dependencies, s.Stateful, len(s.Containers)) 336 | } 337 | 338 | func (s PodSpec) Clone() PodSpec { 339 | newSpec := s 340 | newSpec.Filters = generics.Clone_StringSlice(s.Filters) 341 | newSpec.Labels = generics.Clone_StringStringMap(s.Labels) 342 | newSpec.Containers = make([]ContainerSpec, len(s.Containers)) 343 | newSpec.PrevState = s.PrevState.Clone() 344 | for i := range s.Containers { 345 | newSpec.Containers[i] = s.Containers[i].Clone() 346 | } 347 | newSpec.Dependencies = make([]Dependency, len(s.Dependencies)) 348 | for i := range s.Dependencies { 349 | newSpec.Dependencies[i] = s.Dependencies[i].Clone() 350 | } 351 | newSpec.HealthConfig = s.HealthConfig 352 | return newSpec 353 | } 354 | 355 | func (s PodSpec) VerifyParams() bool { 356 | verify := s.Name != "" && s.Namespace != "" && 357 | len(s.Containers) > 0 358 | if !verify { 359 | return false 360 | } 361 | for _, cSpec := range s.Containers { 362 | if !cSpec.VerifyParams() { 363 | return false 364 | } 365 | } 366 | return true 367 | } 368 | 369 | func (s PodSpec) IsHardStateful() bool { 370 | return s.Stateful 371 | } 372 | 373 | func (s PodSpec) IsStateful() bool { 374 | return s.HasVolumes() || s.Stateful 375 | 
} 376 | 377 | func (s PodSpec) HasVolumes() bool { 378 | for _, container := range s.Containers { 379 | if len(container.Volumes) == 1 && container.Volumes[0] == kLainLogVolumePath { 380 | continue 381 | } 382 | if len(container.Volumes) > 0 { 383 | return true 384 | } 385 | } 386 | return false 387 | } 388 | 389 | func (s PodSpec) Equals(o PodSpec) bool { 390 | if len(s.Containers) != len(o.Containers) { 391 | return false 392 | } 393 | for i := range s.Containers { 394 | if !s.Containers[i].Equals(o.Containers[i]) { 395 | return false 396 | } 397 | } 398 | if len(s.Dependencies) != len(o.Dependencies) { 399 | return false 400 | } 401 | for i := range s.Dependencies { 402 | if s.Dependencies[i] != o.Dependencies[i] { 403 | return false 404 | } 405 | } 406 | return s.Name == o.Name && 407 | s.Namespace == o.Namespace && 408 | s.Version == o.Version && 409 | s.Annotation == o.Annotation && 410 | s.Stateful == o.Stateful && 411 | generics.Equal_StringSlice(s.Filters, o.Filters) && 412 | generics.Equal_StringStringMap(s.Labels, o.Labels) && 413 | s.KillTimeout == o.KillTimeout && 414 | s.SetupTime == o.SetupTime && 415 | s.HealthConfig.Equals(o.HealthConfig) 416 | } 417 | 418 | func (s PodSpec) Merge(o PodSpec) PodSpec { 419 | // deal with params keeping original 420 | if len(s.Containers) > 0 { 421 | sc := s.Containers[0] 422 | for i, _ := range o.Containers { 423 | if i >= len(s.Containers) { 424 | sc = s.Containers[0] 425 | } else { 426 | sc = s.Containers[i] 427 | } 428 | if o.Containers[i].CpuLimit == 0 { 429 | o.Containers[i].CpuLimit = sc.CpuLimit 430 | } 431 | if o.Containers[i].MemoryLimit == 0 { 432 | o.Containers[i].MemoryLimit = sc.MemoryLimit 433 | } 434 | } 435 | } 436 | s.Containers = o.Containers 437 | s.Dependencies = o.Dependencies 438 | s.Filters = o.Filters 439 | s.Labels = o.Labels 440 | s.Annotation = o.Annotation 441 | s.Stateful = o.Stateful 442 | s.Version += 1 443 | s.UpdatedAt = time.Now() 444 | s.PrevState = o.PrevState 445 | s.SetupTime = 
o.SetupTime 446 | s.KillTimeout = o.KillTimeout 447 | s.HealthConfig = o.HealthConfig 448 | return s 449 | } 450 | 451 | func NewPodSpec(containerSpec ContainerSpec, otherSpecs ...ContainerSpec) PodSpec { 452 | cSpecs := make([]ContainerSpec, 1+len(otherSpecs)) 453 | cSpecs[0] = containerSpec 454 | for i, cs := range otherSpecs { 455 | cSpecs[i+1] = cs 456 | } 457 | spec := PodSpec{ 458 | Containers: cSpecs, 459 | PrevState: NewPodPrevState(len(otherSpecs) + 1), 460 | } 461 | spec.Version = 1 462 | spec.CreatedAt = time.Now() 463 | spec.UpdatedAt = spec.CreatedAt 464 | return spec 465 | } 466 | 467 | type RestartPolicy int 468 | 469 | const ( 470 | RestartPolicyNever = iota 471 | RestartPolicyAlways 472 | RestartPolicyOnFail 473 | ) 474 | 475 | func (rp RestartPolicy) String() string { 476 | switch rp { 477 | case RestartPolicyNever: 478 | return "RestartPolicyNever" 479 | case RestartPolicyAlways: 480 | return "RestartPolicyAlways" 481 | case RestartPolicyOnFail: 482 | return "RestartPolicyOnFail" 483 | default: 484 | return "Unknown RestartPolicy" 485 | } 486 | } 487 | 488 | type PodGroupPrevState struct { 489 | Nodes []string 490 | // we think a instance only have one ip, as now a instance only have one container. 
491 | IPs []string 492 | } 493 | 494 | func (pgps PodGroupPrevState) Clone() PodGroupPrevState { 495 | newState := PodGroupPrevState{ 496 | Nodes: make([]string, len(pgps.Nodes)), 497 | IPs: make([]string, len(pgps.Nodes)), 498 | } 499 | copy(newState.Nodes, pgps.Nodes) 500 | copy(newState.IPs, pgps.IPs) 501 | return newState 502 | } 503 | 504 | func (pgps PodGroupPrevState) Reset(instanceNo int) PodGroupPrevState { 505 | newState := PodGroupPrevState{ 506 | Nodes: make([]string, instanceNo), 507 | IPs: make([]string, instanceNo), 508 | } 509 | copy(newState.Nodes, pgps.Nodes) 510 | copy(newState.IPs, pgps.IPs) 511 | return newState 512 | } 513 | 514 | func (pgps PodGroupPrevState) Length() int { 515 | if pgps.Nodes == nil { 516 | return 0 517 | } 518 | return len(pgps.Nodes) 519 | } 520 | 521 | type PodGroupSpec struct { 522 | ImSpec 523 | Pod PodSpec 524 | NumInstances int 525 | RestartPolicy RestartPolicy 526 | } 527 | 528 | func (spec PodGroupSpec) String() string { 529 | return fmt.Sprintf("PodGroup[name=%s, version=%d, #instances=%d, restart=%s]", 530 | spec.Name, spec.Version, spec.NumInstances, spec.RestartPolicy) 531 | } 532 | 533 | func (spec PodGroupSpec) Clone() PodGroupSpec { 534 | newSpec := spec 535 | newSpec.Pod = spec.Pod.Clone() 536 | return newSpec 537 | } 538 | 539 | func (spec PodGroupSpec) Equals(o PodGroupSpec) bool { 540 | return spec.Name == o.Name && 541 | spec.Namespace == o.Namespace && 542 | spec.Version == o.Version && 543 | spec.Pod.Equals(o.Pod) && 544 | spec.NumInstances == o.NumInstances && 545 | spec.RestartPolicy == o.RestartPolicy 546 | } 547 | 548 | func (spec PodGroupSpec) VerifyParams() bool { 549 | verify := spec.Name != "" && 550 | spec.Namespace != "" && 551 | spec.NumInstances >= 0 552 | if !verify { 553 | return false 554 | } 555 | return spec.Pod.VerifyParams() 556 | } 557 | 558 | func NewPodGroupSpec(name string, namespace string, podSpec PodSpec, numInstances int) PodGroupSpec { 559 | spec := PodGroupSpec{ 560 | Pod: 
podSpec, 561 | NumInstances: numInstances, 562 | } 563 | spec.Name = name 564 | spec.Namespace = namespace 565 | spec.Version = 1 566 | spec.CreatedAt = time.Now() 567 | spec.UpdatedAt = spec.CreatedAt 568 | spec.Pod.ImSpec = spec.ImSpec 569 | return spec 570 | } 571 | --------------------------------------------------------------------------------