├── .github └── main.workflow ├── .gitignore ├── cmd ├── cmd.go ├── local.go └── ssh.go ├── iptables └── iptables.go ├── cmt.go ├── LICENSE ├── magic_migrator.sh ├── README.md ├── validate └── validate.go └── migrate └── migrate.go /.github/main.workflow: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | cmt 27 | runtime.json 28 | config.json 29 | rootfs 30 | cpuinfo.img 31 | descriptors.json 32 | -------------------------------------------------------------------------------- /cmd/cmd.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "net/url" 6 | ) 7 | 8 | type Cmd interface { 9 | Run(name string, args ...string) (string, string, error) 10 | Start(name string, args ...string) (Cmd, error) 11 | Wait() error 12 | Output(name string, args ...string) (string, string, error) 13 | URL(path string) *url.URL 14 | } 15 | 16 | func Scp(src, dest *url.URL) error { 17 | scpCmd := NewLocal() 18 | _, _, err := scpCmd.Run("scp", "-3", formatCopyURL(src), formatCopyURL(dest)) 19 | 20 | return err 21 | } 22 | 23 | func formatCopyURL(u *url.URL) string { 24 | if u.Host == "" { 25 | return u.String() 26 | } 27 | return fmt.Sprintf("%s@%s:%s", u.User.Username(), u.Host, u.Path) 28 | } 29 | -------------------------------------------------------------------------------- /iptables/iptables.go: -------------------------------------------------------------------------------- 1 | package iptables 2 | 3 | import "strings" 4 | 5 | // This code can be improved a lot! 6 | // Please don't consider this production ready! 7 | func Diff(a, b string) []string { 8 | alines := strings.Split(a, "\n") 9 | blines := strings.Split(b, "\n") 10 | 11 | var diff []string 12 | 13 | for _, aline := range alines { 14 | // Only consider -A lines which are the actual rules 15 | if !strings.HasPrefix(strings.TrimSpace(aline), "-A") { 16 | continue 17 | } 18 | match := false 19 | for _, bline := range blines { 20 | if aline == bline { 21 | match = true 22 | break 23 | } 24 | } 25 | if !match { 26 | diff = append(diff, aline) 27 | } 28 | } 29 | 30 | return diff 31 | } 32 | -------------------------------------------------------------------------------- /cmt.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "os" 6 | 7 | "github.com/codegangsta/cli" 8 | "github.com/marcosnils/cmt/migrate" 9 | "github.com/marcosnils/cmt/validate" 10 | ) 11 | 12 | const ( 13 | version = "0.1" 14 | usage = `Container Migration Tool 15 | 16 | cmt is a Docker Global Hackday #3 project. 17 | The purpose of the project is to create an external command line tool 18 | that can be either used with docker or runC which helps on the task to live migrate 19 | containers between different hosts by performing pre-migration validations 20 | and allowing to auto-discover suitable target hosts.` 21 | ) 22 | 23 | func main() { 24 | app := cli.NewApp() 25 | app.Name = "cmt" 26 | app.Usage = usage 27 | app.Version = version 28 | app.EnableBashCompletion = true 29 | 30 | app.Commands = []cli.Command{ 31 | migrate.Command, 32 | validate.Command, 33 | } 34 | 35 | if err := app.Run(os.Args); err != nil { 36 | log.Fatal(err) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Marcos Nils, Jonathan Leibiusky and Gabriel Eisbruch 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /cmd/local.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "log" 7 | "net/url" 8 | "os/exec" 9 | ) 10 | 11 | type LocalCmd struct { 12 | currentCommand *exec.Cmd 13 | } 14 | 15 | func NewLocal() Cmd { 16 | return &LocalCmd{} 17 | } 18 | 19 | func (c *LocalCmd) Run(name string, args ...string) (string, string, error) { 20 | command := exec.Command(name, args...) 21 | 22 | var stdout bytes.Buffer 23 | var stderr bytes.Buffer 24 | 25 | command.Stdout = &stdout 26 | command.Stderr = &stderr 27 | 28 | err := command.Run() 29 | 30 | return stdout.String(), stderr.String(), err 31 | } 32 | 33 | func (c *LocalCmd) Start(name string, args ...string) (Cmd, error) { 34 | command := exec.Command(name, args...) 35 | c.currentCommand = command 36 | return c, command.Start() 37 | } 38 | 39 | func (c *LocalCmd) Wait() error { 40 | if c.currentCommand == nil { 41 | return errors.New("Start needs to be called before wait") 42 | 43 | } 44 | defer func() { 45 | // Clear out current command 46 | c.currentCommand = nil 47 | 48 | }() 49 | return c.currentCommand.Wait() 50 | } 51 | 52 | func (c *LocalCmd) Output(name string, args ...string) (string, string, error) { 53 | log.Println(name, args) 54 | stdout, stderr, err := c.Run(name, args...) 55 | log.Println(stdout, stderr) 56 | return stdout, stderr, err 57 | } 58 | 59 | func (c *LocalCmd) URL(path string) *url.URL { 60 | return &url.URL{ 61 | Path: path, 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /magic_migrator.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | MACHINE_FROM=$1 4 | shift 5 | CONTAINER_ID_FROM=$1 6 | shift 7 | MACHINE_TO=$1 8 | shift 9 | CONTAINER_ID_TO=$1 10 | shift 11 | PRE_DUMP=$1 12 | 13 | 14 | function dump() { 15 | ssh $MACHINE_TO "mkdir -p $CONTAINER_ID_TO/../" 16 | ssh $MACHINE_FROM < /tmp/log&" 29 | } 30 | 31 | function predump() { 32 | ssh $MACHINE_TO "mkdir -p $CONTAINER_ID_TO/../" 33 | ssh $MACHINE_FROM < /tmp/log&" 53 | } 54 | 55 | if [[ -z "$PRE_DUMP" ]]; then 56 | dump 57 | else 58 | predump 59 | fi 60 | -------------------------------------------------------------------------------- /cmd/ssh.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "io/ioutil" 8 | "log" 9 | "net" 10 | "net/url" 11 | "os" 12 | "strings" 13 | 14 | "golang.org/x/crypto/ssh" 15 | "golang.org/x/crypto/ssh/agent" 16 | ) 17 | 18 | type SSHCmd struct { 19 | config ssh.ClientConfig 20 | client *ssh.Client 21 | host string 22 | connected bool 23 | currentSession *ssh.Session 24 | } 25 | 26 | func NewSSH(user, host string) *SSHCmd { 27 | c := SSHCmd{} 28 | c.config.User = user 29 | chunks := strings.Split(host, ":") 30 | c.host = chunks[0] 31 | return &c 32 | } 33 | 34 | func (r *SSHCmd) UseAgent() error { 35 | if r.connected { 36 | return fmt.Errorf("Cannot add authentication methods while being connected") 37 | } 38 | sshAgent, err := net.Dial("unix", os.Getenv("SSH_AUTH_SOCK")) 39 | if err != nil { 40 | return err 41 | } 42 | r.config.Auth = append(r.config.Auth, ssh.PublicKeysCallback(agent.NewClient(sshAgent).Signers)) 43 | 44 | return nil 45 | } 46 | 47 | func (r *SSHCmd) UsePrivateKey(path string) error { 48 | if r.connected { 49 | return fmt.Errorf("Cannot add authentication methods while being connected") 50 | } 51 | content, err := ioutil.ReadFile(path) 52 | if err != nil { 53 | return err 54 | } 55 | privateKey, err := ssh.ParseRawPrivateKey(content) 56 | if err != nil { 57 | return err 58 | } 59 | signer, err := ssh.NewSignerFromKey(privateKey) 60 | if err != nil { 61 | return err 62 | } 63 | 64 | authMethod := ssh.PublicKeys(signer) 65 | r.config.Auth = append(r.config.Auth, authMethod) 66 | 67 | return nil 68 | } 69 | 70 | func (r *SSHCmd) connect() error { 71 | if r.connected { 72 | return nil 73 | } 74 | 75 | client, err := ssh.Dial("tcp", fmt.Sprintf("%s:%d", r.host, 22), &r.config) 76 | if err != nil { 77 | return err 78 | } 79 | 80 | r.client = client 81 | r.connected = true 82 | 83 | return nil 84 | } 85 | 86 | func (r *SSHCmd) Run(name string, args ...string) (string, string, error) { 87 | if !r.connected { 88 | err := r.connect() 89 | if err != nil { 90 | return "", "", err 91 | } 92 | } 93 | 94 | session, err := r.client.NewSession() 95 | if err != nil { 96 | return "", "", err 97 | } 98 | defer session.Close() 99 | 100 | var stdout bytes.Buffer 101 | var stderr bytes.Buffer 102 | 103 | session.Stdout = &stdout 104 | session.Stderr = &stderr 105 | 106 | err = session.Run(fmt.Sprintf("%s %s", name, strings.Join(args, " "))) 107 | 108 | return stdout.String(), stderr.String(), err 109 | } 110 | 111 | func (r *SSHCmd) Start(name string, args ...string) (Cmd, error) { 112 | if !r.connected { 113 | err := r.connect() 114 | if err != nil { 115 | return nil, err 116 | } 117 | } 118 | 119 | session, err := r.client.NewSession() 120 | if err != nil { 121 | return nil, err 122 | } 123 | r.currentSession = session 124 | 125 | return r, session.Start(fmt.Sprintf("%s %s", name, strings.Join(args, " "))) 126 | } 127 | 128 | func (r *SSHCmd) Wait() error { 129 | if r.currentSession == nil { 130 | return errors.New("Start needs to be called before wait") 131 | } 132 | 133 | defer func() { 134 | r.currentSession.Close() 135 | r.currentSession = nil 136 | }() 137 | 138 | return r.currentSession.Wait() 139 | } 140 | 141 | func (r *SSHCmd) Output(name string, args ...string) (string, string, error) { 142 | log.Println(name, args) 143 | stdout, stderr, err := r.Run(name, args...) 144 | log.Println(stdout, stderr) 145 | return stdout, stderr, err 146 | } 147 | 148 | func (c *SSHCmd) URL(path string) *url.URL { 149 | return &url.URL{ 150 | User: url.User(c.config.User), 151 | Host: c.host, 152 | Path: path, 153 | } 154 | 155 | } 156 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cmt 2 | Container migration tool for the [Docker Global HackDay #3](https://www.docker.com/community/hackathon?mkt_tok=3RkMMJWWfF9wsRonuqTMZKXonjHpfsX57ugoXqe0lMI/0ER3fOvrPUfGjI4AT8dkI%2BSLDwEYGJlv6SgFQ7LMMaZq1rgMXBk%3D) 3 | 4 | https://www.youtube.com/watch?v=pwf0-_cs6U4 5 | 6 | 7 | ## Description 8 | 9 | Checkpoint & Restore is still a feature which is not generically available to container users. Certain understanding about how it works is needed and it’s most likely that users get errors when trying to perform CR due to some restrictions or differences between the source and the target host. The purpose of the project is to create an external command line tool that can be either used with docker or runC which helps on the task to live migrate containers between different hosts by performing pre-migration validations and allowing to auto-discover suitable target hosts. 10 | 11 | ## IMPORTANT!! 12 | 13 | This project uses custom patched versions of ~~[CRIU](https://github.com/marcosnils/criu)~~ and [runC](https://github.com/marcosnils/runc/tree/pre_dump) to work. It's important to install these specific versions for CMT to work. CRIU patch has been already proposed to upstream, we hold on runC on the other hand because we needed to implement it fast and we're not sure of any possible impact on the project. 14 | 15 | *Update 09/21/15*: CRIU patch as been merged to upstream [here](https://github.com/xemul/criu/commit/e3f900f95429bc0447d8e3cff3cbb2e0a19f8d23). Master version should work with CMT. 16 | 17 | 18 | ## Usage 19 | 20 | `go get github.com/marcosnils/cmt` 21 | 22 | `cmt --help` should list all possible CMT commands and options 23 | 24 | ## Authentication 25 | 26 | CMT uses ssh-agent authentication when trying to communicate between hosts. Make sure your agent has the corresponding credentials before trying to perform any action. 27 | 28 | Instruction about how to setup ssh-agent can be found here: http://sshkeychain.sourceforge.net/mirrors/SSH-with-Keys-HOWTO/SSH-with-Keys-HOWTO-6.html 29 | 30 | 31 | ## Design / performance 32 | 33 | CMT was thought to be as portable and lightweight as possible. As it relies on ssh heaviliy for remote communication we also took into account SSH session optimizations and concurrent executions 34 | to speed up the whole process. 35 | 36 | It was also designed with the idea to be easily adaptable to any underlying mechanism of C/R. This means that when Docker finally implements C/R natively, CMT can take care of all the necessary 37 | heavy duty to perform container migration (image layer diffs included). 38 | 39 | ## Hooks 40 | 41 | CMT supports 3 kind of hooks. A hook is any command that you provide and that CMT will run when reaching some specific state in the migration process. 42 | The supported hooks are: 43 | - Pre-restore: which is executed right before restoring the container 44 | on the destination host. 45 | - Post-restore: which is executed after successfully restoring the 46 | container on the destination host. 47 | - Failed-restore: which is executed after a failing to restore the 48 | container on the destination host. 49 | 50 | For example: 51 | ``` 52 | cmt migrate --hook-pre-restore "echo pre restore" --hook-post-restore "echo post restore" --hook-failed-restore "echo failed restore" 53 | ``` 54 | 55 | There are some very useful scenarios for this. For example in AWS you could use the pre-restore hook to move an Elastic Network Interface so the destination has the same IP address of the source. 56 | 57 | ## FAQ 58 | 59 | ### What kind of validations does CMT do? 60 | 61 | - Binary existence (runC, criu) 62 | - Binary version matching 63 | - Destination host free memory 64 | - Kernel capabilities to perform c/r (`criu check --ms`) 65 | - CPU capability problem (http://criu.org/Cpuinfo) 66 | 67 | 68 | ### Can CMT perform TCP live migration without end-user disconnection? 69 | 70 | Yes, although all the heavy work is done by CRIU, CMT provides some help when migrating TCP connections to avoid end-user disconnect. 71 | We've accomplished this in AWS using ENI and VPC peering connections. 72 | 73 | (*Hope to find the time to demo this soon*) 74 | 75 | ### Is it necessary to perform validations each time when migrating? 76 | 77 | No, validations are performed by default as a security measure, using `--force` flag bypasses them. 78 | 79 | ### What does pre-dump exactly do? 80 | 81 | Please refer to the official CRIU documentation for iterative migration specifics. http://criu.org/Iterative_migration 82 | 83 | ### What does downtime mean? 84 | 85 | Refer to the CRIU documentation for downtime/freeze time. (http://criu.org/) 86 | 87 | ## TODO 88 | 89 | Redo this project as it should be done (tests please!!). 90 | 91 | We do have some [issues](https://github.com/marcosnils/cmt/issues) we though about implementing but we couldn't find the time. 92 | 93 | 94 | ## Special mention to: 95 | 96 | - Docker and the community for making us leave our comfort zone and hack on cool stuff. We've learnt a lot these past 4 days. 97 | - Medallia Argentina for hosting the Buenos Aires Docker meetup and being excellent people. 98 | - All OS contributors who can make this happen. 99 | - Ross Boucher (@boucher) for dedicating his personal time to help us answering our annoying questions. 100 | -------------------------------------------------------------------------------- /validate/validate.go: -------------------------------------------------------------------------------- 1 | package validate 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net/url" 7 | "os/exec" 8 | "strings" 9 | "sync" 10 | 11 | "golang.org/x/crypto/ssh" 12 | 13 | "github.com/codegangsta/cli" 14 | "github.com/marcosnils/cmt/cmd" 15 | ) 16 | 17 | var Command = cli.Command{ 18 | Name: "validate", 19 | Usage: "Validate host migration capabilities", 20 | Flags: []cli.Flag{ 21 | cli.StringFlag{ 22 | Name: "src", 23 | Usage: "Source host URL [user@host:port]", 24 | }, 25 | cli.StringFlag{ 26 | Name: "dst", 27 | Usage: "Destination host URL [user@host:port]", 28 | }, 29 | }, 30 | Action: func(c *cli.Context) { 31 | srcURL := ParseURL(c.String("src")) 32 | dstURL := ParseURL(c.String("dst")) 33 | 34 | Validate(srcURL, dstURL, false) 35 | println("Validation succeded") 36 | 37 | }, 38 | } 39 | 40 | func ParseURL(rawurl string) *url.URL { 41 | if rawurl == "" { 42 | return nil 43 | } 44 | // We do this hack beacuse url.Parse require a schema to do the right thing 45 | schemaUrl := rawurl 46 | if !strings.HasPrefix(rawurl, "ssh://") { 47 | schemaUrl = fmt.Sprintf("ssh://%s", rawurl) 48 | } 49 | 50 | u, err := url.Parse(schemaUrl) 51 | if err != nil { 52 | log.Fatal("Error parsing host: ", rawurl) 53 | } 54 | 55 | return u 56 | 57 | } 58 | 59 | func Validate(src, dst *url.URL, continueOnWarnings bool) (srcCmd, dstCmd cmd.Cmd) { 60 | if src == nil || dst == nil { 61 | log.Fatal("Both src and dst must be specified") 62 | } 63 | 64 | srcCmd = GetCommand(src) 65 | dstCmd = GetCommand(dst) 66 | 67 | if found, e := checkVersion(srcCmd, dstCmd, "criu"); e != nil { 68 | if found && continueOnWarnings { 69 | log.Println("Warning: ", e) 70 | } else { 71 | log.Fatal(e) 72 | } 73 | } 74 | if found, e := checkVersion(srcCmd, dstCmd, "runc"); e != nil { 75 | if found && continueOnWarnings { 76 | log.Println("Warning: ", e) 77 | } else { 78 | log.Fatal(e) 79 | } 80 | } 81 | 82 | if e := checkKernelCap(srcCmd); e != nil { 83 | log.Fatal(e) 84 | } 85 | 86 | if e := checkKernelCap(dstCmd); e != nil { 87 | log.Fatal(e) 88 | } 89 | 90 | if e := checkCPUCompat(srcCmd, dstCmd); e != nil { 91 | if continueOnWarnings { 92 | log.Println("Warning: ", e) 93 | } else { 94 | log.Fatal(e) 95 | } 96 | } 97 | 98 | return 99 | } 100 | 101 | func checkCPUCompat(srcCmd, dstCmd cmd.Cmd) error { 102 | // Dump 103 | _, _, err := srcCmd.Run("criu", "cpuinfo", "dump") 104 | if _, ok := err.(*ssh.ExitError); ok { 105 | return fmt.Errorf("Error dumping CPU info") 106 | } else if _, ok := err.(*exec.ExitError); ok { 107 | return fmt.Errorf("Error dumping CPU info") 108 | } else if err != nil { 109 | return fmt.Errorf("Connection error: %s ", err) 110 | } 111 | 112 | // Copy 113 | 114 | err = cmd.Scp(srcCmd.URL("./cpuinfo.img"), dstCmd.URL(".")) 115 | if _, ok := err.(*ssh.ExitError); ok { 116 | return fmt.Errorf("Error copying dump image") 117 | } else if _, ok := err.(*exec.ExitError); ok { 118 | return fmt.Errorf("Error copying dump image") 119 | } else if err != nil { 120 | return fmt.Errorf("Connection error: %s ", err) 121 | } 122 | 123 | // Check 124 | _, _, err = srcCmd.Run("criu", "cpuinfo", "check") 125 | if _, ok := err.(*ssh.ExitError); ok { 126 | return fmt.Errorf("Error checking CPU info") 127 | } else if _, ok := err.(*exec.ExitError); ok { 128 | return fmt.Errorf("Error checking CPU info") 129 | } else if err != nil { 130 | return fmt.Errorf("Connection error: %s ", err) 131 | } 132 | return nil 133 | } 134 | 135 | func checkKernelCap(c cmd.Cmd) error { 136 | _, _, err := c.Run("sudo", "criu", "check", "--ms") 137 | if _, ok := err.(*ssh.ExitError); ok { 138 | return fmt.Errorf("Error criu checks do not pass") 139 | } else if _, ok := err.(*exec.ExitError); ok { 140 | return fmt.Errorf("Error criu checks do not pass") 141 | } else if err != nil { 142 | return fmt.Errorf("Connection error: %s ", err) 143 | } 144 | return err 145 | } 146 | 147 | func GetCommand(hostURL *url.URL) cmd.Cmd { 148 | if hostURL.Host != "" { 149 | rc := cmd.NewSSH(hostURL.User.Username(), hostURL.Host) 150 | if err := rc.UseAgent(); err != nil { 151 | log.Fatal("Unable to use SSH agent for host: ", hostURL.String()) 152 | } 153 | return rc 154 | 155 | } 156 | 157 | return cmd.NewLocal() 158 | 159 | } 160 | 161 | func checkVersion(sCmd, dCmd cmd.Cmd, name string) (bool, error) { 162 | var wg sync.WaitGroup 163 | wg.Add(2) 164 | var sourceVersion, destVersion string 165 | var sourceError, destError error 166 | go func() { 167 | sourceVersion, sourceError = getVersion(sCmd, name) 168 | wg.Done() 169 | }() 170 | go func() { 171 | destVersion, destError = getVersion(dCmd, name) 172 | wg.Done() 173 | }() 174 | 175 | wg.Wait() 176 | 177 | if sourceError != nil { 178 | return false, fmt.Errorf("%s in src", sourceError) 179 | } 180 | if destError != nil { 181 | return false, fmt.Errorf("%s in dst", destError) 182 | } 183 | 184 | if sourceVersion != destVersion { 185 | return true, fmt.Errorf("ERROR: Source and destination versions of %s do not match", name) 186 | } 187 | 188 | return true, nil 189 | } 190 | 191 | func getVersion(command cmd.Cmd, name string) (string, error) { 192 | version, _, err := command.Run("sudo", name, "--version") 193 | if _, ok := err.(*ssh.ExitError); ok { 194 | return "", fmt.Errorf("Error %s does not exist", name) 195 | } else if _, ok := err.(*exec.ExitError); ok { 196 | return "", fmt.Errorf("Error %s does not exist", name) 197 | } else if err != nil { 198 | return "", fmt.Errorf("Connection error: %s ", err) 199 | } 200 | return version, nil 201 | } 202 | -------------------------------------------------------------------------------- /migrate/migrate.go: -------------------------------------------------------------------------------- 1 | package migrate 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "path/filepath" 7 | "strings" 8 | "sync" 9 | "time" 10 | 11 | "github.com/codegangsta/cli" 12 | "github.com/marcosnils/cmt/cmd" 13 | "github.com/marcosnils/cmt/iptables" 14 | "github.com/marcosnils/cmt/validate" 15 | ) 16 | 17 | var Command = cli.Command{ 18 | Name: "migrate", 19 | Usage: "Migrate running container", 20 | Flags: []cli.Flag{ 21 | cli.StringFlag{ 22 | Name: "src", 23 | Usage: "Source host where the container is running", 24 | }, 25 | cli.StringFlag{ 26 | Name: "dst", 27 | Usage: "Target host to migrate the container", 28 | }, 29 | cli.BoolFlag{ 30 | Name: "pre-dump", 31 | Usage: "Perform a pre-dump to minimize downtime", 32 | }, 33 | cli.BoolFlag{ 34 | Name: "force", 35 | Usage: "Doesn't fail of validations related to different versions of executable binaries and cpu differences", 36 | }, 37 | cli.StringFlag{ 38 | Name: "hook-pre-restore", 39 | Usage: "Command to run right before restoring process", 40 | }, 41 | cli.StringFlag{ 42 | Name: "hook-post-restore", 43 | Usage: "Command to run right after a successful process restoration", 44 | }, 45 | cli.StringFlag{ 46 | Name: "hook-failed-restore", 47 | Usage: "Command to run right after a failed process restoration", 48 | }, 49 | }, 50 | Action: func(c *cli.Context) { 51 | srcUrl := validate.ParseURL(c.String("src")) 52 | dstUrl := validate.ParseURL(c.String("dst")) 53 | 54 | log.Println("Performing validations") 55 | src, dst := validate.Validate(srcUrl, dstUrl, c.Bool("force")) 56 | 57 | log.Println("Preparing everything to do a checkpoint") 58 | containerId := getContainerId(srcUrl.Path) 59 | var imagesPath string 60 | var restoreCmd cmd.Cmd 61 | var migrateStart time.Time 62 | var downtime time.Duration 63 | 64 | if c.Bool("pre-dump") { 65 | // Process pre-dump 66 | predumpPath := fmt.Sprintf("%s/images/0", srcUrl.Path) 67 | prepareDir(src, predumpPath) 68 | 69 | checkpoint(src, containerId, predumpPath, true) 70 | 71 | srcTarFile := fmt.Sprintf("%s/predump.tar.gz", srcUrl.Path) 72 | prepareTar(src, srcTarFile, predumpPath) 73 | 74 | prepareDir(dst, fmt.Sprintf("%s/images/0", dstUrl.Path)) 75 | 76 | log.Println("Copying predump image to dst") 77 | err := cmd.Scp(src.URL(srcTarFile), dst.URL(fmt.Sprintf("%s/images/0", dstUrl.Path))) 78 | if err != nil { 79 | log.Fatal("Error copying predump image files to dst", err) 80 | } 81 | 82 | dstTarFile := fmt.Sprintf("%s/images/0/predump.tar.gz", dstUrl.Path) 83 | unpackTar(dst, dstTarFile, fmt.Sprintf("%s/images/0", dstUrl.Path)) 84 | 85 | // Process final image 86 | migrateStart = time.Now() 87 | 88 | 89 | iptablesBefore, ipErr := getIPTables(src) 90 | 91 | if ipErr != nil { 92 | log.Fatal("Error capturing iptables rules. ", ipErr) 93 | } 94 | imagesPath = fmt.Sprintf("%s/images/1", srcUrl.Path) 95 | prepareDir(src, fmt.Sprintf("%s/images/1", srcUrl.Path)) 96 | log.Println("Performing the checkpoint") 97 | _, _, err = src.Run("sudo", "runc", "--id", containerId, "checkpoint", "--image-path", imagesPath, "--prev-images-dir", "../0", "--track-mem", "--tcp-established") 98 | if err != nil { 99 | log.Fatal("Error performing checkpoint:", err) 100 | } 101 | 102 | iptablesAfter, ipErr2 := getIPTables(src) 103 | 104 | if ipErr2 != nil { 105 | log.Fatal("Error capturing iptables rules. ", ipErr2) 106 | } 107 | 108 | iptablesRules := iptables.Diff(iptablesAfter, iptablesBefore) 109 | 110 | srcTarFile = fmt.Sprintf("%s/dump.tar.gz", srcUrl.Path) 111 | prepareTar(src, srcTarFile, imagesPath) 112 | prepareDir(dst, fmt.Sprintf("%s/images/1", dstUrl.Path)) 113 | 114 | log.Println("Copying final dump image to dst") 115 | err = cmd.Scp(src.URL(srcTarFile), dst.URL(fmt.Sprintf("%s/images/1", dstUrl.Path))) 116 | if err != nil { 117 | log.Fatal("Error copying predump image files to dst", err) 118 | } 119 | 120 | dstTarFile = fmt.Sprintf("%s/images/1/dump.tar.gz", dstUrl.Path) 121 | unpackTar(dst, dstTarFile, fmt.Sprintf("%s/images/1", dstUrl.Path)) 122 | 123 | log.Println("Performing the restore") 124 | 125 | // first thing to do, apply the iptables rules 126 | applyErr := applyIPTablesRules(dst, iptablesRules) 127 | if applyErr != nil { 128 | log.Fatal("Error applying IPTables rules. ", applyErr) 129 | } 130 | TriggerHook(c.String("hook-pre-restore")) 131 | 132 | // after the restore, we remove iptable rules from source host 133 | removeErr := removeIPTablesRules(src, iptablesRules) 134 | if removeErr != nil { 135 | log.Fatal("Error removing IPTables rules. ", removeErr) 136 | } 137 | configFilePath := fmt.Sprintf("%s/config.json", dstUrl.Path) 138 | runtimeFilePath := fmt.Sprintf("%s/runtime.json", dstUrl.Path) 139 | dstImagesPath := fmt.Sprintf("%s/images/1", dstUrl.Path) 140 | 141 | restoreCmd, err = dst.Start("sudo", "runc", "--id", containerId, "restore", "--tcp-established", "--image-path", dstImagesPath, "--config-file", configFilePath, "--runtime-file", runtimeFilePath) 142 | if err != nil { 143 | log.Fatal("Error performing restore:", err) 144 | } 145 | } else { 146 | imagesPath = fmt.Sprintf("%s/images", srcUrl.Path) 147 | prepareDir(src, imagesPath) 148 | 149 | migrateStart = time.Now() 150 | iptablesBefore, ipErr := getIPTables(src) 151 | 152 | if ipErr != nil { 153 | log.Fatal("Error capturing iptables rules. ", ipErr) 154 | } 155 | checkpoint(src, containerId, imagesPath, false) 156 | iptablesAfter, ipErr2 := getIPTables(src) 157 | 158 | if ipErr2 != nil { 159 | log.Fatal("Error capturing iptables rules. ", ipErr2) 160 | } 161 | 162 | iptablesRules := iptables.Diff(iptablesAfter, iptablesBefore) 163 | 164 | srcTarFile := fmt.Sprintf("%s/dump.tar.gz", srcUrl.Path) 165 | prepareTar(src, srcTarFile, imagesPath) 166 | 167 | prepareDir(dst, fmt.Sprintf("%s/images", dstUrl.Path)) 168 | 169 | log.Println("Copying checkpoint image to dst") 170 | err := cmd.Scp(src.URL(srcTarFile), dst.URL(fmt.Sprintf("%s/images", dstUrl.Path))) 171 | if err != nil { 172 | log.Fatal("Error copying image files to dst", err) 173 | } 174 | 175 | dstTarFile := fmt.Sprintf("%s/images/dump.tar.gz", dstUrl.Path) 176 | unpackTar(dst, dstTarFile, fmt.Sprintf("%s/images", dstUrl.Path)) 177 | 178 | log.Println("Performing the restore") 179 | // first thing to do, apply the iptables rules 180 | applyErr := applyIPTablesRules(dst, iptablesRules) 181 | if applyErr != nil { 182 | log.Fatal("Error applying IPTables rules. ", applyErr) 183 | } 184 | 185 | TriggerHook(c.String("hook-pre-restore")) 186 | 187 | // after the restore, we remove iptable rules from source host 188 | removeErr := removeIPTablesRules(src, iptablesRules) 189 | if removeErr != nil { 190 | log.Fatal("Error removing IPTables rules. ", removeErr) 191 | } 192 | configFilePath := fmt.Sprintf("%s/config.json", dstUrl.Path) 193 | runtimeFilePath := fmt.Sprintf("%s/runtime.json", dstUrl.Path) 194 | dstImagesPath := fmt.Sprintf("%s/images", dstUrl.Path) 195 | restoreCmd, err = dst.Start("sudo", "runc", "--id", containerId, "restore", "--tcp-established", "--image-path", dstImagesPath, "--config-file", configFilePath, "--runtime-file", runtimeFilePath) 196 | if err != nil { 197 | log.Fatal("Error performing restore:", err) 198 | } 199 | 200 | } 201 | 202 | var restoreSucceed bool 203 | var restoreError error 204 | var wg sync.WaitGroup 205 | wg.Add(1) 206 | 207 | go func() { 208 | restoreError = restoreCmd.Wait() 209 | wg.Done() 210 | }() 211 | 212 | go func() { 213 | log.Println("Waiting for container to start...") 214 | // We make a fast check so we don't wait for the first ticker internal 215 | if isRunning(containerId, dst) { 216 | restoreSucceed = true 217 | wg.Done() 218 | return 219 | } 220 | ticker := time.NewTicker(200 * time.Millisecond) 221 | go func() { 222 | for _ = range ticker.C { 223 | if isRunning(containerId, dst) { 224 | restoreSucceed = true 225 | break 226 | } 227 | 228 | } 229 | ticker.Stop() 230 | wg.Done() 231 | }() 232 | }() 233 | 234 | wg.Wait() 235 | 236 | downtime = time.Since(migrateStart) 237 | 238 | if restoreSucceed { 239 | log.Printf("Restore finished successfully, total downtime: %dms", downtime/time.Millisecond) 240 | TriggerHook(c.String("hook-post-restore")) 241 | } else { 242 | log.Println("Error performing restore:", restoreError) 243 | TriggerHook(c.String("hook-failed-restore")) 244 | } 245 | 246 | }, 247 | } 248 | 249 | func applyIPTablesRules(host cmd.Cmd, rules []string) error { 250 | for _, rule := range rules { 251 | args := []string{"iptables"} 252 | args = append(args, strings.Fields(rule)...) 253 | _, _, err := host.Run("sudo", args...) 254 | if err != nil { 255 | return err 256 | } 257 | } 258 | return nil 259 | } 260 | 261 | func removeIPTablesRules(host cmd.Cmd, rules []string) error { 262 | for _, rule := range rules { 263 | args := []string{"iptables"} 264 | args = append(args, strings.Fields(rule)...) 265 | args[1] = "-D" 266 | _, _, err := host.Run("sudo", args...) 267 | if err != nil { 268 | return err 269 | } 270 | } 271 | return nil 272 | } 273 | 274 | func getIPTables(host cmd.Cmd) (string, error) { 275 | rules, _, err := host.Run("sudo", "iptables-save") 276 | if err != nil { 277 | return "", err 278 | } 279 | return rules, nil 280 | } 281 | 282 | func isRunning(containerId string, dstCmd cmd.Cmd) bool { 283 | _, _, err := dstCmd.Run("stat", fmt.Sprintf("/var/run/opencontainer/containers/%s", containerId)) 284 | if err != nil { 285 | return false 286 | } 287 | 288 | return true 289 | } 290 | 291 | func unpackTar(cmd cmd.Cmd, tarFile, workDir string) { 292 | log.Println("Preparing image at destination host") 293 | _, _, err := cmd.Run("sudo", "tar", "-C", workDir, "-xvzf", tarFile) 294 | if err != nil { 295 | log.Fatal("Error uncompressing image in destination:", err) 296 | } 297 | } 298 | 299 | func prepareTar(cmd cmd.Cmd, tarFile, workDir string) { 300 | _, _, err := cmd.Run("sudo", "tar", "-czf", tarFile, "-C", fmt.Sprintf("%s/", workDir), ".") 301 | if err != nil { 302 | log.Fatal("Error compressing image in source:", err) 303 | } 304 | } 305 | 306 | func checkpoint(cmd cmd.Cmd, containerId, imagesPath string, predump bool) { 307 | log.Printf("Performing the checkpoint predump = %t\n", predump) 308 | args := []string{"runc", "--id", containerId, "checkpoint", "--tcp-established", "--track-mem", "--image-path", imagesPath} 309 | if predump { 310 | args = append(args, "--pre-dump") 311 | } 312 | _, _, err := cmd.Run("sudo", args...) 313 | if err != nil { 314 | log.Fatal("Error performing checkpoint:", err) 315 | } 316 | } 317 | 318 | func prepareDir(cmd cmd.Cmd, path string) { 319 | _, _, err := cmd.Run("mkdir", "-p", path) 320 | if err != nil { 321 | log.Fatal("Error preparing pre-dump dir:", err) 322 | } 323 | } 324 | 325 | func getContainerId(path string) string { 326 | _, id := filepath.Split(path) 327 | return id 328 | } 329 | 330 | func TriggerHook(command string) error { 331 | if command == "" { 332 | return nil 333 | } 334 | 335 | log.Printf("Running hook: %s\n", command) 336 | 337 | args := strings.Fields(command) 338 | c := cmd.NewLocal() 339 | stdout, stderr, err := c.Run(args[0], args[1:]...) 340 | 341 | log.Println(stdout, stderr) 342 | 343 | return err 344 | } 345 | --------------------------------------------------------------------------------