├── .gitignore ├── ReadMe.md ├── cgroups └── cgroup.go ├── config └── config.go ├── go.mod ├── go.sum ├── log └── log.go ├── main.go ├── network ├── bitmap.go ├── bitset_test.go ├── bridge_network.go ├── ipam_fs.go ├── ipam_test.go └── network.go ├── ubuntu-rootfs-16.04.06.tar.gz └── workspace └── workspace.go /.gitignore: -------------------------------------------------------------------------------- 1 | /ubuntu-base-16.04.6-base-amd64/ 2 | -------------------------------------------------------------------------------- /ReadMe.md: -------------------------------------------------------------------------------- 1 | > 🐑 🐑 🐑 本系列教程主要是为了弄清楚容器化的原理,纸上得来终觉浅,绝知此事要躬行,理论始终不及动手实践来的深刻,所以这个系列会用go语言实现一个类似docker的容器化功能,最终能够容器化的运行一个进程。 2 | 3 | 4 | #### 配套笔记 5 | [500行代码手写docker开篇-goland远程编译环境配置](https://mp.weixin.qq.com/s/h3GDRrh9fo_mnzpSpkBfuA) 6 | 7 | [500行代码手写docker-以新命名空间运行程序](https://mp.weixin.qq.com/s/jD3CsIoV5FtqhBn7yGuHIg) 8 | 9 | [500行代码代码手写docker-将rootfs设置为只读镜像](https://mp.weixin.qq.com/s/PKZSrPkW1B3zwgAlP5OYrA) 10 | 11 | [500代码行代码手写docker-设置网络命名空间](https://mp.weixin.qq.com/s/T0IufjsDOI2A2G2NFpWWFg) 12 | 13 | [500行代码手写docker(5)-实现硬件资源限制cgroups](https://mp.weixin.qq.com/s/4jUjqiHRBHCcjpCw-R_Y_Q) 14 | 15 | #### 配套视频 16 | 17 | [500行代码手写docker开篇-goland远程编译环境配置](https://www.bilibili.com/video/BV1Fm4y147Hc/) 18 | 19 | [500行代码手写docker-以新命名空间运行程序](https://www.bilibili.com/video/BV1oh4y1R7YW/) 20 | 21 | [500行代码代码手写docker-将rootfs设置为只读镜像](https://www.bilibili.com/video/BV1Jz4y1q7ys/) 22 | 23 | [500代码行代码手写docker-设置网络命名空间](https://www.bilibili.com/video/BV1wc41137fH/) 24 | 25 | [500行代码手写docker(5)-实现硬件资源限制cgroups](https://www.bilibili.com/video/BV11s4y1e7fc/) 26 | 27 | #### 代码最终运行效果 28 | 29 | ![tty.gif](https://s2.loli.net/2023/05/16/eVm8ME9ArWOvD5k.gif) 30 | 31 | 32 | ## 纠错 33 | 34 | 如果有疑问或者发现错误,可以在相应的 Issues 进行提问或勘误。 35 | 36 | 37 | ## 公众号 38 | 39 | ![WechatIMG143.jpeg](https://s2.loli.net/2023/04/12/QzqyFU6tjAxKame.jpg) 
-------------------------------------------------------------------------------- /cgroups/cgroup.go: -------------------------------------------------------------------------------- 1 | package cgroups 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/exec" 7 | "path" 8 | "strconv" 9 | "tinydocker/log" 10 | ) 11 | 12 | const ( 13 | cgroupsPath = "/sys/fs/cgroup" 14 | dockerName = "tinydocker" 15 | ) 16 | 17 | func ConfigDefaultCgroups(pid int, containerName string) error { 18 | 19 | var ( 20 | cpuPath = path.Join(cgroupsPath, "cpu", dockerName, containerName) 21 | memoryPath = path.Join(cgroupsPath, "memory", dockerName, containerName) 22 | ) 23 | 24 | // 创建容器的控制目录 25 | if err := os.MkdirAll(cpuPath, 0700); err != nil { 26 | return fmt.Errorf("create cgroup path fail err=%s", err) 27 | } 28 | if err := os.MkdirAll(memoryPath, 0700); err != nil { 29 | return fmt.Errorf("create cgroup path fail err=%s", err) 30 | } 31 | // 设置cpu 32 | if err := os.WriteFile(path.Join(cpuPath, "cpu.cfs_quota_us"), []byte("50000"), 0700); err != nil { 33 | return fmt.Errorf("write cpu quota us fail err=%s", err) 34 | } 35 | if err := os.WriteFile(path.Join(cpuPath, "tasks"), []byte(strconv.Itoa(pid)), 0644); err != nil { 36 | return fmt.Errorf("write cpu tasks fail err=%s", err) 37 | } 38 | 39 | // 设置内存 40 | if err := os.WriteFile(path.Join(memoryPath, "memory.limit_in_bytes"), []byte("200m"), 0700); err != nil { 41 | return fmt.Errorf("write memory limit bytes fail err=%s", err) 42 | } 43 | if err := os.WriteFile(path.Join(memoryPath, "tasks"), []byte(strconv.Itoa(pid)), 0644); err != nil { 44 | return fmt.Errorf("write memory tasks fail err=%s", err) 45 | } 46 | return nil 47 | } 48 | 49 | func CleanCgroupsPath(containerName string) error { 50 | output, err := exec.Command("cgdelete", "-r", fmt.Sprintf("memory:%s/%s", dockerName, containerName)).Output() 51 | if err != nil { 52 | log.Error("cgdelete fail err=%s output=%s", err, string(output)) 53 | } 54 | output, err = 
exec.Command("cgdelete", "-r", fmt.Sprintf("cpu:%s/%s", dockerName, containerName)).Output() 55 | if err != nil { 56 | log.Error("cgdelete fail err=%s output=%s", err, string(output)) 57 | } 58 | return nil 59 | } 60 | -------------------------------------------------------------------------------- /config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "fmt" 5 | "github.com/ThreeKing2018/gocolor" 6 | ) 7 | 8 | const ( 9 | IpAmStorageFsPath = "/root/subnet.json" 10 | NetStoragePath = "/root/network.json" 11 | ) 12 | 13 | func Banner() string { 14 | return fmt.Sprintf("%s %s %s %s %s %s ", 15 | gocolor.SRedBG("welcome"), 16 | gocolor.SGreenBG("to"), 17 | gocolor.SYellowBG("use"), 18 | gocolor.SBlueBG("tinydocker"), 19 | "🦍 🦍 🦍", 20 | "❗️❗") 21 | } 22 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module tinydocker 2 | 3 | go 1.20 4 | 5 | require ( 6 | github.com/ThreeKing2018/gocolor v0.0.0-20190625094635-394e0e24c0d0 7 | github.com/vishvananda/netlink v1.1.0 8 | github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df 9 | ) 10 | 11 | require ( 12 | github.com/davecgh/go-spew v1.1.1 // indirect 13 | golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444 // indirect 14 | ) 15 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/ThreeKing2018/gocolor v0.0.0-20190625094635-394e0e24c0d0 h1:fFoYXYxBFRE1exQedMZyFy4P1LHGJH1idubWhVuEJ0I= 2 | github.com/ThreeKing2018/gocolor v0.0.0-20190625094635-394e0e24c0d0/go.mod h1:dG3aFVtzqgcYBEhVjC139oGBy2Z7C92+iyXNaiViDNM= 3 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 4 | github.com/davecgh/go-spew v1.1.1/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0= 6 | github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= 7 | github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df h1:OviZH7qLw/7ZovXvuNyL3XQl8UFofeikI1NW1Gypu7k= 8 | github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= 9 | golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444 h1:/d2cWp6PSamH4jDPFLyO150psQdqvtoNX8Zjg3AQ31g= 10 | golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 11 | -------------------------------------------------------------------------------- /log/log.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import ( 4 | "fmt" 5 | "github.com/ThreeKing2018/gocolor" 6 | "io" 7 | "log" 8 | "os" 9 | "sync" 10 | ) 11 | 12 | // 定义日志等级 13 | const ( 14 | DEBUG = iota // 用于调度,最低级 15 | INFO //输出普通信息,常用 16 | WARNING //输出警告,非错误信息,又比较重要 17 | ERROR //错误,属严重信息 18 | DEFAULT_FLAG = log.LstdFlags 19 | LSHORTFILE_FLAG = log.Lshortfile | log.LstdFlags 20 | ) 21 | 22 | // 定义日志接口 23 | type ColorLogger interface { 24 | Debug(format string, s ...interface{}) 25 | Info(format string, s ...interface{}) 26 | Warn(format string, s ...interface{}) 27 | Error(format string, s ...interface{}) 28 | SetLevel(level int) //设置等级 29 | } 30 | 31 | //var isColorChan = make(chan bool, 1) 32 | 33 | type Logger struct { 34 | Level int //日志等级 35 | debug *log.Logger 36 | info *log.Logger 37 | warning *log.Logger 38 | error *log.Logger 39 | IsColor bool //是否使用带颜色的日志 40 | Depth int //详情深度 41 | Prefix string //前缀 42 | wg *sync.WaitGroup 43 | } 44 | 45 | // 对外初使日志 46 | func New(w io.Writer, isColor bool) ColorLogger { 47 | return InitWriteLogger(w, 2, LSHORTFILE_FLAG, isColor) 48 | } 49 | 50 | // 默认的日志 51 | var defaultLogger = 
InitWriteLogger(os.Stdout, 2, DEFAULT_FLAG, true) 52 | 53 | // 初使写入日志,写入到一个buffer里 54 | func InitWriteLogger(w io.Writer, depth int, flag int, isColor bool) ColorLogger { 55 | logger := new(Logger) 56 | logger.wg = new(sync.WaitGroup) 57 | logger.IsColor = isColor 58 | logger.Depth = depth 59 | //初使每个等级的日志 60 | logger.debug = log.New(w, logger.setColorString(DEBUG, "[DEBUG]"), flag) 61 | logger.info = log.New(w, logger.setColorString(INFO, "[INF]"), flag) 62 | logger.warning = log.New(w, logger.setColorString(WARNING, "[WAR]"), flag) 63 | logger.error = log.New(w, logger.setColorString(ERROR, "[ERR]"), flag) 64 | 65 | logger.SetLevel(DEBUG) //初使一下等级啦 66 | return logger 67 | } 68 | 69 | // 设置不同字体颜色 70 | func (l *Logger) setColor(level int, format string, args ...interface{}) string { 71 | if false == l.IsColor { 72 | return fmt.Sprintf(format, args...) 73 | } 74 | switch level { 75 | case DEBUG: 76 | return gocolor.SMagenta(format, args...) 77 | case INFO: 78 | return gocolor.SGreen(format, args...) 79 | case WARNING: 80 | return gocolor.SYellow(format, args...) 81 | case ERROR: 82 | return gocolor.SRed(format, args...) 83 | default: 84 | return fmt.Sprintf(format, args...) 85 | } 86 | } 87 | 88 | // 设置不同背景颜色 89 | func (l *Logger) setColorString(level int, format string, args ...interface{}) string { 90 | if false == l.IsColor { 91 | return fmt.Sprintf(format, args...) 92 | } 93 | switch level { 94 | case DEBUG: 95 | return gocolor.SMagentaBG(format, args...) 96 | case INFO: 97 | return gocolor.SGreenBG(format, args...) 98 | case WARNING: 99 | return gocolor.SYellowBG(format, args...) 100 | case ERROR: 101 | return gocolor.SRedBG(format, args...) 102 | default: 103 | return fmt.Sprintf(format, args...) 
104 | } 105 | } 106 | 107 | // 设置等级,默认全输出 108 | func (l *Logger) SetLevel(level int) { 109 | l.Level = level 110 | } 111 | 112 | // 用于调度的日志 113 | func (l *Logger) Debug(format string, s ...interface{}) { 114 | if l.Level > DEBUG { 115 | return 116 | } 117 | l.debug.Output(l.Depth, l.setColor(DEBUG, format, s...)) 118 | } 119 | 120 | // 输出普通信息 121 | func (l *Logger) Info(format string, s ...interface{}) { 122 | if l.Level > INFO { 123 | return 124 | } 125 | l.info.Output(l.Depth, l.setColor(INFO, format, s...)) 126 | } 127 | 128 | // 输出警告信息 129 | func (l *Logger) Warn(format string, s ...interface{}) { 130 | if l.Level > WARNING { 131 | return 132 | } 133 | l.warning.Output(l.Depth, l.setColor(WARNING, format, s...)) 134 | } 135 | 136 | // 输出错误信息 137 | func (l *Logger) Error(format string, s ...interface{}) { 138 | if l.Level > ERROR { 139 | return 140 | } 141 | l.error.Output(l.Depth, l.setColor(ERROR, format, s...)) 142 | } 143 | 144 | func Debug(format string, s ...interface{}) { 145 | defaultLogger.Debug(format, s...) 146 | } 147 | func Info(format string, s ...interface{}) { 148 | defaultLogger.Info(format, s...) 149 | } 150 | func Warn(format string, s ...interface{}) { 151 | defaultLogger.Warn(format, s...) 152 | } 153 | func Error(format string, s ...interface{}) { 154 | defaultLogger.Error(format, s...) 
155 | } 156 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/exec" 7 | "syscall" 8 | "tinydocker/cgroups" 9 | "time" 10 | "tinydocker/config" 11 | "tinydocker/log" 12 | "tinydocker/network" 13 | "tinydocker/workspace" 14 | ) 15 | 16 | // ./tinydocker run 容器名 可执行文件名 17 | func main() { 18 | switch os.Args[1] { 19 | case "run": 20 | if err := network.Init(); err != nil { 21 | log.Error("net work fail err=%s", err) 22 | return 23 | } 24 | fmt.Println(config.Banner()) 25 | // 在一个新的命名空间 26 | initCmd, err := os.Readlink("/proc/self/exe") 27 | if err != nil { 28 | log.Error("get init process error %s", err) 29 | return 30 | } 31 | os.Args[1] = "init" 32 | cmd := exec.Command(initCmd, os.Args[1:]...) 33 | cmd.SysProcAttr = &syscall.SysProcAttr{ 34 | Cloneflags: syscall.CLONE_NEWUTS | syscall.CLONE_NEWPID | syscall.CLONE_NEWNS | 35 | syscall.CLONE_NEWNET | syscall.CLONE_NEWIPC, 36 | } 37 | cmd.Env = os.Environ() 38 | cmd.Stdin = os.Stdin 39 | cmd.Stdout = os.Stdout 40 | cmd.Stderr = os.Stderr 41 | err = cmd.Start() 42 | if err != nil { 43 | fmt.Println(err) 44 | } 45 | 46 | containerName := os.Args[2] 47 | // 等待子进程完全启动 48 | time.Sleep(2 * time.Second) 49 | if err := cgroups.ConfigDefaultCgroups(cmd.Process.Pid, containerName); err != nil { 50 | log.Error("config cgroups fail %s", err) 51 | } 52 | 53 | if err := network.ConfigDefaultNetworkInNewNet(cmd.Process.Pid); err != nil { 54 | log.Error("config network fail %s", err) 55 | } 56 | cmd.Wait() 57 | cgroups.CleanCgroupsPath(containerName) 58 | workspace.DelMntNamespace(containerName) 59 | return 60 | case "init": 61 | var ( 62 | containerName = os.Args[2] 63 | cmd = os.Args[3] 64 | ) 65 | log.Info("Wait SIGUSR2 signal arrived ....") 66 | // 等待父进程网络命名空间设置完毕 67 | network.WaitParentSetNewNet() 68 | if err := 
workspace.SetMntNamespace(containerName); err != nil { 69 | log.Error("SetMntNamespace %s", err) 70 | return 71 | } 72 | syscall.Chdir("/") 73 | defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV 74 | syscall.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), "") 75 | err := syscall.Exec(cmd, os.Args[3:], os.Environ()) 76 | if err != nil { 77 | log.Error("exec proc fail %s", err) 78 | return 79 | } 80 | log.Error("forever not exec it ") 81 | return 82 | default: 83 | log.Error("not valid cmd") 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /network/bitmap.go: -------------------------------------------------------------------------------- 1 | package network 2 | 3 | type bitMap struct { 4 | Bitmap []byte 5 | } 6 | 7 | func InitBitMap(maxLen int64) *bitMap { 8 | b := &bitMap{} 9 | b.Bitmap = make([]byte, maxLen) 10 | return b 11 | } 12 | 13 | func (b *bitMap) BitExist(pos int) bool { 14 | aIndex := arrIndex(pos) 15 | bIndex := bytePos(pos) 16 | return 1 == 1&(b.Bitmap[aIndex]>>bIndex) 17 | } 18 | 19 | func (b *bitMap) BitSet(pos int) { 20 | aIndex := arrIndex(pos) 21 | bIndex := bytePos(pos) 22 | b.Bitmap[aIndex] = b.Bitmap[aIndex] | (1 << bIndex) 23 | } 24 | 25 | func (b *bitMap) BitClean(pos int) { 26 | aIndex := arrIndex(pos) 27 | bIndex := bytePos(pos) 28 | b.Bitmap[aIndex] = b.Bitmap[aIndex] & (^(1 << bIndex)) 29 | } 30 | 31 | func arrIndex(pos int) int { 32 | return pos / 8 33 | } 34 | 35 | func bytePos(pos int) int { 36 | return pos % 8 37 | } 38 | -------------------------------------------------------------------------------- /network/bitset_test.go: -------------------------------------------------------------------------------- 1 | package network 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestBitSet(t *testing.T) { 9 | 10 | bitmap := InitBitMap(65555/8 + 1) 11 | fmt.Println(bitmap.BitExist(0)) 12 | bitmap.BitSet(0) 13 | fmt.Println(bitmap.BitExist(0)) 
// truncate shortens str to at most maxlen bytes and returns shorter strings
// unchanged. It cuts on a byte boundary, which is sufficient for the ASCII
// interface names it is used for in this file.
func truncate(maxlen int, str string) string {
	if len(str) > maxlen {
		return str[:maxlen]
	}
	return str
}
%s ! -o %s -j MASQUERADE", subnet.String(), bridgeName) 64 | cmd := exec.Command("iptables", strings.Split(iptablesCmd, " ")...) 65 | _, err := cmd.Output() 66 | if err != nil { 67 | return fmt.Errorf("set snat fail %s", err) 68 | } 69 | return nil 70 | } 71 | 72 | func (b *bridgeDriver) CreateNetwork(networkName string, subnet string, networkType networktype) error { 73 | 74 | if networkType != BridgeNetworkType { 75 | return fmt.Errorf("support bridge network type now ") 76 | } 77 | 78 | // 检查网络命名是否存在 79 | if err := NetMgr.LoadConf(); err != nil { 80 | return fmt.Errorf("netMgr loadConf fail %s", err) 81 | } 82 | 83 | if netConf, ok := NetMgr.Storage[networkName]; ok { 84 | switch netConf.Driver { 85 | case "bridge": 86 | // 系统重启后需要重新建立网桥配置 87 | _, err := netlink.LinkByName(netConf.BridgeName) 88 | if err == nil { 89 | log.Info("exist default network ,will not create new network ") 90 | return nil 91 | } 92 | default: 93 | return fmt.Errorf("not support network driver") 94 | } 95 | } 96 | 97 | // 创建网桥 98 | interfaceIp, err := genInterfaceIp(subnet) 99 | if err != nil { 100 | return fmt.Errorf("genInterfaceIp err=%s", err) 101 | } 102 | bridgeName, err := createBridge(networkName, interfaceIp) 103 | if err != nil { 104 | return fmt.Errorf("createBridge err=%s", err) 105 | } 106 | 107 | _, cidr, _ := net.ParseCIDR(subnet) 108 | 109 | err = setSNat(bridgeName, cidr) 110 | if err != nil { 111 | log.Error("%s", err) 112 | } 113 | NetMgr.Storage[networkName] = &NetConf{ 114 | NetworkName: networkName, 115 | IpRange: cidr, 116 | Driver: BridgeNetworkType.String(), 117 | BridgeName: bridgeName, 118 | BridgeIp: interfaceIp, 119 | } 120 | return NetMgr.Sync() 121 | } 122 | 123 | func (b *bridgeDriver) CrateVeth(networkName string) (*netlink.Veth, *NetConf, error) { 124 | // 检查网络命名是否存在 125 | if err := NetMgr.LoadConf(); err != nil { 126 | return nil, nil, fmt.Errorf("netMgr loadConf fail %s", err) 127 | } 128 | networkConf, ok := NetMgr.Storage[networkName] 129 | if !ok { 
130 | return nil, nil, fmt.Errorf("name %s network is invalid", networkName) 131 | } 132 | br, err := netlink.LinkByName(networkConf.BridgeName) 133 | if err != nil { 134 | return nil, nil, fmt.Errorf("link by name fail err=%s", err) 135 | } 136 | la := netlink.NewLinkAttrs() 137 | vethname := truncate(15, "veth-"+strconv.Itoa(10+int(rand.Int31n(10)))+"-"+networkConf.NetworkName) 138 | la.Name = vethname 139 | la.MasterIndex = br.Attrs().Index 140 | // 创建veth设备 141 | vethLink := &netlink.Veth{ 142 | LinkAttrs: la, 143 | PeerName: truncate(15, "cif-"+vethname), 144 | } 145 | if err := netlink.LinkAdd(vethLink); err != nil { 146 | return nil, nil, fmt.Errorf("veth creation failed for bridge %s: %s", networkName, err) 147 | } 148 | 149 | if err := netlink.LinkSetUp(vethLink); err != nil { 150 | return nil, nil, fmt.Errorf("error enabling interface for %s: %v", networkName, err) 151 | } 152 | return vethLink, networkConf, nil 153 | } 154 | 155 | func (b *bridgeDriver) setContainerIp(peerName string, pid int, containerIp net.IP, gateway *net.IPNet) error { 156 | peerLink, err := netlink.LinkByName(peerName) 157 | if err != nil { 158 | return fmt.Errorf("fail config endpoint: %v", err) 159 | } 160 | loLink, err := netlink.LinkByName("lo") 161 | if err != nil { 162 | return fmt.Errorf("fail config endpoint: %v", err) 163 | } 164 | // 进入容器的网络命名空间 165 | defer enterContainerNetns(&peerLink, pid)() 166 | containerVethInterfaceIP := *gateway 167 | containerVethInterfaceIP.IP = containerIp 168 | if err = setInterfaceIP(peerName, containerVethInterfaceIP.String()); err != nil { 169 | return fmt.Errorf("%v,%s", containerIp, err) 170 | } 171 | if err := netlink.LinkSetUp(peerLink); err != nil { 172 | return fmt.Errorf("netlink.LinkSetUp fail name=%s err=%s", peerName, err) 173 | } 174 | if err := netlink.LinkSetUp(loLink); err != nil { 175 | return fmt.Errorf("netlink.LinkSetUp fail name=%s err=%s", peerName, err) 176 | } 177 | _, cidr, _ := net.ParseCIDR("0.0.0.0/0") 178 | 
defaultRoute := &netlink.Route{ 179 | LinkIndex: peerLink.Attrs().Index, 180 | Gw: gateway.IP, 181 | Dst: cidr, 182 | } 183 | if err = netlink.RouteAdd(defaultRoute); err != nil { 184 | return fmt.Errorf("router add fail %s", err) 185 | } 186 | 187 | return nil 188 | } 189 | 190 | func enterContainerNetns(vethLink *netlink.Link, pid int) func() { 191 | f, err := os.OpenFile(fmt.Sprintf("/proc/%d/ns/net", pid), os.O_RDONLY, 0) 192 | if err != nil { 193 | fmt.Println(fmt.Errorf("error get container net namespace, %v", err)) 194 | } 195 | 196 | nsFD := f.Fd() 197 | runtime.LockOSThread() 198 | 199 | // 修改veth peer 另外一端移到容器的namespace中 200 | if err = netlink.LinkSetNsFd(*vethLink, int(nsFD)); err != nil { 201 | log.Error("error set link netns , %v", err) 202 | } 203 | 204 | // 获取当前的网络namespace 205 | origns, err := netns.Get() 206 | if err != nil { 207 | log.Error("error get current netns, %v", err) 208 | } 209 | 210 | // 设置当前线程到新的网络namespace,并在函数执行完成之后再恢复到之前的namespace 211 | if err = netns.Set(netns.NsHandle(nsFD)); err != nil { 212 | log.Error("error set netns, %v", err) 213 | } 214 | return func() { 215 | netns.Set(origns) 216 | origns.Close() 217 | runtime.UnlockOSThread() 218 | f.Close() 219 | } 220 | } 221 | 222 | // Set the IP addr of a netlink interface 223 | func setInterfaceIP(name string, rawIP string) error { 224 | retries := 2 225 | var iface netlink.Link 226 | var err error 227 | for i := 0; i < retries; i++ { 228 | iface, err = netlink.LinkByName(name) 229 | if err == nil { 230 | break 231 | } 232 | fmt.Println(fmt.Errorf("error retrieving new bridge netlink link [ %s ]... 
retrying", name)) 233 | time.Sleep(2 * time.Second) 234 | } 235 | if err != nil { 236 | return fmt.Errorf("Abandoning retrieving the new bridge link from netlink, Run [ ip link ] to troubleshoot the error: %v", err) 237 | } 238 | ipNet, err := netlink.ParseIPNet(rawIP) 239 | if err != nil { 240 | return err 241 | } 242 | addr := &netlink.Addr{IPNet: ipNet, Peer: ipNet, Label: "", Flags: 0, Scope: 0} 243 | return netlink.AddrAdd(iface, addr) 244 | } 245 | -------------------------------------------------------------------------------- /network/ipam_fs.go: -------------------------------------------------------------------------------- 1 | package network 2 | 3 | import ( 4 | "encoding/binary" 5 | "encoding/json" 6 | "net" 7 | "os" 8 | "tinydocker/config" 9 | "tinydocker/log" 10 | ) 11 | 12 | type ipAmFs struct { 13 | subnets map[string]*bitMap 14 | path string 15 | } 16 | 17 | var IpAmfs = &ipAmFs{ 18 | subnets: make(map[string]*bitMap), 19 | path: config.IpAmStorageFsPath, 20 | } 21 | 22 | func (ipamfs *ipAmFs) SetIpUsed(subnet string) error { 23 | if err := ipamfs.loadConf(); err != nil { 24 | return err 25 | } 26 | ip, cidr, err := net.ParseCIDR(subnet) 27 | if err != nil { 28 | return err 29 | } 30 | ip = ip.To4() 31 | ones, total := cidr.Mask.Size() 32 | bitmap := ipamfs.subnets[cidr.String()] 33 | if bitmap == nil || bitmap.Bitmap == nil { 34 | bitmap = InitBitMap(1 << (total - ones)) 35 | ipamfs.subnets[cidr.String()] = bitmap 36 | } 37 | pos := getIPIndex(ip, cidr.Mask) 38 | log.Debug("set ip %s pos %d \n", ip, pos) 39 | bitmap.BitSet(pos) 40 | return ipamfs.sync() 41 | } 42 | 43 | func (ipamfs *ipAmFs) AllocIp(subnet string) (net.IP, error) { 44 | if err := ipamfs.loadConf(); err != nil { 45 | return nil, err 46 | } 47 | ip, cidr, err := net.ParseCIDR(subnet) 48 | if err != nil { 49 | return nil, err 50 | } 51 | ip = ip.To4() 52 | ones, total := cidr.Mask.Size() 53 | bitmap := ipamfs.subnets[cidr.String()] 54 | if bitmap == nil || bitmap.Bitmap == nil { 55 
| bitmap = InitBitMap(1 << (total - ones)) 56 | ipamfs.subnets[cidr.String()] = bitmap 57 | } 58 | 59 | // pos 为0 是网络号不能分配ip, 60 | for pos := 1; pos <= (1<<(total-ones) - 2); pos++ { 61 | if bitmap.BitExist(pos) { 62 | continue 63 | } 64 | bitmap.BitSet(pos) 65 | firstIP := ipToUint32(ip.Mask(cidr.Mask)) 66 | ip = uint32ToIP(firstIP + uint32(pos)) 67 | break 68 | } 69 | err = ipamfs.sync() 70 | if err != nil { 71 | return nil, err 72 | } 73 | return ip, nil 74 | } 75 | 76 | func (ipamfs *ipAmFs) ReleaseIp(subnet string, ip net.IP) error { 77 | if err := ipamfs.loadConf(); err != nil { 78 | return err 79 | } 80 | _, cidr, err := net.ParseCIDR(subnet) 81 | if err != nil { 82 | return err 83 | } 84 | bitmap := ipamfs.subnets[cidr.String()] 85 | if bitmap == nil { 86 | return nil 87 | } 88 | pos := getIPIndex(ip, cidr.Mask) 89 | bitmap.BitClean(pos) 90 | return ipamfs.sync() 91 | } 92 | 93 | func uint32ToIP(ip uint32) net.IP { 94 | return net.IPv4(byte(ip>>24), byte(ip>>16), byte(ip>>8), byte(ip)) 95 | } 96 | 97 | func getIPIndex(ip net.IP, mask net.IPMask) int { 98 | ipInt := ipToUint32(ip) 99 | firstIP := ipToUint32(ip.Mask(mask)) 100 | return int(ipInt - firstIP) 101 | } 102 | func ipToUint32(ip net.IP) uint32 { 103 | if ip == nil { 104 | return 0 105 | } 106 | ip = ip.To4() 107 | if ip == nil { 108 | return 0 109 | } 110 | return binary.BigEndian.Uint32(ip) 111 | } 112 | 113 | func (ipamfs *ipAmFs) loadConf() error { 114 | if _, err := os.Stat(ipamfs.path); err != nil { 115 | if os.IsNotExist(err) { 116 | return nil 117 | } else { 118 | return err 119 | } 120 | } 121 | data, err := os.ReadFile(ipamfs.path) 122 | if err != nil { 123 | return err 124 | } 125 | if len(ipamfs.subnets) == 0 { 126 | ipamfs.subnets = make(map[string]*bitMap) 127 | } 128 | if len(data) == 0 { 129 | return nil 130 | } 131 | err = json.Unmarshal(data, &ipamfs.subnets) 132 | if err != nil { 133 | return err 134 | } 135 | return nil 136 | } 137 | 138 | func (ipamfs *ipAmFs) sync() error { 139 
| if _, err := os.Stat(ipamfs.path); err != nil { 140 | if os.IsNotExist(err) { 141 | os.Create(ipamfs.path) 142 | } else { 143 | return err 144 | } 145 | } 146 | data, err := json.Marshal(ipamfs.subnets) 147 | if err != nil { 148 | return err 149 | } 150 | err = os.WriteFile(ipamfs.path, data, 0644) 151 | if err != nil { 152 | return err 153 | } 154 | return nil 155 | } 156 | -------------------------------------------------------------------------------- /network/ipam_test.go: -------------------------------------------------------------------------------- 1 | package network 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "testing" 7 | ) 8 | 9 | func TestAlloc(t *testing.T) { 10 | subnet := "192.168.0.0/24" 11 | ip, err := IpAmfs.AllocIp(subnet) 12 | if err != nil { 13 | t.Fatal(err) 14 | } 15 | fmt.Println(ip.To4().String()) 16 | ip, err = IpAmfs.AllocIp(subnet) 17 | if err != nil { 18 | t.Fatal(err) 19 | } 20 | fmt.Println(ip.To4().String()) 21 | //err = IpAmfs.ReleaseIp(subnet, ip) 22 | //if err != nil { 23 | // t.Fatal(err) 24 | //} 25 | ip, err = IpAmfs.AllocIp(subnet) 26 | if err != nil { 27 | t.Fatal(err) 28 | } 29 | fmt.Println(ip.To4().String()) 30 | } 31 | 32 | func TestBitMap_BitClean(t *testing.T) { 33 | ip, cidr, err := net.ParseCIDR("192.168.0.1/24") 34 | if err != nil { 35 | t.Fatal(err) 36 | } 37 | ip = ip.To4() 38 | ones, total := cidr.Mask.Size() 39 | fmt.Println(total - ones) 40 | fmt.Println(1 << (total - ones)) 41 | //bitM := InitBitMap(10) 42 | //bitM.BitSet(5) 43 | //fmt.Println(bitM.BitExist(5)) 44 | //bitM.BitClean(5) 45 | //fmt.Println(bitM.BitExist(5)) 46 | } 47 | -------------------------------------------------------------------------------- /network/network.go: -------------------------------------------------------------------------------- 1 | package network 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net" 7 | "os" 8 | "os/signal" 9 | "syscall" 10 | "tinydocker/config" 11 | "tinydocker/log" 12 | ) 13 | 14 | type NetConf struct { 
15 | NetworkName string 16 | IpRange *net.IPNet 17 | Driver string 18 | BridgeName string 19 | BridgeIp *net.IPNet 20 | } 21 | 22 | type netMgr struct { 23 | Storage map[string]*NetConf 24 | } 25 | 26 | var NetMgr = &netMgr{ 27 | Storage: map[string]*NetConf{}, 28 | } 29 | 30 | func (n *netMgr) Sync() error { 31 | if _, err := os.Stat(config.NetStoragePath); err != nil { 32 | if os.IsNotExist(err) { 33 | os.Create(config.NetStoragePath) 34 | } else { 35 | return err 36 | } 37 | } 38 | data, err := json.Marshal(n) 39 | if err != nil { 40 | return err 41 | } 42 | err = os.WriteFile(config.NetStoragePath, data, 0644) 43 | if err != nil { 44 | return err 45 | } 46 | return nil 47 | } 48 | 49 | func (n *netMgr) LoadConf() error { 50 | if _, err := os.Stat(config.NetStoragePath); err != nil { 51 | if os.IsNotExist(err) { 52 | return nil 53 | } else { 54 | return err 55 | } 56 | } 57 | data, err := os.ReadFile(config.NetStoragePath) 58 | if err != nil { 59 | return err 60 | } 61 | if len(n.Storage) == 0 { 62 | n.Storage = make(map[string]*NetConf) 63 | } 64 | if len(data) == 0 { 65 | return nil 66 | } 67 | err = json.Unmarshal(data, n) 68 | if err != nil { 69 | return err 70 | } 71 | return nil 72 | } 73 | 74 | const ( 75 | defaultNetName = "testbridge" 76 | defaultSubnet = "192.169.0.1/24" 77 | ) 78 | 79 | type networktype string 80 | 81 | const ( 82 | BridgeNetworkType networktype = "bridge" 83 | ) 84 | 85 | func (n networktype) String() string { 86 | return string(n) 87 | } 88 | 89 | func Init() error { 90 | // 对默认网络进行初始化 91 | if err := BridgeDriver.CreateNetwork(defaultNetName, defaultSubnet, BridgeNetworkType); err != nil { 92 | return fmt.Errorf("err=%s", err) 93 | } 94 | if err := IpAmfs.SetIpUsed(defaultSubnet); err != nil { 95 | return err 96 | } 97 | return nil 98 | } 99 | 100 | func ConfigDefaultNetworkInNewNet(pid int) error { 101 | // 获取ip 102 | ip, err := IpAmfs.AllocIp(defaultSubnet) 103 | if err != nil { 104 | return fmt.Errorf("ipam alloc ip fail %s", 
err) 105 | } 106 | 107 | // 主机上创建 veth 设备,并连接到网桥上 108 | vethLink, networkConf, err := BridgeDriver.CrateVeth(defaultNetName) 109 | if err != nil { 110 | return fmt.Errorf("create veth fail err=%s", err) 111 | } 112 | // 主机上设置子进程网络命名空间 配置 113 | if err := BridgeDriver.setContainerIp(vethLink.PeerName, pid, ip, networkConf.BridgeIp); err != nil { 114 | return fmt.Errorf("setContainerIp fail err=%s peername=%s pid=%d ip=%v conf=%+v", err, vethLink.PeerName, pid, ip, networkConf) 115 | } 116 | // 通知子进程设置完毕 117 | log.Debug("parent process set ip success") 118 | return noticeSunProcessNetConfigFin(pid) 119 | } 120 | 121 | func noticeSunProcessNetConfigFin(pid int) error { 122 | return syscall.Kill(pid, syscall.SIGUSR2) 123 | } 124 | 125 | func WaitParentSetNewNet() { 126 | sigs := make(chan os.Signal, 1) 127 | signal.Notify(sigs, syscall.SIGUSR2) 128 | <-sigs 129 | log.Info("Received SIGUSR2 signal, prepare run container") 130 | } 131 | -------------------------------------------------------------------------------- /ubuntu-rootfs-16.04.06.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HobbyBear/tinydocker/9b887bae9189f25f24124742ce8b82fe4c51c824/ubuntu-rootfs-16.04.06.tar.gz -------------------------------------------------------------------------------- /workspace/workspace.go: -------------------------------------------------------------------------------- 1 | package workspace 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/exec" 7 | "syscall" 8 | ) 9 | 10 | const ( 11 | mntPath = "/root/mnt" 12 | workLayerPath = "/root/work" 13 | writeLayerPath = "/root/wlayer" 14 | imagePath = "ubuntu-base-16.04.6-base-amd64" 15 | mntOldPath = ".old" 16 | ) 17 | 18 | func workerLayer(containerName string) string { 19 | return fmt.Sprintf("%s/%s", workLayerPath, containerName) 20 | } 21 | 22 | func mntLayer(containerName string) string { 23 | return fmt.Sprintf("%s/%s", mntPath, containerName) 24 | } 25 | 26 | 
func writeLayer(containerName string) string { 27 | return fmt.Sprintf("%s/%s", writeLayerPath, containerName) 28 | } 29 | 30 | func mntOldLayer(containerName string) string { 31 | return fmt.Sprintf("%s/%s", mntLayer(containerName), mntOldPath) 32 | } 33 | 34 | func SetMntNamespace(containerName string) error { 35 | if err := os.MkdirAll(mntLayer(containerName), 0700); err != nil { 36 | return fmt.Errorf("mkdir mntlayer fail err=%s", err) 37 | } 38 | if err := os.MkdirAll(workerLayer(containerName), 0700); err != nil { 39 | return fmt.Errorf("mkdir work layer fail err=%s", err) 40 | } 41 | if err := os.MkdirAll(writeLayer(containerName), 0700); err != nil { 42 | return fmt.Errorf("mkdir write layer fail err=%s", err) 43 | } 44 | 45 | if err := syscall.Mount("overlay", mntLayer(containerName), "overlay", 0, 46 | fmt.Sprintf("upperdir=%s,lowerdir=%s,workdir=%s", 47 | writeLayer(containerName), imagePath, workerLayer(containerName))); err != nil { 48 | return fmt.Errorf("mount overlay fail err=%s", err) 49 | } 50 | 51 | if err := syscall.Mount("", "/", "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil { 52 | return fmt.Errorf("reclare rootfs private fail err=%s", err) 53 | } 54 | 55 | if err := syscall.Mount(mntLayer(containerName), mntLayer(containerName), "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { 56 | return fmt.Errorf("mount rootfs in new mnt space fail err=%s", err) 57 | } 58 | if err := os.MkdirAll(mntOldLayer(containerName), 0700); err != nil { 59 | return fmt.Errorf("mkdir mnt old layer fail err=%s", err) 60 | } 61 | if err := syscall.PivotRoot(mntLayer(containerName), mntOldLayer(containerName)); err != nil { 62 | return fmt.Errorf("pivot root fail err=%s", err) 63 | } 64 | return nil 65 | } 66 | 67 | func delMntNamespace(path string) error { 68 | _, err := exec.Command("umount", path).CombinedOutput() 69 | if err != nil { 70 | return fmt.Errorf("umount fail path=%s err=%s", path, err) 71 | } 72 | if err := os.RemoveAll(path); err != nil 
{ 73 | return fmt.Errorf("remove dir fail path=%s err=%s", path, err) 74 | } 75 | return nil 76 | } 77 | 78 | func DelMntNamespace(containerName string) error { 79 | if err := delMntNamespace(mntLayer(containerName)); err != nil { 80 | return err 81 | } 82 | if err := delMntNamespace(workerLayer(containerName)); err != nil { 83 | return err 84 | } 85 | if err := delMntNamespace(writeLayer(containerName)); err != nil { 86 | return err 87 | } 88 | return nil 89 | } 90 | --------------------------------------------------------------------------------