├── LICENSE ├── Makefile ├── README.md ├── iptables.bridge.sh ├── iptables.go ├── iptables.sh ├── main.go ├── network-namespace.sh ├── network.go ├── rootfs.tar └── simple_container └── simple_container.go /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018-present cssivision 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | config: 2 | mkdir -p rootfs 3 | sudo tar -xvf rootfs.tar -C rootfs 4 | 5 | build: 6 | GOOS=linux GOARCH=amd64 go build 7 | 8 | run: 9 | sudo ./container run /bin/bash -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Container 2 | simple container implementation in go. 3 | 4 | ## Material 5 | 6 | 1, Unprivileged containers on Go. 7 | * [Part1: User and PID namespaces](http://lk4d4.darth.io/posts/unpriv1/) 8 | * [Part2: UTS namespace (setup namespaces)](http://lk4d4.darth.io/posts/unpriv2/) 9 | * [Part3: Mount namespace](http://lk4d4.darth.io/posts/unpriv3/) 10 | * [Part4: Network namespace](http://lk4d4.darth.io/posts/unpriv4/) 11 | 12 | 2, Docker implemented in around 100 lines of bash. 13 | * [https://github.com/p8952/bocker](https://github.com/p8952/bocker) 14 | 15 | 3, Code to accompany the "Namespaces in Go" series of articles. 16 | 17 | * [Part 1: Linux Namespaces](https://medium.com/@teddyking/linux-namespaces-850489d3ccf) 18 | * [Part 2: Namespaces in Go - Basics](https://medium.com/@teddyking/namespaces-in-go-basics-e3f0fc1ff69a) 19 | * [Part 3: Namespaces in Go - User](https://medium.com/@teddyking/namespaces-in-go-user-a54ef9476f2a) 20 | * [Part 4: Namespaces in Go - reexec](https://medium.com/@teddyking/namespaces-in-go-reexec-3d1295b91af8) 21 | * [Part 5: Namespaces in Go - Mount](https://medium.com/@teddyking/namespaces-in-go-mount-e4c04fe9fb29) 22 | * [Part 6: Namespaces in Go - Network](https://medium.com/@teddyking/namespaces-in-go-network-fdcf63e76100) 23 | * [Part 7: Namespaces in Go - UTS](https://medium.com/@teddyking/namespaces-in-go-uts-d47aebcdf00e) 24 | 25 | 4, Shell script to create network namespace. 
26 | * [https://github.com/cssivision/container/blob/master/network-namespace.sh](https://github.com/cssivision/container/blob/master/network-namespace.sh) 27 | 28 | 5, Iptables 29 | * [a-deep-dive-into-iptables-and-netfilter-architecture](https://www.digitalocean.com/community/tutorials/a-deep-dive-into-iptables-and-netfilter-architecture) 30 | * [Security Guide IPTables](https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/6/html-single/Security_Guide/index.html#sect-Security_Guide-IPTables) 31 | 32 | 33 | ## Network 34 | set dns resolver in container: 35 | ```sh 36 | echo "nameserver 8.8.8.8" >> /etc/resolv.conf 37 | ``` 38 | set ip forward in host: 39 | ```sh 40 | sysctl -w net.ipv4.ip_forward=1 41 | ``` 42 | 43 | ## Run 44 | ```sh 45 | make config 46 | make build 47 | make run 48 | ``` 49 | after this, you can `ping google.com`, this should work. 50 | -------------------------------------------------------------------------------- /iptables.bridge.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | DEVICE_NAME="container0" 6 | DEVICE_ADDR="10.88.37.1" 7 | 8 | iptables -P FORWARD DROP 9 | iptables -F FORWARD 10 | 11 | # Flush nat rules. 12 | iptables -t nat -F 13 | 14 | # Enable masquerading of ${DEVICE_ADDR}. 
15 | iptables -t nat -A POSTROUTING -s ${DEVICE_ADDR}/24 -o eth0 -j MASQUERADE 16 | 17 | iptables -A FORWARD -i eth0 -o ${DEVICE_NAME} -j ACCEPT 18 | iptables -A FORWARD -o eth0 -i ${DEVICE_NAME} -j ACCEPT -------------------------------------------------------------------------------- /iptables.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/coreos/go-iptables/iptables" 7 | ) 8 | 9 | type IPTablesRule struct { 10 | table string 11 | chain string 12 | rulespec []string 13 | } 14 | 15 | func getIptablesRules(n, hostDevice, virtualDevice string) []IPTablesRule { 16 | 17 | return []IPTablesRule{ 18 | // iptables -t nat -A POSTROUTING -s ${DEVICE_ADDR}/24 -o eth0 -j MASQUERADE 19 | {"nat", "POSTROUTING", []string{"-s", n, "-o", hostDevice, "-j", "MASQUERADE"}}, 20 | // iptables -A FORWARD -i eth0 -o ${DEVICE_NAME} -j ACCEPT 21 | {"filter", "FORWARD", []string{"-i", hostDevice, "-o", virtualDevice, "-j", "ACCEPT"}}, 22 | // iptables -A FORWARD -o eth0 -i ${DEVICE_NAME} -j ACCEPT 23 | {"filter", "FORWARD", []string{"-o", hostDevice, "-i", virtualDevice, "-j", "ACCEPT"}}, 24 | } 25 | } 26 | 27 | func teardownIPTables(ipt iptables.IPTables, rules []IPTablesRule) { 28 | for _, rule := range rules { 29 | // We ignore errors here because if there's an error it's almost certainly because the rule 30 | // doesn't exist, which is fine (we don't need to delete rules that don't exist) 31 | ipt.Delete(rule.table, rule.chain, rule.rulespec...) 
32 | } 33 | } 34 | 35 | func setIptables(iptablesRules []IPTablesRule) error { 36 | ipt, err := iptables.New() 37 | if err != nil { 38 | return fmt.Errorf("new iptable instance err: %v", err) 39 | } 40 | 41 | for _, rule := range iptablesRules { 42 | if err := ipt.AppendUnique(rule.table, rule.chain, rule.rulespec...); err != nil { 43 | return fmt.Errorf("failed to insert IPTables rule: %v", err) 44 | } 45 | } 46 | return nil 47 | } 48 | -------------------------------------------------------------------------------- /iptables.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | DEVICE_NAME="veth" 6 | DEVICE_ADDR="10.88.37.11" 7 | 8 | iptables -P FORWARD DROP 9 | iptables -F FORWARD 10 | 11 | # Flush nat rules. 12 | iptables -t nat -F 13 | 14 | # Enable masquerading of ${DEVICE_ADDR}. 15 | iptables -t nat -A POSTROUTING -s ${DEVICE_ADDR}/24 -o eth0 -j MASQUERADE 16 | 17 | iptables -A FORWARD -i eth0 -o ${DEVICE_NAME} -j ACCEPT 18 | iptables -A FORWARD -o eth0 -i ${DEVICE_NAME} -j ACCEPT -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | "os/exec" 8 | "path" 9 | "syscall" 10 | ) 11 | 12 | func main() { 13 | switch os.Args[1] { 14 | case "run": 15 | parent() 16 | case "child": 17 | child() 18 | default: 19 | panic("what should I do") 20 | } 21 | } 22 | 23 | func parent() { 24 | cmd := exec.Command(os.Args[0], append([]string{"child"}, os.Args[2:]...)...) 
25 | cmd.SysProcAttr = &syscall.SysProcAttr{ 26 | Cloneflags: syscall.CLONE_NEWUTS | syscall.CLONE_NEWPID | syscall.CLONE_NEWNS | syscall.CLONE_NEWNET, 27 | } 28 | cmd.Stdin = os.Stdin 29 | cmd.Stdout = os.Stdout 30 | cmd.Stderr = os.Stderr 31 | 32 | if err := cmd.Start(); err != nil { 33 | panic(fmt.Sprintf("start parent err: %v", err)) 34 | } 35 | 36 | log.Printf("container PID: %d", cmd.Process.Pid) 37 | // set bridge and veth pair for container. 38 | if err := putIface(cmd.Process.Pid); err != nil { 39 | panic(fmt.Sprintf("putIface err: %v", err)) 40 | } 41 | 42 | if err := cmd.Wait(); err != nil { 43 | panic(fmt.Sprintf("wait parent err: %v\n", err)) 44 | } 45 | } 46 | 47 | func child() { 48 | fmt.Printf("start child......, pid %v\n", syscall.Getpid()) 49 | cmd := exec.Command(os.Args[2], os.Args[3:]...) 50 | cmd.Stdout = os.Stdout 51 | cmd.Stdin = os.Stdin 52 | cmd.Stderr = os.Stderr 53 | 54 | // setup environment for container. 55 | setup() 56 | if err := cmd.Run(); err != nil { 57 | panic(fmt.Sprintf("child panic: %v", err)) 58 | } 59 | } 60 | 61 | func setup() { 62 | if err := syscall.Sethostname([]byte("container")); err != nil { 63 | panic(fmt.Sprintf("Sethostname: %v", err)) 64 | } 65 | 66 | pwd, err := os.Getwd() 67 | if err != nil { 68 | panic(fmt.Sprintf("get pwd err: %v\n", err)) 69 | } 70 | 71 | target := path.Join(pwd, "rootfs") 72 | if err := syscall.Chroot(target); err != nil { 73 | panic(fmt.Sprintf("chroot err: %v\n", err)) 74 | } 75 | if err := os.Chdir("/"); err != nil { 76 | panic(fmt.Sprintf("chdir err: %v\n", err)) 77 | } 78 | 79 | if err := syscall.Mount("proc", "proc", "proc", 0, ""); err != nil { 80 | panic(fmt.Sprintf("failed to mount proc to %s: %v", target, err)) 81 | } 82 | 83 | lnk, err := waitForIface() 84 | if err != nil { 85 | panic(fmt.Sprintf("waitForIface err: %v", err)) 86 | } 87 | 88 | if err := setupIface(lnk); err != nil { 89 | panic(fmt.Sprintf("setupIface err: %v", err)) 90 | } 91 | } 92 | 
-------------------------------------------------------------------------------- /network-namespace.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | NS="ns1" 6 | VETH="veth1" 7 | VPEER="vpeer1" 8 | VETH_ADDR="10.200.1.1" 9 | VPEER_ADDR="10.200.1.2" 10 | 11 | if [[ $EUID -ne 0 ]]; then 12 | echo "You must be root to run this script" 13 | exit 1 14 | fi 15 | 16 | # Remove namespace if it exists. 17 | ip netns del $NS &>/dev/null 18 | 19 | # Create namespace 20 | ip netns add $NS 21 | 22 | # Create veth link. 23 | ip link add ${VETH} type veth peer name ${VPEER} 24 | 25 | # Add peer-1 to NS. 26 | ip link set ${VPEER} netns $NS 27 | 28 | # Setup IP address of ${VETH}. 29 | ip addr add ${VETH_ADDR}/24 dev ${VETH} 30 | ip link set ${VETH} up 31 | 32 | # Setup IP ${VPEER}. 33 | ip netns exec $NS ip addr add ${VPEER_ADDR}/24 dev ${VPEER} 34 | ip netns exec $NS ip link set ${VPEER} up 35 | ip netns exec $NS ip link set lo up 36 | ip netns exec $NS ip route add default via ${VETH_ADDR} 37 | 38 | # Enable IP-forwarding. 39 | echo 1 > /proc/sys/net/ipv4/ip_forward 40 | 41 | # Flush forward rules. 42 | iptables -P FORWARD DROP 43 | iptables -F FORWARD 44 | 45 | # Flush nat rules. 46 | iptables -t nat -F 47 | 48 | # Enable masquerading of 10.200.1.0. 
49 | iptables -t nat -A POSTROUTING -s ${VETH_ADDR}/24 -o eth0 -j MASQUERADE 50 | 51 | iptables -A FORWARD -i eth0 -o ${VETH} -j ACCEPT 52 | iptables -A FORWARD -o eth0 -i ${VETH} -j ACCEPT 53 | 54 | # Get into namespace 55 | ip netns exec ${NS} /bin/bash --rcfile <(echo "PS1=\"${NS}> \"") -------------------------------------------------------------------------------- /network.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "time" 8 | 9 | "github.com/vishvananda/netlink" 10 | ) 11 | 12 | var ( 13 | bridgeName = "container0" 14 | hostDevice = "eth0" 15 | bridgeIP = "10.88.37.1/24" 16 | vethName = "veth" 17 | vethPeerName = "veth-peer" 18 | vethAddr = "10.88.37.11/24" 19 | vethPeerAddr = "10.88.37.22/24" 20 | ) 21 | 22 | func createBridge() (netlink.Link, error) { 23 | if br, err := netlink.LinkByName(bridgeName); err == nil { 24 | return br, nil 25 | } 26 | 27 | la := netlink.NewLinkAttrs() 28 | la.Name = bridgeName 29 | br := &netlink.Bridge{LinkAttrs: la} 30 | if err := netlink.LinkAdd(br); err != nil { 31 | return nil, fmt.Errorf("bridge creation: %v", err) 32 | } 33 | 34 | addr, err := netlink.ParseAddr(bridgeIP) 35 | if err != nil { 36 | return nil, fmt.Errorf("parse address %s: %v", bridgeIP, err) 37 | } 38 | 39 | if err := netlink.AddrAdd(br, addr); err != nil { 40 | return nil, fmt.Errorf("br add addr err: %v", err) 41 | } 42 | 43 | // sets up bridge ( ip link set dev container0 up ) 44 | if err := netlink.LinkSetUp(br); err != nil { 45 | return nil, err 46 | } 47 | return br, nil 48 | } 49 | 50 | type vethPair struct { 51 | Veth netlink.Link 52 | VethAddr string 53 | VethName string 54 | VethPeer netlink.Link 55 | VethPeerAddr string 56 | VethPeerName string 57 | } 58 | 59 | func createVethPair(pid int) (netlink.Link, error) { 60 | // get bridge to set as master for one side of veth-pair 61 | br, err := netlink.LinkByName(bridgeName) 62 | if err != nil 
{ 63 | return nil, fmt.Errorf("find bridge err: %v", err) 64 | } 65 | 66 | // create *netlink.Veth 67 | la := netlink.NewLinkAttrs() 68 | la.Name = vethName 69 | la.MasterIndex = br.Attrs().Index 70 | 71 | vp := &netlink.Veth{LinkAttrs: la, PeerName: vethPeerName} 72 | netlink.LinkDel(vp) 73 | if err := netlink.LinkAdd(vp); err != nil { 74 | return nil, fmt.Errorf("veth pair creation %s <-> %s: %v", vethName, vethPeerName, err) 75 | } 76 | 77 | // get peer by name to put it to namespace 78 | peer, err := netlink.LinkByName(vethPeerName) 79 | if err != nil { 80 | return nil, fmt.Errorf("get peer interface: %v", err) 81 | } 82 | 83 | // put peer side to network namespace of specified PID 84 | if err := netlink.LinkSetNsPid(peer, pid); err != nil { 85 | return nil, fmt.Errorf("move peer to ns of %d: %v", pid, err) 86 | } 87 | 88 | addr, err := netlink.ParseAddr(vethAddr) 89 | if err != nil { 90 | return nil, fmt.Errorf("veth addr parse IP: %v", err) 91 | } 92 | 93 | if err := netlink.AddrAdd(vp, addr); err != nil { 94 | return nil, fmt.Errorf("veth addr add err: %v", err) 95 | } 96 | 97 | if err := netlink.LinkSetUp(vp); err != nil { 98 | return nil, fmt.Errorf("veth set up err: %v", err) 99 | } 100 | 101 | return vp, nil 102 | } 103 | 104 | func putIface(pid int) error { 105 | iptablesRules := getIptablesRules(bridgeIP, hostDevice, bridgeName) 106 | if err := setIptables(iptablesRules); err != nil { 107 | return fmt.Errorf("set iptables err: %v", err) 108 | } 109 | 110 | br, err := createBridge() 111 | if err != nil { 112 | return fmt.Errorf("create bridge err: %v", err) 113 | } 114 | veth, err := createVethPair(pid) 115 | if err != nil { 116 | return fmt.Errorf("create veth pair err: %v", err) 117 | } 118 | 119 | if err := netlink.LinkSetMaster(veth, br.(*netlink.Bridge)); err != nil { 120 | return fmt.Errorf("link set master err: %v", err) 121 | } 122 | 123 | return nil 124 | } 125 | 126 | func setupIface(link netlink.Link) error { 127 | // up loopback 128 | lo, 
err := netlink.LinkByName("lo") 129 | if err != nil { 130 | return fmt.Errorf("lo interface: %v", err) 131 | } 132 | if err := netlink.LinkSetUp(lo); err != nil { 133 | return fmt.Errorf("up veth: %v", err) 134 | } 135 | addr, err := netlink.ParseAddr(vethPeerAddr) 136 | if err != nil { 137 | return fmt.Errorf("parse IP: %v", err) 138 | } 139 | if err := netlink.AddrAdd(link, addr); err != nil { 140 | return fmt.Errorf("addr add err: %v", err) 141 | } 142 | 143 | if err := netlink.LinkSetUp(link); err != nil { 144 | return fmt.Errorf("link set up err: %v", err) 145 | } 146 | 147 | vethIP, _, err := net.ParseCIDR(vethAddr) 148 | if err != nil { 149 | return fmt.Errorf("parse veth ip err: %v", err) 150 | } 151 | route := &netlink.Route{ 152 | Scope: netlink.SCOPE_UNIVERSE, 153 | LinkIndex: link.Attrs().Index, 154 | Gw: vethIP, 155 | } 156 | 157 | if err := netlink.RouteAdd(route); err != nil { 158 | return fmt.Errorf("route add err: %v", err) 159 | } 160 | return nil 161 | } 162 | 163 | func waitForIface() (netlink.Link, error) { 164 | log.Println("Starting to wait for network interface") 165 | start := time.Now() 166 | for { 167 | fmt.Printf(".") 168 | if time.Since(start) > 5*time.Second { 169 | fmt.Printf("\n") 170 | return nil, fmt.Errorf("failed to find veth interface in 5 seconds") 171 | } 172 | // get list of all interfaces 173 | lst, err := netlink.LinkList() 174 | if err != nil { 175 | fmt.Printf("\n") 176 | return nil, err 177 | } 178 | for _, l := range lst { 179 | // if we found "veth" interface - it's time to continue setup 180 | if l.Type() == "veth" { 181 | fmt.Printf("\n") 182 | return l, nil 183 | } 184 | } 185 | time.Sleep(100 * time.Millisecond) 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /rootfs.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cssivision/container/0e8a40505375aacb6977fb7a2a86b1e6cac634fd/rootfs.tar 
-------------------------------------------------------------------------------- /simple_container/simple_container.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | "os/exec" 8 | "path" 9 | "syscall" 10 | ) 11 | 12 | func main() { 13 | switch os.Args[1] { 14 | case "run": 15 | parent() 16 | case "child": 17 | child() 18 | default: 19 | panic("what should I do") 20 | } 21 | } 22 | 23 | func parent() { 24 | cmd := exec.Command(os.Args[0], append([]string{"child"}, os.Args[2:]...)...) 25 | cmd.SysProcAttr = &syscall.SysProcAttr{ 26 | Cloneflags: syscall.CLONE_NEWUTS | syscall.CLONE_NEWPID | syscall.CLONE_NEWNS | syscall.CLONE_NEWNET, 27 | } 28 | cmd.Stdin = os.Stdin 29 | cmd.Stdout = os.Stdout 30 | cmd.Stderr = os.Stderr 31 | 32 | if err := cmd.Start(); err != nil { 33 | panic(fmt.Sprintf("start parent err: %v", err)) 34 | } 35 | 36 | log.Printf("container PID: %d", cmd.Process.Pid) 37 | // set bridge and veth pair for container. 38 | 39 | if err := cmd.Wait(); err != nil { 40 | panic(fmt.Sprintf("wait parent err: %v\n", err)) 41 | } 42 | } 43 | 44 | func child() { 45 | fmt.Printf("start child......, pid %v\n", syscall.Getpid()) 46 | cmd := exec.Command(os.Args[2], os.Args[3:]...) 47 | cmd.Stdout = os.Stdout 48 | cmd.Stdin = os.Stdin 49 | cmd.Stderr = os.Stderr 50 | 51 | // setup environment for container. 
// setup prepares the container environment for the calling process. In order,
// it:
//
//   - sets the hostname to "container",
//   - chroots into the "rootfs" directory below the current working directory,
//   - changes the working directory to the new "/",
//   - mounts a proc filesystem on the relative path "proc".
//
// Every step is fatal: the first one that fails panics with a message naming
// that step.
func setup() {
	hostname := []byte("container")
	if err := syscall.Sethostname(hostname); err != nil {
		panic(fmt.Sprintf("Sethostname: %v", err))
	}

	cwd, err := os.Getwd()
	if err != nil {
		panic(fmt.Sprintf("get pwd err: %v\n", err))
	}

	// The new root is looked up relative to where the process was started.
	rootDir := path.Join(cwd, "rootfs")
	err = syscall.Chroot(rootDir)
	if err != nil {
		panic(fmt.Sprintf("chroot err: %v\n", err))
	}

	err = os.Chdir("/")
	if err != nil {
		panic(fmt.Sprintf("chdir err: %v\n", err))
	}

	// The mount target is relative; after the Chdir above it names "proc"
	// directly under the new root.
	err = syscall.Mount("proc", "proc", "proc", 0, "")
	if err != nil {
		panic(fmt.Sprintf("failed to mount proc to %s: %v", rootDir, err))
	}
}