├── go.mod ├── typ_none.go ├── typ_cloexec_nonblock.go ├── accept4.go ├── netns_others.go ├── export_linux_test.go ├── setbuffer_others.go ├── accept.go ├── go.sum ├── .github └── workflows │ ├── test.yml │ └── static-analysis.yml ├── doc.go ├── setbuffer_linux.go ├── LICENSE.md ├── README.md ├── CHANGELOG.md ├── conn_linux.go ├── netns_linux.go ├── conn_linux_test.go ├── internal └── sockettest │ └── sockettest.go ├── conn_test.go └── conn.go /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mdlayher/socket 2 | 3 | go 1.20 4 | 5 | require ( 6 | github.com/google/go-cmp v0.5.9 7 | golang.org/x/net v0.14.0 8 | golang.org/x/sync v0.3.0 9 | golang.org/x/sys v0.11.0 10 | ) 11 | -------------------------------------------------------------------------------- /typ_none.go: -------------------------------------------------------------------------------- 1 | //go:build darwin 2 | // +build darwin 3 | 4 | package socket 5 | 6 | const ( 7 | // These operating systems do not support CLOEXEC and NONBLOCK socket 8 | // options. 9 | flagCLOEXEC = false 10 | socketFlags = 0 11 | ) 12 | -------------------------------------------------------------------------------- /typ_cloexec_nonblock.go: -------------------------------------------------------------------------------- 1 | //go:build !darwin 2 | // +build !darwin 3 | 4 | package socket 5 | 6 | import "golang.org/x/sys/unix" 7 | 8 | const ( 9 | // These operating systems support CLOEXEC and NONBLOCK socket options. 
10 | flagCLOEXEC = true 11 | socketFlags = unix.SOCK_CLOEXEC | unix.SOCK_NONBLOCK 12 | ) 13 | -------------------------------------------------------------------------------- /accept4.go: -------------------------------------------------------------------------------- 1 | //go:build dragonfly || freebsd || illumos || linux 2 | // +build dragonfly freebsd illumos linux 3 | 4 | package socket 5 | 6 | import ( 7 | "golang.org/x/sys/unix" 8 | ) 9 | 10 | const sysAccept = "accept4" 11 | 12 | // accept wraps accept4(2). 13 | func accept(fd, flags int) (int, unix.Sockaddr, error) { 14 | return unix.Accept4(fd, flags) 15 | } 16 | -------------------------------------------------------------------------------- /netns_others.go: -------------------------------------------------------------------------------- 1 | //go:build !linux 2 | // +build !linux 3 | 4 | package socket 5 | 6 | import ( 7 | "fmt" 8 | "runtime" 9 | ) 10 | 11 | // withNetNS returns an error on non-Linux systems. 12 | func withNetNS(_ int, _ func() (*Conn, error)) (*Conn, error) { 13 | return nil, fmt.Errorf("socket: Linux network namespace support is not available on %s", runtime.GOOS) 14 | } 15 | -------------------------------------------------------------------------------- /export_linux_test.go: -------------------------------------------------------------------------------- 1 | //go:build linux 2 | // +build linux 3 | 4 | package socket 5 | 6 | // A NetNS is an exported wrapper for netNS for tests. 7 | type NetNS struct{ *netNS } 8 | 9 | // ThreadNetNS is an exported wrapper for threadNetNS for tests. 
10 | func ThreadNetNS() (*NetNS, error) { 11 | ns, err := threadNetNS() 12 | if err != nil { 13 | return nil, err 14 | } 15 | 16 | return &NetNS{ns}, nil 17 | } 18 | -------------------------------------------------------------------------------- /setbuffer_others.go: -------------------------------------------------------------------------------- 1 | //go:build !linux 2 | // +build !linux 3 | 4 | package socket 5 | 6 | import "golang.org/x/sys/unix" 7 | 8 | // setReadBuffer wraps the SO_RCVBUF setsockopt(2) option. 9 | func (c *Conn) setReadBuffer(bytes int) error { 10 | return c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUF, bytes) 11 | } 12 | 13 | // setWriteBuffer wraps the SO_SNDBUF setsockopt(2) option. 14 | func (c *Conn) setWriteBuffer(bytes int) error { 15 | return c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUF, bytes) 16 | } 17 | -------------------------------------------------------------------------------- /accept.go: -------------------------------------------------------------------------------- 1 | //go:build !dragonfly && !freebsd && !illumos && !linux 2 | // +build !dragonfly,!freebsd,!illumos,!linux 3 | 4 | package socket 5 | 6 | import ( 7 | "fmt" 8 | "runtime" 9 | 10 | "golang.org/x/sys/unix" 11 | ) 12 | 13 | const sysAccept = "accept" 14 | 15 | // accept wraps accept(2). 16 | func accept(fd, flags int) (int, unix.Sockaddr, error) { 17 | if flags != 0 { 18 | // These operating systems have no support for flags to accept(2). 
19 | return 0, nil, fmt.Errorf("socket: Conn.Accept flags are ineffective on %s", runtime.GOOS) 20 | } 21 | 22 | return unix.Accept(fd) 23 | } 24 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= 2 | github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 3 | golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14= 4 | golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI= 5 | golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= 6 | golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= 7 | golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= 8 | golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 9 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - "*" 7 | pull_request: 8 | branches: 9 | - "*" 10 | 11 | jobs: 12 | build: 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | go-version: ["1.20", "1.21.0"] 17 | os: [ubuntu-latest, macos-latest] 18 | runs-on: ${{ matrix.os }} 19 | 20 | steps: 21 | - name: Set up Go 22 | uses: actions/setup-go@v3 23 | with: 24 | go-version: ${{ matrix.go-version }} 25 | id: go 26 | 27 | - name: Check out code into the Go module directory 28 | uses: actions/checkout@v3 29 | 30 | - name: Run tests 31 | run: go test -race ./... 
32 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Package socket provides a low-level network connection type which integrates 2 | // with Go's runtime network poller to provide asynchronous I/O and deadline 3 | // support. 4 | // 5 | // This package focuses on UNIX-like operating systems which make use of BSD 6 | // sockets system call APIs. It is meant to be used as a foundation for the 7 | // creation of operating system-specific socket packages, for socket families 8 | // such as Linux's AF_NETLINK, AF_PACKET, or AF_VSOCK. This package should not 9 | // be used directly in end user applications. 10 | // 11 | // Any use of package socket should be guarded by build tags, as one would also 12 | // use when importing the syscall or golang.org/x/sys packages. 13 | package socket 14 | -------------------------------------------------------------------------------- /setbuffer_linux.go: -------------------------------------------------------------------------------- 1 | //go:build linux 2 | // +build linux 3 | 4 | package socket 5 | 6 | import "golang.org/x/sys/unix" 7 | 8 | // setReadBuffer wraps the SO_RCVBUF{,FORCE} setsockopt(2) options. 9 | func (c *Conn) setReadBuffer(bytes int) error { 10 | err := c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUFFORCE, bytes) 11 | if err != nil { 12 | err = c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUF, bytes) 13 | } 14 | return err 15 | } 16 | 17 | // setWriteBuffer wraps the SO_SNDBUF{,FORCE} setsockopt(2) options. 
18 | func (c *Conn) setWriteBuffer(bytes int) error { 19 | err := c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUFFORCE, bytes) 20 | if err != nil { 21 | err = c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUF, bytes) 22 | } 23 | return err 24 | } 25 | -------------------------------------------------------------------------------- /.github/workflows/static-analysis.yml: -------------------------------------------------------------------------------- 1 | name: Static Analysis 2 | 3 | on: 4 | push: 5 | branches: 6 | - "*" 7 | pull_request: 8 | branches: 9 | - "*" 10 | 11 | jobs: 12 | build: 13 | strategy: 14 | matrix: 15 | go-version: ["1.21.0"] 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - name: Set up Go 20 | uses: actions/setup-go@v3 21 | with: 22 | go-version: ${{ matrix.go-version }} 23 | id: go 24 | 25 | - name: Check out code into the Go module directory 26 | uses: actions/checkout@v3 27 | 28 | - name: Install staticcheck 29 | run: go install honnef.co/go/tools/cmd/staticcheck@latest 30 | 31 | - name: Print staticcheck version 32 | run: staticcheck -version 33 | 34 | - name: Run staticcheck 35 | run: staticcheck ./... 36 | 37 | - name: Run go vet 38 | run: go vet ./... 
39 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (C) 2021 Matt Layher 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # socket [![Test Status](https://github.com/mdlayher/socket/workflows/Test/badge.svg)](https://github.com/mdlayher/socket/actions) [![Go Reference](https://pkg.go.dev/badge/github.com/mdlayher/socket.svg)](https://pkg.go.dev/github.com/mdlayher/socket) [![Go Report Card](https://goreportcard.com/badge/github.com/mdlayher/socket)](https://goreportcard.com/report/github.com/mdlayher/socket) 2 | 3 | Package `socket` provides a low-level network connection type which integrates 4 | with Go's runtime network poller to provide asynchronous I/O and deadline 5 | support. MIT Licensed. 6 | 7 | This package focuses on UNIX-like operating systems which make use of BSD 8 | sockets system call APIs. It is meant to be used as a foundation for the 9 | creation of operating system-specific socket packages, for socket families such 10 | as Linux's `AF_NETLINK`, `AF_PACKET`, or `AF_VSOCK`. This package should not be 11 | used directly in end user applications. 12 | 13 | Any use of package socket should be guarded by build tags, as one would also 14 | use when importing the `syscall` or `golang.org/x/sys` packages. 15 | 16 | ## Stability 17 | 18 | See the [CHANGELOG](./CHANGELOG.md) file for a description of changes between 19 | releases. 20 | 21 | This package only supports the two most recent major versions of Go, mirroring 22 | Go's own release policy. Older versions of Go may lack critical features and bug 23 | fixes which are necessary for this package to function correctly. 
24 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## v0.5.1 4 | 5 | - [Improvement]: revert `go.mod` to Go 1.20 to [resolve an issue around Go 6 | module version upgrades](https://github.com/mdlayher/socket/issues/13). 7 | 8 | ## v0.5.0 9 | 10 | **This is the first release of package socket that only supports Go 1.21+. 11 | Users on older versions of Go must use v0.4.1.** 12 | 13 | - [Improvement]: drop support for older versions of Go. 14 | - [New API]: add `socket.Conn` wrappers for various `Getsockopt` and 15 | `Setsockopt` system calls. 16 | 17 | ## v0.4.1 18 | 19 | - [Bug Fix] [commit](https://github.com/mdlayher/socket/commit/2a14ceef4da279de1f957c5761fffcc6c87bbd3b): 20 | ensure `socket.Conn` can be used with non-socket file descriptors by handling 21 | `ENOTSOCK` in the constructor. 22 | 23 | ## v0.4.0 24 | 25 | **This is the first release of package socket that only supports Go 1.18+. 26 | Users on older versions of Go must use v0.3.0.** 27 | 28 | - [Improvement]: drop support for older versions of Go so we can begin using 29 | modern versions of `x/sys` and other dependencies. 30 | 31 | ## v0.3.0 32 | 33 | **This is the last release of package socket that supports Go 1.17 and below.** 34 | 35 | - [New API/API change] [PR](https://github.com/mdlayher/socket/pull/8): 36 | numerous `socket.Conn` methods now support context cancelation. Future 37 | releases will continue adding support as needed. 38 | - New `ReadContext` and `WriteContext` methods. 39 | - `Connect`, `Recvfrom`, `Recvmsg`, `Sendmsg`, and `Sendto` methods now accept 40 | a context. 41 | - `Sendto` parameter order was also fixed to match the underlying syscall. 
42 | 43 | ## v0.2.3 44 | 45 | - [New API] [commit](https://github.com/mdlayher/socket/commit/a425d96e0f772c053164f8ce4c9c825380a98086): 46 | `socket.Conn` has new `Pidfd*` methods for wrapping the `pidfd_*(2)` family of 47 | system calls. 48 | 49 | ## v0.2.2 50 | 51 | - [New API] [commit](https://github.com/mdlayher/socket/commit/a2429f1dfe8ec2586df5a09f50ead865276cd027): 52 | `socket.Conn` has new `IoctlKCM*` methods for wrapping `ioctl(2)` for `AF_KCM` 53 | operations. 54 | 55 | ## v0.2.1 56 | 57 | - [New API] [commit](https://github.com/mdlayher/socket/commit/b18ddbe9caa0e34552b4409a3aa311cb460d2f99): 58 | `socket.Conn` has a new `SetsockoptPacketMreq` method for wrapping 59 | `setsockopt(2)` for `AF_PACKET` socket options. 60 | 61 | ## v0.2.0 62 | 63 | - [New API] [commit](https://github.com/mdlayher/socket/commit/6e912a68523c45e5fd899239f4b46c402dd856da): 64 | `socket.FileConn` can be used to create a `socket.Conn` from an existing 65 | `os.File`, which may be provided by systemd socket activation or another 66 | external mechanism. 67 | - [API change] [commit](https://github.com/mdlayher/socket/commit/66d61f565188c23fe02b24099ddc856d538bf1a7): 68 | `socket.Conn.Connect` now returns the `unix.Sockaddr` value provided by 69 | `getpeername(2)`, since we have to invoke that system call anyway to verify 70 | that a connection to a remote peer was successfully established. 71 | - [Bug Fix] [commit](https://github.com/mdlayher/socket/commit/b60b2dbe0ac3caff2338446a150083bde8c5c19c): 72 | check the correct error from `unix.GetsockoptInt` in the `socket.Conn.Connect` 73 | method. Thanks @vcabbage! 74 | 75 | ## v0.1.2 76 | 77 | - [Bug Fix]: `socket.Conn.Connect` now properly checks the `SO_ERROR` socket 78 | option value after calling `connect(2)` to verify whether or not a connection 79 | could successfully be established. This means that `Connect` should now report 80 | an error for an `AF_INET` TCP connection refused or `AF_VSOCK` connection 81 | reset by peer. 
82 | - [New API]: add `socket.Conn.Getpeername` for use in `Connect`, but also for 83 | use by external callers. 84 | 85 | ## v0.1.1 86 | 87 | - [New API]: `socket.Conn` now has `CloseRead`, `CloseWrite`, and `Shutdown` 88 | methods. 89 | - [Improvement]: internal rework to more robustly handle various errors. 90 | 91 | ## v0.1.0 92 | 93 | - Initial unstable release. Most functionality has been developed and ported 94 | from package [`netlink`](https://github.com/mdlayher/netlink). 95 | -------------------------------------------------------------------------------- /conn_linux.go: -------------------------------------------------------------------------------- 1 | //go:build linux 2 | // +build linux 3 | 4 | package socket 5 | 6 | import ( 7 | "context" 8 | "os" 9 | "unsafe" 10 | 11 | "golang.org/x/net/bpf" 12 | "golang.org/x/sys/unix" 13 | ) 14 | 15 | // IoctlKCMClone wraps ioctl(2) for unix.KCMClone values, but returns a Conn 16 | // rather than a raw file descriptor. 17 | func (c *Conn) IoctlKCMClone() (*Conn, error) { 18 | info, err := controlT(c, "ioctl", unix.IoctlKCMClone) 19 | if err != nil { 20 | return nil, err 21 | } 22 | 23 | // Successful clone, wrap in a Conn for use by the caller. 24 | return New(int(info.Fd), c.name) 25 | } 26 | 27 | // IoctlKCMAttach wraps ioctl(2) for unix.KCMAttach values. 28 | func (c *Conn) IoctlKCMAttach(info unix.KCMAttach) error { 29 | return c.control("ioctl", func(fd int) error { 30 | return unix.IoctlKCMAttach(fd, info) 31 | }) 32 | } 33 | 34 | // IoctlKCMUnattach wraps ioctl(2) for unix.KCMUnattach values. 35 | func (c *Conn) IoctlKCMUnattach(info unix.KCMUnattach) error { 36 | return c.control("ioctl", func(fd int) error { 37 | return unix.IoctlKCMUnattach(fd, info) 38 | }) 39 | } 40 | 41 | // PidfdGetfd wraps pidfd_getfd(2) for a Conn which wraps a pidfd, but returns a 42 | // Conn rather than a raw file descriptor. 
43 | func (c *Conn) PidfdGetfd(targetFD, flags int) (*Conn, error) { 44 | outFD, err := controlT(c, "pidfd_getfd", func(fd int) (int, error) { 45 | return unix.PidfdGetfd(fd, targetFD, flags) 46 | }) 47 | if err != nil { 48 | return nil, err 49 | } 50 | 51 | // Successful getfd, wrap in a Conn for use by the caller. 52 | return New(outFD, c.name) 53 | } 54 | 55 | // PidfdSendSignal wraps pidfd_send_signal(2) for a Conn which wraps a Linux 56 | // pidfd. 57 | func (c *Conn) PidfdSendSignal(sig unix.Signal, info *unix.Siginfo, flags int) error { 58 | return c.control("pidfd_send_signal", func(fd int) error { 59 | return unix.PidfdSendSignal(fd, sig, info, flags) 60 | }) 61 | } 62 | 63 | // SetBPF attaches an assembled BPF program to a Conn. 64 | func (c *Conn) SetBPF(filter []bpf.RawInstruction) error { 65 | // An empty filter has no first instruction to point to, and SockFprog.Len is 66 | // a uint16, so a longer filter would be silently truncated; reject both. 67 | if n := len(filter); n == 0 || n > 1<<16-1 { 68 | return os.NewSyscallError("setsockopt", unix.EINVAL) 69 | } 70 | 71 | prog := unix.SockFprog{ 72 | Len: uint16(len(filter)), 73 | Filter: (*unix.SockFilter)(unsafe.Pointer(&filter[0])), 74 | } 75 | 76 | return c.SetsockoptSockFprog(unix.SOL_SOCKET, unix.SO_ATTACH_FILTER, &prog) 77 | } 78 | 79 | // RemoveBPF removes a BPF filter from a Conn. 80 | func (c *Conn) RemoveBPF() error { 81 | // 0 argument is ignored. 82 | return c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_DETACH_FILTER, 0) 83 | } 84 | 85 | // SetsockoptPacketMreq wraps setsockopt(2) for unix.PacketMreq values. 86 | func (c *Conn) SetsockoptPacketMreq(level, opt int, mreq *unix.PacketMreq) error { 87 | return c.control("setsockopt", func(fd int) error { 88 | return unix.SetsockoptPacketMreq(fd, level, opt, mreq) 89 | }) 90 | } 91 | 92 | // SetsockoptSockFprog wraps setsockopt(2) for unix.SockFprog values.
93 | func (c *Conn) SetsockoptSockFprog(level, opt int, fprog *unix.SockFprog) error { 94 | return c.control("setsockopt", func(fd int) error { 95 | return unix.SetsockoptSockFprog(fd, level, opt, fprog) 96 | }) 97 | } 98 | 99 | // GetsockoptTpacketStats wraps getsockopt(2) for unix.TpacketStats values. 100 | func (c *Conn) GetsockoptTpacketStats(level, name int) (*unix.TpacketStats, error) { 101 | return controlT(c, "getsockopt", func(fd int) (*unix.TpacketStats, error) { 102 | return unix.GetsockoptTpacketStats(fd, level, name) 103 | }) 104 | } 105 | 106 | // GetsockoptTpacketStatsV3 wraps getsockopt(2) for unix.TpacketStatsV3 values. 107 | func (c *Conn) GetsockoptTpacketStatsV3(level, name int) (*unix.TpacketStatsV3, error) { 108 | return controlT(c, "getsockopt", func(fd int) (*unix.TpacketStatsV3, error) { 109 | return unix.GetsockoptTpacketStatsV3(fd, level, name) 110 | }) 111 | } 112 | 113 | // Waitid wraps waitid(2). 114 | func (c *Conn) Waitid(idType int, info *unix.Siginfo, options int, rusage *unix.Rusage) error { 115 | return c.read(context.Background(), "waitid", func(fd int) error { 116 | return unix.Waitid(idType, fd, info, options, rusage) 117 | }) 118 | } 119 | -------------------------------------------------------------------------------- /netns_linux.go: -------------------------------------------------------------------------------- 1 | //go:build linux 2 | // +build linux 3 | 4 | package socket 5 | 6 | import ( 7 | "errors" 8 | "fmt" 9 | "os" 10 | "runtime" 11 | 12 | "golang.org/x/sync/errgroup" 13 | "golang.org/x/sys/unix" 14 | ) 15 | 16 | // errNetNSDisabled is returned when network namespaces are unavailable on 17 | // a given system. 18 | var errNetNSDisabled = errors.New("socket: Linux network namespaces are not enabled on this system") 19 | 20 | // withNetNS invokes fn within the context of the network namespace specified by 21 | // fd, while also managing the logic required to safely do so by manipulating 22 | // thread-local state. 
23 | func withNetNS(fd int, fn func() (*Conn, error)) (*Conn, error) { 24 | var ( 25 | eg errgroup.Group 26 | conn *Conn 27 | ) 28 | 29 | eg.Go(func() error { 30 | // Retrieve and store the calling OS thread's network namespace so the 31 | // thread can be reassigned to it after creating a socket in another network 32 | // namespace. 33 | runtime.LockOSThread() 34 | 35 | ns, err := threadNetNS() 36 | if err != nil { 37 | // No thread-local manipulation, unlock. 38 | runtime.UnlockOSThread() 39 | return err 40 | } 41 | defer ns.Close() 42 | 43 | // Beyond this point, the thread's network namespace is poisoned. Do not 44 | // unlock the OS thread until all network namespace manipulation completes 45 | // to avoid returning to the caller with altered thread-local state. 46 | 47 | // Assign the current OS thread the goroutine is locked to to the given 48 | // network namespace. 49 | if err := ns.Set(fd); err != nil { 50 | return err 51 | } 52 | 53 | // Attempt Conn creation and unconditionally restore the original namespace. 54 | c, err := fn() 55 | if nerr := ns.Restore(); nerr != nil { 56 | // Failed to restore original namespace. Return an error and allow the 57 | // runtime to terminate the thread. 58 | if err == nil { 59 | _ = c.Close() 60 | } 61 | 62 | return nerr 63 | } 64 | 65 | // No more thread-local state manipulation; return the new Conn. 66 | runtime.UnlockOSThread() 67 | conn = c 68 | return err 69 | }) 70 | 71 | if err := eg.Wait(); err != nil { 72 | return nil, err 73 | } 74 | 75 | return conn, nil 76 | } 77 | 78 | // A netNS is a handle that can manipulate network namespaces. 79 | // 80 | // Operations performed on a netNS must use runtime.LockOSThread before 81 | // manipulating any network namespaces. 82 | type netNS struct { 83 | // The handle to a network namespace. 84 | f *os.File 85 | 86 | // Indicates if network namespaces are disabled on this system, and thus 87 | // operations should become a no-op or return errors. 
88 | disabled bool 89 | } 90 | 91 | // threadNetNS constructs a netNS using the network namespace of the calling 92 | // thread. If the namespace is not the default namespace, runtime.LockOSThread 93 | // should be invoked first. 94 | func threadNetNS() (*netNS, error) { 95 | return fileNetNS(fmt.Sprintf("/proc/self/task/%d/ns/net", unix.Gettid())) 96 | } 97 | 98 | // fileNetNS opens file and creates a netNS. fileNetNS should only be called 99 | // directly in tests. 100 | func fileNetNS(file string) (*netNS, error) { 101 | f, err := os.Open(file) 102 | switch { 103 | case err == nil: 104 | return &netNS{f: f}, nil 105 | case errors.Is(err, os.ErrNotExist): 106 | // Network namespaces are not enabled on this system. Use this signal 107 | // to return errors elsewhere if the caller explicitly asks for a 108 | // network namespace to be set. 109 | return &netNS{disabled: true}, nil 110 | default: 111 | return nil, err 112 | } 113 | } 114 | 115 | // Close releases the handle to a network namespace. 116 | func (n *netNS) Close() error { 117 | return n.do(func() error { return n.f.Close() }) 118 | } 119 | 120 | // FD returns a file descriptor which represents the network namespace. 121 | func (n *netNS) FD() int { 122 | if n.disabled { 123 | // No reasonable file descriptor value in this case, so specify a 124 | // non-existent one. 125 | return -1 126 | } 127 | 128 | return int(n.f.Fd()) 129 | } 130 | 131 | // Restore restores the original network namespace for the calling thread. 132 | func (n *netNS) Restore() error { 133 | return n.do(func() error { return n.Set(n.FD()) }) 134 | } 135 | 136 | // Set sets a new network namespace for the current thread using fd. 137 | func (n *netNS) Set(fd int) error { 138 | return n.do(func() error { 139 | return os.NewSyscallError("setns", unix.Setns(fd, unix.CLONE_NEWNET)) 140 | }) 141 | } 142 | 143 | // do runs fn if network namespaces are enabled on this system.
144 | func (n *netNS) do(fn func() error) error { 145 | if n.disabled { 146 | return errNetNSDisabled 147 | } 148 | 149 | return fn() 150 | } 151 | -------------------------------------------------------------------------------- /conn_linux_test.go: -------------------------------------------------------------------------------- 1 | //go:build linux 2 | // +build linux 3 | 4 | package socket_test 5 | 6 | import ( 7 | "context" 8 | "errors" 9 | "fmt" 10 | "math" 11 | "net" 12 | "os" 13 | "runtime" 14 | "testing" 15 | 16 | "github.com/google/go-cmp/cmp" 17 | "github.com/mdlayher/socket" 18 | "github.com/mdlayher/socket/internal/sockettest" 19 | "golang.org/x/sync/errgroup" 20 | "golang.org/x/sys/unix" 21 | ) 22 | 23 | func TestLinuxConnBuffers(t *testing.T) { 24 | t.Parallel() 25 | 26 | // This test isn't necessarily Linux-specific but it's easiest to verify on 27 | // Linux because we can rely on the kernel's documented buffer size 28 | // manipulation behavior. 29 | c, err := socket.Socket(unix.AF_INET, unix.SOCK_STREAM, 0, "tcpv4", nil) 30 | if err != nil { 31 | t.Fatalf("failed to open socket: %v", err) 32 | } 33 | defer c.Close() 34 | 35 | const ( 36 | set = 8192 37 | 38 | // Per socket(7): 39 | // 40 | // "The kernel doubles this value (to allow space for 41 | // book‐keeping overhead) when it is set using setsockopt(2), 42 | // and this doubled value is returned by getsockopt(2)." 43 | want = set * 2 44 | ) 45 | 46 | if err := c.SetReadBuffer(set); err != nil { 47 | t.Fatalf("failed to set read buffer size: %v", err) 48 | } 49 | 50 | if err := c.SetWriteBuffer(set); err != nil { 51 | t.Fatalf("failed to set write buffer size: %v", err) 52 | } 53 | 54 | // Now that we've set the buffers, we can check the sizes by reading them 55 | // back with ReadBuffer and WriteBuffer.
56 | 57 | rcv, err := c.ReadBuffer() 58 | if err != nil { 59 | t.Fatalf("failed to get read buffer size: %v", err) 60 | } 61 | 62 | snd, err := c.WriteBuffer() 63 | if err != nil { 64 | t.Fatalf("failed to get write buffer size: %v", err) 65 | } 66 | 67 | if diff := cmp.Diff(want, rcv); diff != "" { 68 | t.Fatalf("unexpected read buffer size (-want +got):\n%s", diff) 69 | } 70 | if diff := cmp.Diff(want, snd); diff != "" { 71 | t.Fatalf("unexpected write buffer size (-want +got):\n%s", diff) 72 | } 73 | } 74 | 75 | func TestLinuxNetworkNamespaces(t *testing.T) { 76 | t.Parallel() 77 | 78 | l, err := sockettest.Listen(0, nil) 79 | if err != nil { 80 | t.Fatalf("failed to create listener: %v", err) 81 | } 82 | defer l.Close() 83 | 84 | addrC := make(chan net.Addr, 1) 85 | 86 | var eg errgroup.Group 87 | eg.Go(func() error { 88 | // We are poisoning this thread by creating a new anonymous network 89 | // namespace. Do not unlock the OS thread so that the runtime will kill 90 | // this thread when the goroutine exits. 91 | runtime.LockOSThread() 92 | 93 | if err := unix.Unshare(unix.CLONE_NEWNET); err != nil { 94 | // Explicit wrap to check for permission denied. 95 | return fmt.Errorf("failed to unshare network namespace: %w", err) 96 | } 97 | 98 | ns, err := socket.ThreadNetNS() 99 | if err != nil { 100 | return fmt.Errorf("failed to get listener thread's network namespace: %v", err) 101 | } 102 | 103 | // This OS thread has been moved to a different network namespace and 104 | // thus we should also be able to start a listener on the same port. 
105 | l, err := sockettest.Listen( 106 | l.Addr().(*net.TCPAddr).Port, 107 | &socket.Config{NetNS: ns.FD()}, 108 | ) 109 | if err != nil { 110 | return fmt.Errorf("failed to create listener in network namespace: %v", err) 111 | } 112 | defer l.Close() 113 | 114 | addrC <- l.Addr() 115 | return nil 116 | }) 117 | 118 | if err := eg.Wait(); err != nil { 119 | if errors.Is(err, os.ErrPermission) { 120 | t.Skipf("skipping, permission denied: %v", err) 121 | } 122 | 123 | t.Fatalf("failed to run listener thread: %v", err) 124 | } 125 | 126 | select { 127 | case addr := <-addrC: 128 | if diff := cmp.Diff(l.Addr(), addr); diff != "" { 129 | t.Fatalf("unexpected network address (-want +got):\n%s", diff) 130 | } 131 | default: 132 | t.Fatal("listener thread did not return its local address") 133 | } 134 | } 135 | 136 | func TestLinuxDialVsockNoListener(t *testing.T) { 137 | t.Parallel() 138 | 139 | // See https://github.com/mdlayher/vsock/issues/47 and 140 | // https://github.com/lxc/lxd/pull/9894 for context on this test. 141 | c, err := socket.Socket(unix.AF_VSOCK, unix.SOCK_STREAM, 0, "vsock", nil) 142 | if err != nil { 143 | t.Fatalf("failed to open socket: %v", err) 144 | } 145 | defer c.Close() 146 | 147 | // Given a (hopefully) non-existent listener on localhost, expect 148 | // ECONNRESET. 149 | _, err = c.Connect(context.Background(), &unix.SockaddrVM{ 150 | CID: unix.VMADDR_CID_LOCAL, 151 | Port: math.MaxUint32, 152 | }) 153 | if err == nil { 154 | // See https://github.com/mdlayher/socket/issues/4. 155 | t.Skipf("skipping, expected error but vsock successfully connected to local service") 156 | } 157 | 158 | want := os.NewSyscallError("connect", unix.ECONNRESET) 159 | if diff := cmp.Diff(want, err); diff != "" { 160 | t.Fatalf("unexpected connect error (-want +got):\n%s", diff) 161 | } 162 | } 163 | 164 | func TestLinuxOpenPIDFD(t *testing.T) { 165 | // Verify we can use regular files with socket by properly handling 166 | // ENOTSOCK, as is the case with pidfds. 
167 | fd, err := unix.PidfdOpen(1, unix.PIDFD_NONBLOCK) 168 | if err != nil { 169 | t.Fatalf("failed to open pidfd for init: %v", err) 170 | } 171 | 172 | c, err := socket.New(fd, "pidfd") 173 | if err != nil { 174 | t.Fatalf("failed to open Conn for pidfd: %v", err) 175 | } 176 | _ = c.Close() 177 | } 178 | 179 | func TestLinuxBindToDevice(t *testing.T) { 180 | t.Parallel() 181 | 182 | c, err := socket.Socket(unix.AF_INET, unix.SOCK_STREAM, 0, "tcpv4", nil) 183 | if err != nil { 184 | t.Fatalf("failed to open socket: %v", err) 185 | } 186 | defer c.Close() 187 | 188 | // Assumes the loopback interface is always the first device on Linux 189 | // machines. 190 | const ( 191 | name = "lo" 192 | index = 1 193 | ) 194 | 195 | if err := c.SetsockoptString(unix.SOL_SOCKET, unix.SO_BINDTODEVICE, name); err != nil { 196 | t.Fatalf("failed to bind to device: %v", err) 197 | } 198 | 199 | gotName, err := c.GetsockoptString(unix.SOL_SOCKET, unix.SO_BINDTODEVICE) 200 | if err != nil { 201 | t.Fatalf("failed to get bound interface name: %v", err) 202 | } 203 | if diff := cmp.Diff(name, gotName); diff != "" { 204 | t.Fatalf("unexpected interface name (-want +got):\n%s", diff) 205 | } 206 | 207 | gotIndex, err := c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_BINDTOIFINDEX) 208 | if err != nil { 209 | t.Fatalf("failed to get bound interface index: %v", err) 210 | } 211 | if diff := cmp.Diff(index, gotIndex); diff != "" { 212 | t.Fatalf("unexpected interface index (-want +got):\n%s", diff) 213 | } 214 | } 215 | -------------------------------------------------------------------------------- /internal/sockettest/sockettest.go: -------------------------------------------------------------------------------- 1 | // Package sockettest implements net.Listener and net.Conn types based on 2 | // *socket.Conn for use in the package's tests. 
3 | package sockettest 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | "io" 9 | "net" 10 | "os" 11 | "time" 12 | 13 | "github.com/mdlayher/socket" 14 | "golang.org/x/sys/unix" 15 | ) 16 | 17 | // A Listener is a net.Listener which can be extended with context support. 18 | type Listener struct { 19 | addr *net.TCPAddr 20 | c *socket.Conn 21 | ctx context.Context 22 | } 23 | 24 | func (l *Listener) Context(ctx context.Context) *Listener { 25 | l.ctx = ctx 26 | return l 27 | } 28 | 29 | // Listen creates an IPv6 TCP net.Listener backed by a *socket.Conn on the 30 | // specified port with optional configuration. Context ctx will be passed 31 | // to accept and accepted connections. 32 | func Listen(port int, cfg *socket.Config) (*Listener, error) { 33 | c, err := socket.Socket(unix.AF_INET6, unix.SOCK_STREAM, 0, "tcpv6-server", cfg) 34 | if err != nil { 35 | return nil, fmt.Errorf("failed to open socket: %v", err) 36 | } 37 | 38 | // Be sure to close the Conn if any of the system calls fail before we 39 | // return the Conn to the caller. 40 | 41 | if err := c.Bind(&unix.SockaddrInet6{Port: port}); err != nil { 42 | _ = c.Close() 43 | return nil, fmt.Errorf("failed to bind: %v", err) 44 | } 45 | 46 | if err := c.Listen(unix.SOMAXCONN); err != nil { 47 | _ = c.Close() 48 | return nil, fmt.Errorf("failed to listen: %v", err) 49 | } 50 | 51 | sa, err := c.Getsockname() 52 | if err != nil { 53 | _ = c.Close() 54 | return nil, fmt.Errorf("failed to getsockname: %v", err) 55 | } 56 | 57 | return &Listener{ 58 | addr: newTCPAddr(sa), 59 | c: c, 60 | }, nil 61 | } 62 | 63 | // FileListener creates an IPv6 TCP net.Listener backed by a *socket.Conn from 64 | // the input file. 
65 | func FileListener(f *os.File) (*Listener, error) { 66 | c, err := socket.FileConn(f, "tcpv6-server") 67 | if err != nil { 68 | return nil, fmt.Errorf("failed to open file conn: %v", err) 69 | } 70 | 71 | sa, err := c.Getsockname() 72 | if err != nil { 73 | _ = c.Close() 74 | return nil, fmt.Errorf("failed to getsockname: %v", err) 75 | } 76 | 77 | return &Listener{ 78 | addr: newTCPAddr(sa), 79 | c: c, 80 | }, nil 81 | } 82 | 83 | func (l *Listener) Addr() net.Addr { return l.addr } 84 | func (l *Listener) Close() error { return l.c.Close() } 85 | func (l *Listener) Accept() (net.Conn, error) { 86 | ctx := context.Background() 87 | if l.ctx != nil { 88 | ctx = l.ctx 89 | } 90 | 91 | // SOCK_CLOEXEC and SOCK_NONBLOCK set automatically by Accept when possible. 92 | conn, rsa, err := l.c.Accept(ctx, 0) 93 | if err != nil { 94 | return nil, err 95 | } 96 | 97 | lsa, err := conn.Getsockname() 98 | if err != nil { 99 | // Don't leak the Conn if the system call fails. 100 | _ = conn.Close() 101 | return nil, err 102 | } 103 | 104 | c := &Conn{ 105 | Conn: conn, 106 | local: newTCPAddr(lsa), 107 | remote: newTCPAddr(rsa), 108 | } 109 | 110 | if l.ctx != nil { 111 | return c.Context(l.ctx), nil 112 | } 113 | 114 | return c, nil 115 | } 116 | 117 | // A Conn is a net.Conn which can be extended with context support. 118 | type Conn struct { 119 | Conn *socket.Conn 120 | local, remote *net.TCPAddr 121 | ctx context.Context 122 | } 123 | 124 | func (c *Conn) Context(ctx context.Context) *Conn { 125 | c.ctx = ctx 126 | return c 127 | } 128 | 129 | // Dial creates an IPv4 or IPv6 TCP net.Conn backed by a *socket.Conn with 130 | // optional configuration. 
131 | func Dial(ctx context.Context, addr net.Addr, cfg *socket.Config) (*Conn, error) { 132 | ta, ok := addr.(*net.TCPAddr) 133 | if !ok { 134 | return nil, fmt.Errorf("expected *net.TCPAddr, but got: %T", addr) 135 | } 136 | 137 | var ( 138 | family int 139 | name string 140 | sa unix.Sockaddr 141 | ) 142 | 143 | if ta.IP.To16() != nil && ta.IP.To4() == nil { 144 | // IPv6. 145 | family = unix.AF_INET6 146 | name = "tcpv6-client" 147 | 148 | var sa6 unix.SockaddrInet6 149 | copy(sa6.Addr[:], ta.IP) 150 | sa6.Port = ta.Port 151 | 152 | sa = &sa6 153 | } else { 154 | // IPv4. 155 | family = unix.AF_INET 156 | name = "tcpv4-client" 157 | 158 | var sa4 unix.SockaddrInet4 159 | copy(sa4.Addr[:], ta.IP.To4()) 160 | sa4.Port = ta.Port 161 | 162 | sa = &sa4 163 | } 164 | 165 | c, err := socket.Socket(family, unix.SOCK_STREAM, 0, name, cfg) 166 | if err != nil { 167 | return nil, fmt.Errorf("failed to open socket: %v", err) 168 | } 169 | 170 | // Be sure to close the Conn if any of the system calls fail before we 171 | // return the Conn to the caller. 172 | 173 | rsa, err := c.Connect(ctx, sa) 174 | if err != nil { 175 | _ = c.Close() 176 | // Don't wrap, we want the raw error for tests. 
177 | return nil, err 178 | } 179 | 180 | lsa, err := c.Getsockname() 181 | if err != nil { 182 | _ = c.Close() 183 | return nil, err 184 | } 185 | 186 | return &Conn{ 187 | Conn: c, 188 | local: newTCPAddr(lsa), 189 | remote: newTCPAddr(rsa), 190 | }, nil 191 | } 192 | 193 | func (c *Conn) Close() error { return c.Conn.Close() } 194 | func (c *Conn) CloseRead() error { return c.Conn.CloseRead() } 195 | func (c *Conn) CloseWrite() error { return c.Conn.CloseWrite() } 196 | func (c *Conn) LocalAddr() net.Addr { return c.local } 197 | func (c *Conn) RemoteAddr() net.Addr { return c.remote } 198 | func (c *Conn) SetDeadline(t time.Time) error { return c.Conn.SetDeadline(t) } 199 | func (c *Conn) SetReadDeadline(t time.Time) error { return c.Conn.SetReadDeadline(t) } 200 | func (c *Conn) SetWriteDeadline(t time.Time) error { return c.Conn.SetWriteDeadline(t) } 201 | 202 | func (c *Conn) Read(b []byte) (int, error) { 203 | var ( 204 | n int 205 | err error 206 | ) 207 | 208 | if c.ctx != nil { 209 | n, err = c.Conn.ReadContext(c.ctx, b) 210 | } else { 211 | n, err = c.Conn.Read(b) 212 | } 213 | 214 | return n, opError("read", err) 215 | } 216 | 217 | func (c *Conn) Write(b []byte) (int, error) { 218 | var ( 219 | n int 220 | err error 221 | ) 222 | 223 | if c.ctx != nil { 224 | n, err = c.Conn.WriteContext(c.ctx, b) 225 | } else { 226 | n, err = c.Conn.Write(b) 227 | } 228 | 229 | return n, opError("write", err) 230 | } 231 | 232 | func opError(op string, err error) error { 233 | // This is still a bit simplistic but sufficient for nettest.TestConn. 
234 | switch err { 235 | case nil: 236 | return nil 237 | case io.EOF: 238 | return io.EOF 239 | default: 240 | return &net.OpError{Op: op, Err: err} 241 | } 242 | } 243 | 244 | func newTCPAddr(sa unix.Sockaddr) *net.TCPAddr { 245 | switch sa := sa.(type) { 246 | case *unix.SockaddrInet4: 247 | return &net.TCPAddr{ 248 | IP: sa.Addr[:], 249 | Port: sa.Port, 250 | } 251 | case *unix.SockaddrInet6: 252 | return &net.TCPAddr{ 253 | IP: sa.Addr[:], 254 | Port: sa.Port, 255 | } 256 | } 257 | 258 | panic("unknown address family") 259 | } 260 | -------------------------------------------------------------------------------- /conn_test.go: -------------------------------------------------------------------------------- 1 | package socket_test 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "math" 10 | "net" 11 | "net/netip" 12 | "os" 13 | "runtime" 14 | "sync" 15 | "testing" 16 | "time" 17 | 18 | "github.com/google/go-cmp/cmp" 19 | "github.com/google/go-cmp/cmp/cmpopts" 20 | "github.com/mdlayher/socket/internal/sockettest" 21 | "golang.org/x/net/nettest" 22 | "golang.org/x/sync/errgroup" 23 | "golang.org/x/sys/unix" 24 | ) 25 | 26 | func TestConn(t *testing.T) { 27 | t.Parallel() 28 | 29 | tests := []struct { 30 | name string 31 | pipe nettest.MakePipe 32 | }{ 33 | // Standard library plumbing. 34 | { 35 | name: "basic", 36 | pipe: makePipe( 37 | func() (net.Listener, error) { 38 | return sockettest.Listen(0, nil) 39 | }, 40 | func(addr net.Addr) (net.Conn, error) { 41 | return sockettest.Dial(context.Background(), addr, nil) 42 | }, 43 | ), 44 | }, 45 | // Our own implementations which have context cancelation support. 
46 | { 47 | name: "context", 48 | pipe: makePipe( 49 | func() (net.Listener, error) { 50 | l, err := sockettest.Listen(0, nil) 51 | if err != nil { 52 | return nil, err 53 | } 54 | 55 | return l.Context(context.Background()), nil 56 | }, 57 | func(addr net.Addr) (net.Conn, error) { 58 | ctx := context.Background() 59 | 60 | c, err := sockettest.Dial(ctx, addr, nil) 61 | if err != nil { 62 | return nil, err 63 | } 64 | 65 | return c.Context(ctx), nil 66 | }, 67 | ), 68 | }, 69 | } 70 | 71 | for _, tt := range tests { 72 | tt := tt 73 | t.Run(tt.name, func(t *testing.T) { 74 | t.Parallel() 75 | 76 | nettest.TestConn(t, tt.pipe) 77 | 78 | // Our own extensions to TestConn. 79 | t.Run("CloseReadWrite", func(t *testing.T) { timeoutWrapper(t, tt.pipe, testCloseReadWrite) }) 80 | }) 81 | } 82 | } 83 | 84 | func TestDialTCPNoListener(t *testing.T) { 85 | t.Parallel() 86 | 87 | // See https://github.com/mdlayher/vsock/issues/47 and 88 | // https://github.com/lxc/lxd/pull/9894 for context on this test. 89 | // 90 | // 91 | // Given a (hopefully) non-existent listener on localhost, expect 92 | // ECONNREFUSED. 93 | _, err := sockettest.Dial(context.Background(), &net.TCPAddr{ 94 | IP: net.IPv6loopback, 95 | Port: math.MaxUint16, 96 | }, nil) 97 | 98 | want := os.NewSyscallError("connect", unix.ECONNREFUSED) 99 | if diff := cmp.Diff(want, err); diff != "" { 100 | t.Fatalf("unexpected connect error (-want +got):\n%s", diff) 101 | } 102 | } 103 | 104 | func TestDialTCPContextCanceledBefore(t *testing.T) { 105 | t.Parallel() 106 | 107 | // Context is canceled before any dialing can take place. 
108 | ctx, cancel := context.WithCancel(context.Background()) 109 | cancel() 110 | 111 | _, err := sockettest.Dial(ctx, &net.TCPAddr{ 112 | IP: net.IPv6loopback, 113 | Port: math.MaxUint16, 114 | }, nil) 115 | 116 | if diff := cmp.Diff(context.Canceled, err, cmpopts.EquateErrors()); diff != "" { 117 | t.Fatalf("unexpected connect error (-want +got):\n%s", diff) 118 | } 119 | } 120 | 121 | var ipTests = []struct { 122 | name string 123 | ip netip.Addr 124 | }{ 125 | // It appears we can dial addresses in the documentation range and 126 | // connect will hang, which is perfect for this test case. 127 | { 128 | name: "IPv4", 129 | ip: netip.MustParseAddr("192.0.2.1"), 130 | }, 131 | { 132 | name: "IPv6", 133 | ip: netip.MustParseAddr("2001:db8::1"), 134 | }, 135 | } 136 | 137 | func TestDialTCPContextCanceledDuring(t *testing.T) { 138 | t.Parallel() 139 | 140 | for _, tt := range ipTests { 141 | tt := tt 142 | t.Run(tt.name, func(t *testing.T) { 143 | t.Parallel() 144 | 145 | // Context is canceled during a blocking operation but without an 146 | // explicit deadline passed on the context. 
147 | ctx, cancel := context.WithCancel(context.Background()) 148 | defer cancel() 149 | 150 | go func() { 151 | time.Sleep(1 * time.Second) 152 | cancel() 153 | }() 154 | 155 | _, err := sockettest.Dial(ctx, &net.TCPAddr{ 156 | IP: tt.ip.AsSlice(), 157 | Port: math.MaxUint16, 158 | }, nil) 159 | if errors.Is(err, unix.ENETUNREACH) || errors.Is(err, unix.EHOSTUNREACH) { 160 | t.Skipf("skipping, no outbound %s connectivity: %v", tt.name, err) 161 | } 162 | 163 | if diff := cmp.Diff(context.Canceled, err, cmpopts.EquateErrors()); diff != "" { 164 | t.Fatalf("unexpected connect error (-want +got):\n%s", diff) 165 | } 166 | }) 167 | } 168 | } 169 | 170 | func TestDialTCPContextDeadlineExceeded(t *testing.T) { 171 | t.Parallel() 172 | 173 | for _, tt := range ipTests { 174 | tt := tt 175 | t.Run(tt.name, func(t *testing.T) { 176 | t.Parallel() 177 | 178 | // Dialing is canceled after the deadline passes. 179 | ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) 180 | defer cancel() 181 | 182 | _, err := sockettest.Dial(ctx, &net.TCPAddr{ 183 | IP: tt.ip.AsSlice(), 184 | Port: math.MaxUint16, 185 | }, nil) 186 | if errors.Is(err, unix.ENETUNREACH) || errors.Is(err, unix.EHOSTUNREACH) { 187 | t.Skipf("skipping, no outbound %s connectivity: %v", tt.name, err) 188 | } 189 | 190 | if diff := cmp.Diff(context.DeadlineExceeded, err, cmpopts.EquateErrors()); diff != "" { 191 | t.Fatalf("unexpected connect error (-want +got):\n%s", diff) 192 | } 193 | }) 194 | } 195 | } 196 | 197 | func TestListenerAcceptTCPContextCanceledBefore(t *testing.T) { 198 | t.Parallel() 199 | 200 | l, err := sockettest.Listen(0, nil) 201 | if err != nil { 202 | t.Fatalf("failed to listen: %v", err) 203 | } 204 | defer l.Close() 205 | 206 | // Context is canceled before accept can take place. 
207 | ctx, cancel := context.WithCancel(context.Background()) 208 | cancel() 209 | 210 | _, err = l.Context(ctx).Accept() 211 | if diff := cmp.Diff(context.Canceled, err, cmpopts.EquateErrors()); diff != "" { 212 | t.Fatalf("unexpected accept error (-want +got):\n%s", diff) 213 | } 214 | } 215 | 216 | func TestListenerAcceptTCPContextCanceledDuring(t *testing.T) { 217 | t.Parallel() 218 | 219 | l, err := sockettest.Listen(0, nil) 220 | if err != nil { 221 | t.Fatalf("failed to listen: %v", err) 222 | } 223 | defer l.Close() 224 | 225 | // Context is canceled during a blocking operation but without an 226 | // explicit deadline passed on the context. 227 | ctx, cancel := context.WithCancel(context.Background()) 228 | defer cancel() 229 | 230 | go func() { 231 | time.Sleep(1 * time.Second) 232 | cancel() 233 | }() 234 | 235 | _, err = l.Context(ctx).Accept() 236 | if diff := cmp.Diff(context.Canceled, err, cmpopts.EquateErrors()); diff != "" { 237 | t.Fatalf("unexpected accept error (-want +got):\n%s", diff) 238 | } 239 | } 240 | 241 | func TestListenerAcceptTCPContextDeadlineExceeded(t *testing.T) { 242 | t.Parallel() 243 | 244 | l, err := sockettest.Listen(0, nil) 245 | if err != nil { 246 | t.Fatalf("failed to listen: %v", err) 247 | } 248 | defer l.Close() 249 | 250 | // Accept is canceled after the deadline passes. 251 | ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) 252 | defer cancel() 253 | 254 | _, err = l.Context(ctx).Accept() 255 | if diff := cmp.Diff(context.DeadlineExceeded, err, cmpopts.EquateErrors()); diff != "" { 256 | t.Fatalf("unexpected accept error (-want +got):\n%s", diff) 257 | } 258 | } 259 | 260 | func TestListenerConnTCPContextCanceled(t *testing.T) { 261 | t.Parallel() 262 | 263 | l, err := sockettest.Listen(0, nil) 264 | if err != nil { 265 | t.Fatalf("failed to open listener: %v", err) 266 | } 267 | defer l.Close() 268 | 269 | // Accept a single connection. 
270 | var eg errgroup.Group 271 | eg.Go(func() error { 272 | c, err := l.Accept() 273 | if err != nil { 274 | return fmt.Errorf("failed to accept: %v", err) 275 | } 276 | defer c.Close() 277 | 278 | // Context is canceled during recvfrom. 279 | ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) 280 | defer cancel() 281 | 282 | b := make([]byte, 1024) 283 | _, _, err = c.(*sockettest.Conn).Conn.Recvfrom(ctx, b, 0) 284 | return err 285 | }) 286 | 287 | c, err := net.Dial(l.Addr().Network(), l.Addr().String()) 288 | if err != nil { 289 | t.Fatalf("failed to dial listener: %v", err) 290 | } 291 | defer c.Close() 292 | 293 | // Client never sends data, so we wait until ctx cancel and errgroup return. 294 | if diff := cmp.Diff(context.DeadlineExceeded, eg.Wait(), cmpopts.EquateErrors()); diff != "" { 295 | t.Fatalf("unexpected recvfrom error (-want +got):\n%s", diff) 296 | } 297 | } 298 | 299 | func TestListenerConnTCPContextDeadlineExceeded(t *testing.T) { 300 | t.Parallel() 301 | 302 | l, err := sockettest.Listen(0, nil) 303 | if err != nil { 304 | t.Fatalf("failed to open listener: %v", err) 305 | } 306 | defer l.Close() 307 | 308 | // Accept a single connection. 309 | var eg errgroup.Group 310 | eg.Go(func() error { 311 | c, err := l.Accept() 312 | if err != nil { 313 | return fmt.Errorf("failed to accept: %v", err) 314 | } 315 | defer c.Close() 316 | 317 | // Context is canceled before recvfrom can take place. 318 | ctx, cancel := context.WithCancel(context.Background()) 319 | cancel() 320 | 321 | b := make([]byte, 1024) 322 | _, _, err = c.(*sockettest.Conn).Conn.Recvfrom(ctx, b, 0) 323 | return err 324 | }) 325 | 326 | c, err := net.Dial(l.Addr().Network(), l.Addr().String()) 327 | if err != nil { 328 | t.Fatalf("failed to dial listener: %v", err) 329 | } 330 | defer c.Close() 331 | 332 | // Client never sends data, so we wait until ctx cancel and errgroup return. 
333 | if diff := cmp.Diff(context.Canceled, eg.Wait(), cmpopts.EquateErrors()); diff != "" { 334 | t.Fatalf("unexpected recvfrom error (-want +got):\n%s", diff) 335 | } 336 | } 337 | 338 | func TestFileConn(t *testing.T) { 339 | t.Parallel() 340 | 341 | // Use raw system calls to set up the socket since we assume anything being 342 | // passed into a FileConn is set up by another system, such as systemd's 343 | // socket activation. 344 | fd, err := unix.Socket(unix.AF_INET6, unix.SOCK_STREAM, 0) 345 | if err != nil { 346 | t.Fatalf("failed to open socket: %v", err) 347 | } 348 | 349 | // Bind to loopback, any available port. 350 | sa := &unix.SockaddrInet6{Addr: [16]byte{15: 0x01}} 351 | if err := unix.Bind(fd, sa); err != nil { 352 | t.Fatalf("failed to bind: %v", err) 353 | } 354 | 355 | if err := unix.Listen(fd, unix.SOMAXCONN); err != nil { 356 | t.Fatalf("failed to listen: %v", err) 357 | } 358 | 359 | // The socket should be ready, create a blocking file which is ready to be 360 | // passed into FileConn via the FileListener helper. 361 | f := os.NewFile(uintptr(fd), "tcpv6-listener") 362 | defer f.Close() 363 | 364 | l, err := sockettest.FileListener(f) 365 | if err != nil { 366 | t.Fatalf("failed to open file listener: %v", err) 367 | } 368 | defer l.Close() 369 | 370 | // To exercise the listener, attempt to accept and then immediately close a 371 | // single TCPv6 connection. Dial to the listener from the main goroutine and 372 | // wait for everything to finish. 
373 | var eg errgroup.Group 374 | eg.Go(func() error { 375 | c, err := l.Accept() 376 | if err != nil { 377 | return fmt.Errorf("failed to accept: %v", err) 378 | } 379 | 380 | _ = c.Close() 381 | return nil 382 | }) 383 | 384 | c, err := net.Dial(l.Addr().Network(), l.Addr().String()) 385 | if err != nil { 386 | t.Fatalf("failed to dial listener: %v", err) 387 | } 388 | _ = c.Close() 389 | 390 | if err := eg.Wait(); err != nil { 391 | t.Fatalf("failed to wait for listener goroutine: %v", err) 392 | } 393 | } 394 | 395 | // Use our TCP net.Listener and net.Conn implementations backed by *socket.Conn 396 | // and run compliance tests with nettest.TestConn. 397 | // 398 | // This nettest.MakePipe function is adapted from nettest's own tests: 399 | // https://github.com/golang/net/blob/master/nettest/conntest_test.go 400 | // 401 | // Copyright 2016 The Go Authors. All rights reserved. Use of this source 402 | // code is governed by a BSD-style license that can be found in the LICENSE 403 | // file. 404 | func makePipe( 405 | listen func() (net.Listener, error), 406 | dial func(addr net.Addr) (net.Conn, error), 407 | ) nettest.MakePipe { 408 | return func() (c1, c2 net.Conn, stop func(), err error) { 409 | ln, err := listen() 410 | if err != nil { 411 | return nil, nil, nil, err 412 | } 413 | 414 | // Start a connection between two endpoints. 
415 | var err1, err2 error 416 | done := make(chan bool) 417 | go func() { 418 | c2, err2 = ln.Accept() 419 | close(done) 420 | }() 421 | c1, err1 = dial(ln.Addr()) 422 | <-done 423 | 424 | stop = func() { 425 | if err1 == nil { 426 | c1.Close() 427 | } 428 | if err2 == nil { 429 | c2.Close() 430 | } 431 | ln.Close() 432 | } 433 | 434 | switch { 435 | case err1 != nil: 436 | stop() 437 | return nil, nil, nil, err1 438 | case err2 != nil: 439 | stop() 440 | return nil, nil, nil, err2 441 | default: 442 | return c1, c2, stop, nil 443 | } 444 | } 445 | } 446 | 447 | // Copied from x/net/nettest, pending acceptance of: 448 | // https://go-review.googlesource.com/c/net/+/372815 449 | type connTester func(t *testing.T, c1, c2 net.Conn) 450 | 451 | func timeoutWrapper(t *testing.T, mp nettest.MakePipe, f connTester) { 452 | t.Helper() 453 | c1, c2, stop, err := mp() 454 | if err != nil { 455 | t.Fatalf("unable to make pipe: %v", err) 456 | } 457 | var once sync.Once 458 | defer once.Do(func() { stop() }) 459 | timer := time.AfterFunc(time.Minute, func() { 460 | once.Do(func() { 461 | t.Error("test timed out; terminating pipe") 462 | stop() 463 | }) 464 | }) 465 | defer timer.Stop() 466 | f(t, c1, c2) 467 | } 468 | 469 | // testCloseReadWrite tests that net.Conns which also implement the optional 470 | // CloseRead and CloseWrite methods can be half-closed correctly. 471 | func testCloseReadWrite(t *testing.T, c1, c2 net.Conn) { 472 | // TODO(mdlayher): investigate why Mac/Windows errors are so different. 473 | if runtime.GOOS != "linux" { 474 | t.Skip("skipping, not supported on non-Linux platforms") 475 | } 476 | 477 | type closerConn interface { 478 | net.Conn 479 | CloseRead() error 480 | CloseWrite() error 481 | } 482 | 483 | cc1, ok1 := c1.(closerConn) 484 | cc2, ok2 := c2.(closerConn) 485 | if !ok1 || !ok2 { 486 | // Both c1 and c2 must implement closerConn to proceed. 
487 | return 488 | } 489 | 490 | var wg sync.WaitGroup 491 | wg.Add(2) 492 | defer wg.Wait() 493 | 494 | go func() { 495 | defer wg.Done() 496 | 497 | // Writing succeeds at first but should result in a permanent "broken 498 | // pipe" error after closing the write side of the net.Conn. 499 | b := make([]byte, 64) 500 | if err := chunkedCopy(cc1, bytes.NewReader(b)); err != nil { 501 | t.Errorf("unexpected initial cc1.Write error: %v", err) 502 | } 503 | if err := cc1.CloseWrite(); err != nil { 504 | t.Errorf("unexpected cc1.CloseWrite error: %v", err) 505 | } 506 | _, err := cc1.Write(b) 507 | if nerr, ok := err.(net.Error); !ok || nerr.Timeout() { 508 | t.Errorf("unexpected final cc1.Write error: %v", err) 509 | } 510 | }() 511 | 512 | go func() { 513 | defer wg.Done() 514 | 515 | // Reading succeeds at first but should result in an EOF error after 516 | // closing the read side of the net.Conn. 517 | if err := chunkedCopy(io.Discard, cc2); err != nil { 518 | t.Errorf("unexpected initial cc2.Read error: %v", err) 519 | } 520 | if err := cc2.CloseRead(); err != nil { 521 | t.Errorf("unexpected cc2.CloseRead error: %v", err) 522 | } 523 | if _, err := cc2.Read(make([]byte, 64)); err != io.EOF { 524 | t.Errorf("unexpected final cc2.Read error: %v", err) 525 | } 526 | }() 527 | } 528 | 529 | // chunkedCopy copies from r to w in fixed-width chunks to avoid 530 | // causing a Write that exceeds the maximum packet size for packet-based 531 | // connections like "unixpacket". 532 | // We assume that the maximum packet size is at least 1024. 
533 | func chunkedCopy(w io.Writer, r io.Reader) error { 534 | b := make([]byte, 1024) 535 | _, err := io.CopyBuffer(struct{ io.Writer }{w}, struct{ io.Reader }{r}, b) 536 | return err 537 | } 538 | -------------------------------------------------------------------------------- /conn.go: -------------------------------------------------------------------------------- 1 | package socket 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "io" 7 | "os" 8 | "sync" 9 | "sync/atomic" 10 | "syscall" 11 | "time" 12 | 13 | "golang.org/x/sys/unix" 14 | ) 15 | 16 | // Lock in an expected public interface for convenience. 17 | var _ interface { 18 | io.ReadWriteCloser 19 | syscall.Conn 20 | SetDeadline(t time.Time) error 21 | SetReadDeadline(t time.Time) error 22 | SetWriteDeadline(t time.Time) error 23 | } = &Conn{} 24 | 25 | // A Conn is a low-level network connection which integrates with Go's runtime 26 | // network poller to provide asynchronous I/O and deadline support. 27 | // 28 | // Many of a Conn's blocking methods support net.Conn deadlines as well as 29 | // cancelation via context. Note that passing a context with a deadline set will 30 | // override any of the previous deadlines set by calls to the SetDeadline family 31 | // of methods. 32 | type Conn struct { 33 | // Indicates whether or not Conn.Close has been called. Must be accessed 34 | // atomically. Atomics definitions must come first in the Conn struct. 35 | closed uint32 36 | 37 | // A unique name for the Conn which is also associated with derived file 38 | // descriptors such as those created by accept(2). 39 | name string 40 | 41 | // facts contains information we have determined about Conn to trigger 42 | // alternate behavior in certain functions. 43 | facts facts 44 | 45 | // Provides access to the underlying file registered with the runtime 46 | // network poller, and arbitrary raw I/O calls. 47 | fd *os.File 48 | rc syscall.RawConn 49 | } 50 | 51 | // facts contains facts about a Conn. 
52 | type facts struct { 53 | // isStream reports whether this is a streaming descriptor, as opposed to a 54 | // packet-based descriptor like a UDP socket. 55 | isStream bool 56 | 57 | // zeroReadIsEOF reports whether a zero byte read indicates EOF. This is 58 | // false for a message-based socket connection. 59 | zeroReadIsEOF bool 60 | } 61 | 62 | // A Config contains options for a Conn. 63 | type Config struct { 64 | // NetNS specifies the Linux network namespace the Conn will operate in. 65 | // This option is unsupported on other operating systems. 66 | // 67 | // If set (non-zero), Conn will enter the specified network namespace and an 68 | // error will occur in Socket if the operation fails. 69 | // 70 | // If not set (zero), a best-effort attempt will be made to enter the 71 | // network namespace of the calling thread: this means that any changes made 72 | // to the calling thread's network namespace will also be reflected in Conn. 73 | // If this operation fails (due to lack of permissions or because network 74 | // namespaces are disabled by kernel configuration), Socket will not return 75 | // an error, and the Conn will operate in the default network namespace of 76 | // the process. This enables non-privileged use of Conn in applications 77 | // which do not require elevated privileges. 78 | // 79 | // Entering a network namespace is a privileged operation (root or 80 | // CAP_SYS_ADMIN are required), and most applications should leave this set 81 | // to 0. 82 | NetNS int 83 | } 84 | 85 | // High-level methods which provide convenience over raw system calls. 86 | 87 | // Close closes the underlying file descriptor for the Conn, which also causes 88 | // all in-flight I/O operations to immediately unblock and return errors. Any 89 | // subsequent uses of Conn will result in EBADF.
90 | func (c *Conn) Close() error { 91 | // The caller has expressed an intent to close the socket, so immediately 92 | // increment s.closed to force further calls to result in EBADF before also 93 | // closing the file descriptor to unblock any outstanding operations. 94 | // 95 | // Because other operations simply check for s.closed != 0, we will permit 96 | // double Close, which would increment s.closed beyond 1. 97 | if atomic.AddUint32(&c.closed, 1) != 1 { 98 | // Multiple Close calls. 99 | return nil 100 | } 101 | 102 | return os.NewSyscallError("close", c.fd.Close()) 103 | } 104 | 105 | // CloseRead shuts down the reading side of the Conn. Most callers should just 106 | // use Close. 107 | func (c *Conn) CloseRead() error { return c.Shutdown(unix.SHUT_RD) } 108 | 109 | // CloseWrite shuts down the writing side of the Conn. Most callers should just 110 | // use Close. 111 | func (c *Conn) CloseWrite() error { return c.Shutdown(unix.SHUT_WR) } 112 | 113 | // Read reads directly from the underlying file descriptor. 114 | func (c *Conn) Read(b []byte) (int, error) { return c.fd.Read(b) } 115 | 116 | // ReadContext reads from the underlying file descriptor with added support for 117 | // context cancelation. 118 | func (c *Conn) ReadContext(ctx context.Context, b []byte) (int, error) { 119 | if c.facts.isStream && len(b) > maxRW { 120 | b = b[:maxRW] 121 | } 122 | 123 | n, err := readT(c, ctx, "read", func(fd int) (int, error) { 124 | return unix.Read(fd, b) 125 | }) 126 | if n == 0 && err == nil && c.facts.zeroReadIsEOF { 127 | return 0, io.EOF 128 | } 129 | 130 | return n, os.NewSyscallError("read", err) 131 | } 132 | 133 | // Write writes directly to the underlying file descriptor. 134 | func (c *Conn) Write(b []byte) (int, error) { return c.fd.Write(b) } 135 | 136 | // WriteContext writes to the underlying file descriptor with added support for 137 | // context cancelation. 
138 | func (c *Conn) WriteContext(ctx context.Context, b []byte) (int, error) { 139 | var ( 140 | n, nn int 141 | err error 142 | ) 143 | 144 | doErr := c.write(ctx, "write", func(fd int) error { 145 | max := len(b) 146 | if c.facts.isStream && max-nn > maxRW { 147 | max = nn + maxRW 148 | } 149 | 150 | n, err = unix.Write(fd, b[nn:max]) 151 | if n > 0 { 152 | nn += n 153 | } 154 | if nn == len(b) { 155 | return err 156 | } 157 | if n == 0 && err == nil { 158 | err = io.ErrUnexpectedEOF 159 | return nil 160 | } 161 | 162 | return err 163 | }) 164 | if doErr != nil { 165 | return 0, doErr 166 | } 167 | 168 | return nn, os.NewSyscallError("write", err) 169 | } 170 | 171 | // SetDeadline sets both the read and write deadlines associated with the Conn. 172 | func (c *Conn) SetDeadline(t time.Time) error { return c.fd.SetDeadline(t) } 173 | 174 | // SetReadDeadline sets the read deadline associated with the Conn. 175 | func (c *Conn) SetReadDeadline(t time.Time) error { return c.fd.SetReadDeadline(t) } 176 | 177 | // SetWriteDeadline sets the write deadline associated with the Conn. 178 | func (c *Conn) SetWriteDeadline(t time.Time) error { return c.fd.SetWriteDeadline(t) } 179 | 180 | // ReadBuffer gets the size of the operating system's receive buffer associated 181 | // with the Conn. 182 | func (c *Conn) ReadBuffer() (int, error) { 183 | return c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUF) 184 | } 185 | 186 | // WriteBuffer gets the size of the operating system's transmit buffer 187 | // associated with the Conn. 188 | func (c *Conn) WriteBuffer() (int, error) { 189 | return c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUF) 190 | } 191 | 192 | // SetReadBuffer sets the size of the operating system's receive buffer 193 | // associated with the Conn. 194 | // 195 | // When called with elevated privileges on Linux, the SO_RCVBUFFORCE option will 196 | // be used to override operating system limits. 
Otherwise SO_RCVBUF is used 197 | // (which obeys operating system limits). 198 | func (c *Conn) SetReadBuffer(bytes int) error { return c.setReadBuffer(bytes) } 199 | 200 | // SetWriteBuffer sets the size of the operating system's transmit buffer 201 | // associated with the Conn. 202 | // 203 | // When called with elevated privileges on Linux, the SO_SNDBUFFORCE option will 204 | // be used to override operating system limits. Otherwise SO_SNDBUF is used 205 | // (which obeys operating system limits). 206 | func (c *Conn) SetWriteBuffer(bytes int) error { return c.setWriteBuffer(bytes) } 207 | 208 | // SyscallConn returns a raw network connection. This implements the 209 | // syscall.Conn interface. 210 | // 211 | // SyscallConn is intended for advanced use cases, such as getting and setting 212 | // arbitrary socket options using the socket's file descriptor. If possible, 213 | // those operations should be performed using methods on Conn instead. 214 | // 215 | // Once invoked, it is the caller's responsibility to ensure that operations 216 | // performed using Conn and the syscall.RawConn do not conflict with each other. 217 | func (c *Conn) SyscallConn() (syscall.RawConn, error) { 218 | if atomic.LoadUint32(&c.closed) != 0 { 219 | return nil, os.NewSyscallError("syscallconn", unix.EBADF) 220 | } 221 | 222 | // TODO(mdlayher): mutex or similar to enforce syscall.RawConn contract of 223 | // FD remaining valid for duration of calls? 224 | return c.rc, nil 225 | } 226 | 227 | // Socket wraps the socket(2) system call to produce a Conn. domain, typ, and 228 | // proto are passed directly to socket(2), and name should be a unique name for 229 | // the socket type such as "netlink" or "vsock". 230 | // 231 | // The cfg parameter specifies optional configuration for the Conn. If nil, no 232 | // additional configuration will be applied. 
233 | // 234 | // If the operating system supports SOCK_CLOEXEC and SOCK_NONBLOCK, they are 235 | // automatically applied to typ to mirror the standard library's socket flag 236 | // behaviors. 237 | func Socket(domain, typ, proto int, name string, cfg *Config) (*Conn, error) { 238 | if cfg == nil { 239 | cfg = &Config{} 240 | } 241 | 242 | if cfg.NetNS == 0 { 243 | // Non-Linux or no network namespace. 244 | return socket(domain, typ, proto, name) 245 | } 246 | 247 | // Linux only: create Conn in the specified network namespace. 248 | return withNetNS(cfg.NetNS, func() (*Conn, error) { 249 | return socket(domain, typ, proto, name) 250 | }) 251 | } 252 | 253 | // socket is the internal, cross-platform entry point for socket(2). 254 | func socket(domain, typ, proto int, name string) (*Conn, error) { 255 | var ( 256 | fd int 257 | err error 258 | ) 259 | 260 | for { 261 | fd, err = unix.Socket(domain, typ|socketFlags, proto) 262 | switch { 263 | case err == nil: 264 | // Some OSes already set CLOEXEC with typ. 265 | if !flagCLOEXEC { 266 | unix.CloseOnExec(fd) 267 | } 268 | 269 | // No error, prepare the Conn. 270 | return New(fd, name) 271 | case !ready(err): 272 | // System call interrupted or not ready, try again. 273 | continue 274 | case err == unix.EINVAL, err == unix.EPROTONOSUPPORT: 275 | // On Linux, SOCK_NONBLOCK and SOCK_CLOEXEC were introduced in 276 | // 2.6.27. On FreeBSD, both flags were introduced in FreeBSD 10. 277 | // EINVAL and EPROTONOSUPPORT check for earlier versions of these 278 | // OSes respectively. 279 | // 280 | // Mirror what the standard library does when creating file 281 | // descriptors: avoid racing a fork/exec with the creation of new 282 | // file descriptors, so that child processes do not inherit socket 283 | // file descriptors unexpectedly. 
284 | // 285 | // For a more thorough explanation, see similar work in the Go tree: 286 | // func sysSocket in net/sock_cloexec.go, as well as the detailed 287 | // comment in syscall/exec_unix.go. 288 | syscall.ForkLock.RLock() 289 | fd, err = unix.Socket(domain, typ, proto) 290 | if err != nil { 291 | syscall.ForkLock.RUnlock() 292 | return nil, os.NewSyscallError("socket", err) 293 | } 294 | unix.CloseOnExec(fd) 295 | syscall.ForkLock.RUnlock() 296 | 297 | return New(fd, name) 298 | default: 299 | // Unhandled error. 300 | return nil, os.NewSyscallError("socket", err) 301 | } 302 | } 303 | } 304 | 305 | // FileConn returns a copy of the network connection corresponding to the open 306 | // file. It is the caller's responsibility to close the file when finished. 307 | // Closing the Conn does not affect the File, and closing the File does not 308 | // affect the Conn. 309 | func FileConn(f *os.File, name string) (*Conn, error) { 310 | // First we'll try to do fctnl(2) with F_DUPFD_CLOEXEC because we can dup 311 | // the file descriptor and set the flag in one syscall. 312 | fd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0) 313 | switch err { 314 | case nil: 315 | // OK, ready to set up non-blocking I/O. 316 | return New(fd, name) 317 | case unix.EINVAL: 318 | // The kernel rejected our fcntl(2), fall back to separate dup(2) and 319 | // setting close on exec. 320 | // 321 | // Mirror what the standard library does when creating file descriptors: 322 | // avoid racing a fork/exec with the creation of new file descriptors, 323 | // so that child processes do not inherit socket file descriptors 324 | // unexpectedly. 325 | syscall.ForkLock.RLock() 326 | fd, err := unix.Dup(fd) 327 | if err != nil { 328 | syscall.ForkLock.RUnlock() 329 | return nil, os.NewSyscallError("dup", err) 330 | } 331 | unix.CloseOnExec(fd) 332 | syscall.ForkLock.RUnlock() 333 | 334 | return New(fd, name) 335 | default: 336 | // Any other errors. 
337 | return nil, os.NewSyscallError("fcntl", err) 338 | } 339 | } 340 | 341 | // New wraps an existing file descriptor to create a Conn. name should be a 342 | // unique name for the socket type such as "netlink" or "vsock". 343 | // 344 | // Most callers should use Socket or FileConn to construct a Conn. New is 345 | // intended for integrating with specific system calls which provide a file 346 | // descriptor that supports asynchronous I/O. The file descriptor is immediately 347 | // set to nonblocking mode and registered with Go's runtime network poller for 348 | // future I/O operations. 349 | // 350 | // Unlike FileConn, New does not duplicate the existing file descriptor in any 351 | // way. The returned Conn takes ownership of the underlying file descriptor. 352 | func New(fd int, name string) (*Conn, error) { 353 | // All Conn I/O is nonblocking for integration with Go's runtime network 354 | // poller. Depending on the OS this might already be set but it can't hurt 355 | // to set it again. 356 | if err := unix.SetNonblock(fd, true); err != nil { 357 | return nil, os.NewSyscallError("setnonblock", err) 358 | } 359 | 360 | // os.NewFile registers the non-blocking file descriptor with the runtime 361 | // poller, which is then used for most subsequent operations except those 362 | // that require raw I/O via SyscallConn. 363 | // 364 | // See also: https://golang.org/pkg/os/#NewFile 365 | f := os.NewFile(uintptr(fd), name) 366 | rc, err := f.SyscallConn() 367 | if err != nil { 368 | return nil, err 369 | } 370 | 371 | c := &Conn{ 372 | name: name, 373 | fd: f, 374 | rc: rc, 375 | } 376 | 377 | // Probe the file descriptor for socket settings. 378 | sotype, err := c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_TYPE) 379 | switch { 380 | case err == nil: 381 | // File is a socket, check its properties. 
382 | c.facts = facts{ 383 | isStream: sotype == unix.SOCK_STREAM, 384 | zeroReadIsEOF: sotype != unix.SOCK_DGRAM && sotype != unix.SOCK_RAW, 385 | } 386 | case errors.Is(err, unix.ENOTSOCK): 387 | // File is not a socket, treat it as a regular file. 388 | c.facts = facts{ 389 | isStream: true, 390 | zeroReadIsEOF: true, 391 | } 392 | default: 393 | return nil, err 394 | } 395 | 396 | return c, nil 397 | } 398 | 399 | // Low-level methods which provide raw system call access. 400 | 401 | // Accept wraps accept(2) or accept4(2) depending on the operating system, but 402 | // returns a Conn for the accepted connection rather than a raw file descriptor. 403 | // 404 | // If the operating system supports accept4(2) (which allows flags), 405 | // SOCK_CLOEXEC and SOCK_NONBLOCK are automatically applied to flags to mirror 406 | // the standard library's socket flag behaviors. 407 | // 408 | // If the operating system only supports accept(2) (which does not allow flags) 409 | // and flags is not zero, an error will be returned. 410 | // 411 | // Accept obeys context cancelation and uses the deadline set on the context to 412 | // cancel accepting the next connection. If a deadline is set on ctx, this 413 | // deadline will override any previous deadlines set using SetDeadline or 414 | // SetReadDeadline. Upon return, the read deadline is cleared. 415 | func (c *Conn) Accept(ctx context.Context, flags int) (*Conn, unix.Sockaddr, error) { 416 | type ret struct { 417 | nfd int 418 | sa unix.Sockaddr 419 | } 420 | 421 | r, err := readT(c, ctx, sysAccept, func(fd int) (ret, error) { 422 | // Either accept(2) or accept4(2) depending on the OS. 423 | nfd, sa, err := accept(fd, flags|socketFlags) 424 | return ret{nfd, sa}, err 425 | }) 426 | if err != nil { 427 | // internal/poll, context error, or user function error. 428 | return nil, nil, err 429 | } 430 | 431 | // Successfully accepted a connection, wrap it in a Conn for use by the 432 | // caller. 
433 | ac, err := New(r.nfd, c.name) 434 | if err != nil { 435 | return nil, nil, err 436 | } 437 | 438 | return ac, r.sa, nil 439 | } 440 | 441 | // Bind wraps bind(2). 442 | func (c *Conn) Bind(sa unix.Sockaddr) error { 443 | return c.control("bind", func(fd int) error { return unix.Bind(fd, sa) }) 444 | } 445 | 446 | // Connect wraps connect(2). In order to verify that the underlying socket is 447 | // connected to a remote peer, Connect calls getpeername(2) and returns the 448 | // unix.Sockaddr from that call. 449 | // 450 | // Connect obeys context cancelation and uses the deadline set on the context to 451 | // cancel connecting to a remote peer. If a deadline is set on ctx, this 452 | // deadline will override any previous deadlines set using SetDeadline or 453 | // SetWriteDeadline. Upon return, the write deadline is cleared. 454 | func (c *Conn) Connect(ctx context.Context, sa unix.Sockaddr) (unix.Sockaddr, error) { 455 | const op = "connect" 456 | 457 | // TODO(mdlayher): it would seem that trying to connect to unbound vsock 458 | // listeners by calling Connect multiple times results in ECONNRESET for the 459 | // first and nil error for subsequent calls. Do we need to memoize the 460 | // error? Check what the stdlib behavior is. 461 | 462 | var ( 463 | // Track progress between invocations of the write closure. We don't 464 | // have an explicit WaitWrite call like internal/poll does, so we have 465 | // to wait until the runtime calls the closure again to indicate we can 466 | // write. 467 | progress uint32 468 | 469 | // Capture closure sockaddr and error. 470 | rsa unix.Sockaddr 471 | err error 472 | ) 473 | 474 | doErr := c.write(ctx, op, func(fd int) error { 475 | if atomic.AddUint32(&progress, 1) == 1 { 476 | // First call: initiate connect. 477 | return unix.Connect(fd, sa) 478 | } 479 | 480 | // Subsequent calls: the runtime network poller indicates fd is 481 | // writable. Check for errno. 
482 | errno, gerr := c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_ERROR) 483 | if gerr != nil { 484 | return gerr 485 | } 486 | if errno != 0 { 487 | // Connection is still not ready or failed. If errno indicates 488 | // the socket is not ready, we will wait for the next write 489 | // event. Otherwise we propagate this errno back to the as a 490 | // permanent error. 491 | uerr := unix.Errno(errno) 492 | err = uerr 493 | return uerr 494 | } 495 | 496 | // According to internal/poll, it's possible for the runtime network 497 | // poller to spuriously wake us and return errno 0 for SO_ERROR. 498 | // Make sure we are actually connected to a peer. 499 | peer, err := c.Getpeername() 500 | if err != nil { 501 | // internal/poll unconditionally goes back to WaitWrite. 502 | // Synthesize an error that will do the same for us. 503 | return unix.EAGAIN 504 | } 505 | 506 | // Connection complete. 507 | rsa = peer 508 | return nil 509 | }) 510 | if doErr != nil { 511 | // internal/poll or context error. 512 | return nil, doErr 513 | } 514 | 515 | if err == unix.EISCONN { 516 | // TODO(mdlayher): is this block obsolete with the addition of the 517 | // getsockopt SO_ERROR check above? 518 | // 519 | // EISCONN is reported if the socket is already established and should 520 | // not be treated as an error. 521 | // - Darwin reports this for at least TCP sockets 522 | // - Linux reports this for at least AF_VSOCK sockets 523 | return rsa, nil 524 | } 525 | 526 | return rsa, os.NewSyscallError(op, err) 527 | } 528 | 529 | // Getsockname wraps getsockname(2). 530 | func (c *Conn) Getsockname() (unix.Sockaddr, error) { 531 | return controlT(c, "getsockname", unix.Getsockname) 532 | } 533 | 534 | // Getpeername wraps getpeername(2). 535 | func (c *Conn) Getpeername() (unix.Sockaddr, error) { 536 | return controlT(c, "getpeername", unix.Getpeername) 537 | } 538 | 539 | // GetsockoptICMPv6Filter wraps getsockopt(2) for *unix.ICMPv6Filter values. 
540 | func (c *Conn) GetsockoptICMPv6Filter(level, opt int) (*unix.ICMPv6Filter, error) { 541 | return controlT(c, "getsockopt", func(fd int) (*unix.ICMPv6Filter, error) { 542 | return unix.GetsockoptICMPv6Filter(fd, level, opt) 543 | }) 544 | } 545 | 546 | // GetsockoptInt wraps getsockopt(2) for integer values. 547 | func (c *Conn) GetsockoptInt(level, opt int) (int, error) { 548 | return controlT(c, "getsockopt", func(fd int) (int, error) { 549 | return unix.GetsockoptInt(fd, level, opt) 550 | }) 551 | } 552 | 553 | // GetsockoptString wraps getsockopt(2) for string values. 554 | func (c *Conn) GetsockoptString(level, opt int) (string, error) { 555 | return controlT(c, "getsockopt", func(fd int) (string, error) { 556 | return unix.GetsockoptString(fd, level, opt) 557 | }) 558 | } 559 | 560 | // Listen wraps listen(2). 561 | func (c *Conn) Listen(n int) error { 562 | return c.control("listen", func(fd int) error { return unix.Listen(fd, n) }) 563 | } 564 | 565 | // Recvmsg wraps recvmsg(2). 566 | func (c *Conn) Recvmsg(ctx context.Context, p, oob []byte, flags int) (int, int, int, unix.Sockaddr, error) { 567 | type ret struct { 568 | n, oobn, recvflags int 569 | from unix.Sockaddr 570 | } 571 | 572 | r, err := readT(c, ctx, "recvmsg", func(fd int) (ret, error) { 573 | n, oobn, recvflags, from, err := unix.Recvmsg(fd, p, oob, flags) 574 | return ret{n, oobn, recvflags, from}, err 575 | }) 576 | if r.n == 0 && err == nil && c.facts.zeroReadIsEOF { 577 | return 0, 0, 0, nil, io.EOF 578 | } 579 | 580 | return r.n, r.oobn, r.recvflags, r.from, err 581 | } 582 | 583 | // Recvfrom wraps recvfrom(2). 
584 | func (c *Conn) Recvfrom(ctx context.Context, p []byte, flags int) (int, unix.Sockaddr, error) { 585 | type ret struct { 586 | n int 587 | addr unix.Sockaddr 588 | } 589 | 590 | out, err := readT(c, ctx, "recvfrom", func(fd int) (ret, error) { 591 | n, addr, err := unix.Recvfrom(fd, p, flags) 592 | return ret{n, addr}, err 593 | }) 594 | if out.n == 0 && err == nil && c.facts.zeroReadIsEOF { 595 | return 0, nil, io.EOF 596 | } 597 | 598 | return out.n, out.addr, err 599 | } 600 | 601 | // Sendmsg wraps sendmsg(2). 602 | func (c *Conn) Sendmsg(ctx context.Context, p, oob []byte, to unix.Sockaddr, flags int) (int, error) { 603 | return writeT(c, ctx, "sendmsg", func(fd int) (int, error) { 604 | return unix.SendmsgN(fd, p, oob, to, flags) 605 | }) 606 | } 607 | 608 | // Sendto wraps sendto(2). 609 | func (c *Conn) Sendto(ctx context.Context, p []byte, flags int, to unix.Sockaddr) error { 610 | return c.write(ctx, "sendto", func(fd int) error { 611 | return unix.Sendto(fd, p, flags, to) 612 | }) 613 | } 614 | 615 | // SetsockoptICMPv6Filter wraps setsockopt(2) for *unix.ICMPv6Filter values. 616 | func (c *Conn) SetsockoptICMPv6Filter(level, opt int, filter *unix.ICMPv6Filter) error { 617 | return c.control("setsockopt", func(fd int) error { 618 | return unix.SetsockoptICMPv6Filter(fd, level, opt, filter) 619 | }) 620 | } 621 | 622 | // SetsockoptInt wraps setsockopt(2) for integer values. 623 | func (c *Conn) SetsockoptInt(level, opt, value int) error { 624 | return c.control("setsockopt", func(fd int) error { 625 | return unix.SetsockoptInt(fd, level, opt, value) 626 | }) 627 | } 628 | 629 | // SetsockoptString wraps setsockopt(2) for string values. 630 | func (c *Conn) SetsockoptString(level, opt int, value string) error { 631 | return c.control("setsockopt", func(fd int) error { 632 | return unix.SetsockoptString(fd, level, opt, value) 633 | }) 634 | } 635 | 636 | // Shutdown wraps shutdown(2). 
637 | func (c *Conn) Shutdown(how int) error { 638 | return c.control("shutdown", func(fd int) error { return unix.Shutdown(fd, how) }) 639 | } 640 | 641 | // Conn low-level read/write/control functions. These functions mirror the 642 | // syscall.RawConn APIs but the input closures return errors rather than 643 | // booleans. 644 | 645 | // read wraps readT to execute a function and capture its error result. This is 646 | // a convenience wrapper for functions which don't return any extra values. 647 | func (c *Conn) read(ctx context.Context, op string, f func(fd int) error) error { 648 | _, err := readT(c, ctx, op, func(fd int) (struct{}, error) { 649 | return struct{}{}, f(fd) 650 | }) 651 | return err 652 | } 653 | 654 | // write executes f, a write function, against the associated file descriptor. 655 | // op is used to create an *os.SyscallError if the file descriptor is closed. 656 | func (c *Conn) write(ctx context.Context, op string, f func(fd int) error) error { 657 | _, err := writeT(c, ctx, op, func(fd int) (struct{}, error) { 658 | return struct{}{}, f(fd) 659 | }) 660 | return err 661 | } 662 | 663 | // readT executes c.rc.Read for op using the input function, returning a newly 664 | // allocated result T. 665 | func readT[T any](c *Conn, ctx context.Context, op string, f func(fd int) (T, error)) (T, error) { 666 | return rwT(c, rwContext[T]{ 667 | Context: ctx, 668 | Type: read, 669 | Op: op, 670 | Do: f, 671 | }) 672 | } 673 | 674 | // writeT executes c.rc.Write for op using the input function, returning a newly 675 | // allocated result T. 676 | func writeT[T any](c *Conn, ctx context.Context, op string, f func(fd int) (T, error)) (T, error) { 677 | return rwT(c, rwContext[T]{ 678 | Context: ctx, 679 | Type: write, 680 | Op: op, 681 | Do: f, 682 | }) 683 | } 684 | 685 | // readWrite indicates if an operation intends to read or write. 686 | type readWrite bool 687 | 688 | // Possible readWrite values. 
const (
	// read selects c.rc.Read and the read deadline in rwT.
	read readWrite = false
	// write selects c.rc.Write and the write deadline in rwT.
	write readWrite = true
)

// An rwContext provides arguments to rwT.
type rwContext[T any] struct {
	// The caller's context passed for cancelation. Must not be nil.
	Context context.Context

	// The type of an operation: read or write.
	Type readWrite

	// The name of the operation used in errors.
	Op string

	// The actual function to perform. It receives the raw file descriptor and
	// may be invoked more than once: rwT calls it again after the poller
	// reports readiness whenever it returns an error for which ready reports
	// false.
	Do func(fd int) (T, error)
}

// rwT executes c.rc.Read or c.rc.Write (depending on the value of rw.Type) for
// rw.Op using the input function, returning a newly allocated result T.
//
// It obeys context cancelation and the rw.Context must not be nil.
//
// Errors are reported as follows:
//   - if the Conn is already marked closed, an *os.SyscallError wrapping EBADF;
//   - if the context is done before or during the operation, an
//     *os.SyscallError wrapping the context's error;
//   - any other error from the poller is returned unwrapped;
//   - otherwise the error from rw.Do passed through os.NewSyscallError
//     (which yields nil for a nil error).
func rwT[T any](c *Conn, rw rwContext[T]) (T, error) {
	if atomic.LoadUint32(&c.closed) != 0 {
		// If the file descriptor is already closed, do nothing.
		return *new(T), os.NewSyscallError(rw.Op, unix.EBADF)
	}

	if err := rw.Context.Err(); err != nil {
		// Early exit due to context cancel.
		return *new(T), os.NewSyscallError(rw.Op, err)
	}

	var (
		// The read or write function used to access the runtime network poller.
		poll func(func(uintptr) bool) error

		// The read or write function used to set the matching deadline.
		deadline func(time.Time) error
	)

	if rw.Type == write {
		poll = c.rc.Write
		deadline = c.SetWriteDeadline
	} else {
		poll = c.rc.Read
		deadline = c.SetReadDeadline
	}

	var (
		// Whether or not the context carried a deadline we are actively using
		// for cancelation.
		setDeadline bool

		// Signals for the cancelation watcher goroutine.
		wg    sync.WaitGroup
		doneC = make(chan struct{})

		// Atomic: reports whether we have to disarm the deadline.
		needDisarm atomic.Bool
	)

	// On every return path, tell the watcher goroutine (if one was started) to
	// stop and wait for it to exit so it does not outlive this call.
	defer func() {
		close(doneC)
		wg.Wait()
	}()

	if d, ok := rw.Context.Deadline(); ok {
		// The context has an explicit deadline. We will use it for cancelation
		// but disarm it after poll for the next call.
		if err := deadline(d); err != nil {
			return *new(T), err
		}
		setDeadline = true
		needDisarm.Store(true)
	} else {
		// The context does not have an explicit deadline. We have to watch for
		// cancelation so we can propagate that signal to immediately unblock
		// the runtime network poller.
		//
		// TODO(mdlayher): is it possible to detect a background context vs a
		// context with possible future cancel?
		wg.Add(1)
		go func() {
			defer wg.Done()

			select {
			case <-rw.Context.Done():
				// Cancel the operation. Make the caller disarm after poll
				// returns. The deadline is set to a moment in the distant past
				// so that the pending poll is unblocked.
				needDisarm.Store(true)
				_ = deadline(time.Unix(0, 1))
			case <-doneC:
				// Nothing to do.
			}
		}()
	}

	var (
		t   T
		err error
	)

	// The closure reports false for the errors ready treats as "not ready",
	// which makes the poller wait for readiness and invoke it again.
	pollErr := poll(func(fd uintptr) bool {
		t, err = rw.Do(int(fd))
		return ready(err)
	})

	// NOTE(review): the watcher goroutine is only joined by the deferred
	// function above, which runs after this check. If the context is canceled
	// between this load and the close of doneC, the past deadline set by the
	// watcher does not appear to be cleared here - confirm whether that window
	// matters in practice.
	if needDisarm.Load() {
		_ = deadline(time.Time{})
	}

	if pollErr != nil {
		if rw.Context.Err() != nil || (setDeadline && errors.Is(pollErr, os.ErrDeadlineExceeded)) {
			// The caller canceled the operation or we set a deadline internally
			// and it was reached.
			//
			// Unpack a plain context error. We wait for the context to be done
			// to synchronize state externally. Otherwise we have noticed I/O
			// timeout wakeups when we set a deadline but the context was not
			// yet marked done.
			<-rw.Context.Done()
			return *new(T), os.NewSyscallError(rw.Op, rw.Context.Err())
		}

		// Error from syscall.RawConn methods. Conventionally the standard
		// library does not wrap internal/poll errors in os.NewSyscallError.
		return *new(T), pollErr
	}

	// Result from user function.
	return t, os.NewSyscallError(rw.Op, err)
}

// control runs f through controlT and reports only its error. It exists for
// control operations which produce no value besides an error.
func (c *Conn) control(op string, f func(fd int) error) error {
	_, err := controlT(c, op, func(fd int) (struct{}, error) {
		return struct{}{}, f(fd)
	})
	return err
}

// controlT executes c.rc.Control for op using the input function, returning a
// newly allocated result T.
//
// If the Conn is already marked closed, an *os.SyscallError wrapping EBADF is
// returned without invoking f. An error from c.rc.Control itself is returned
// unwrapped; otherwise the error from f is passed through os.NewSyscallError.
func controlT[T any](c *Conn, op string, f func(fd int) (T, error)) (T, error) {
	if atomic.LoadUint32(&c.closed) != 0 {
		// If the file descriptor is already closed, do nothing.
		return *new(T), os.NewSyscallError(op, unix.EBADF)
	}

	var (
		t   T
		err error
	)

	doErr := c.rc.Control(func(fd uintptr) {
		// Repeatedly attempt the syscall(s) invoked by f until completion is
		// indicated by the return value of ready. No context is involved here:
		// the loop retries immediately for as long as f keeps returning one of
		// the "not ready" errors.
		//
		// The last values for t and err are captured outside of the closure for
		// use when the loop breaks.
		for {
			t, err = f(int(fd))
			if ready(err) {
				return
			}
		}
	})
	if doErr != nil {
		// Error from syscall.RawConn methods. Conventionally the standard
		// library does not wrap internal/poll errors in os.NewSyscallError.
		return *new(T), doErr
	}

	// Result from user function.
	return t, os.NewSyscallError(op, err)
}

// ready indicates readiness based on the value of err.
871 | func ready(err error) bool { 872 | switch err { 873 | case unix.EAGAIN, unix.EINPROGRESS, unix.EINTR: 874 | // When a socket is in non-blocking mode, we might see a variety of errors: 875 | // - EAGAIN: most common case for a socket read not being ready 876 | // - EINPROGRESS: reported by some sockets when first calling connect 877 | // - EINTR: system call interrupted, more frequently occurs in Go 1.14+ 878 | // because goroutines can be asynchronously preempted 879 | // 880 | // Return false to let the poller wait for readiness. See the source code 881 | // for internal/poll.FD.RawRead for more details. 882 | return false 883 | default: 884 | // Ready regardless of whether there was an error or no error. 885 | return true 886 | } 887 | } 888 | 889 | // Darwin and FreeBSD can't read or write 2GB+ files at a time, 890 | // even on 64-bit systems. 891 | // The same is true of socket implementations on many systems. 892 | // See golang.org/issue/7812 and golang.org/issue/16266. 893 | // Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned. 894 | const maxRW = 1 << 30 895 | --------------------------------------------------------------------------------