├── go.mod
├── typ_none.go
├── typ_cloexec_nonblock.go
├── accept4.go
├── netns_others.go
├── export_linux_test.go
├── setbuffer_others.go
├── accept.go
├── go.sum
├── .github
    └── workflows
    │   ├── test.yml
    │   └── static-analysis.yml
├── doc.go
├── setbuffer_linux.go
├── LICENSE.md
├── README.md
├── CHANGELOG.md
├── conn_linux.go
├── netns_linux.go
├── conn_linux_test.go
├── internal
    └── sockettest
    │   └── sockettest.go
├── conn_test.go
└── conn.go


/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/mdlayher/socket
 2 | 
 3 | go 1.20
 4 | 
 5 | require (
 6 | 	github.com/google/go-cmp v0.5.9
 7 | 	golang.org/x/net v0.14.0
 8 | 	golang.org/x/sync v0.3.0
 9 | 	golang.org/x/sys v0.11.0
10 | )
11 | 


--------------------------------------------------------------------------------
/typ_none.go:
--------------------------------------------------------------------------------
 1 | //go:build darwin
 2 | // +build darwin
 3 | 
 4 | package socket
 5 | 
 6 | const (
 7 | 	// This file is built only for darwin, which does not support the CLOEXEC
 8 | 	// and NONBLOCK socket options, so both values are switched off here.
 9 | 	flagCLOEXEC = false
10 | 	socketFlags = 0
11 | )
12 | 


--------------------------------------------------------------------------------
/typ_cloexec_nonblock.go:
--------------------------------------------------------------------------------
 1 | //go:build !darwin
 2 | // +build !darwin
 3 | 
 4 | package socket
 5 | 
 6 | import "golang.org/x/sys/unix"
 7 | 
 8 | const (
 9 | 	// Every non-darwin target supports the CLOEXEC and NONBLOCK socket options.
10 | 	flagCLOEXEC = true
11 | 	socketFlags = unix.SOCK_CLOEXEC | unix.SOCK_NONBLOCK
12 | )
13 | 


--------------------------------------------------------------------------------
/accept4.go:
--------------------------------------------------------------------------------
 1 | //go:build dragonfly || freebsd || illumos || linux
 2 | // +build dragonfly freebsd illumos linux
 3 | 
 4 | package socket
 5 | 
 6 | import (
 7 | 	"golang.org/x/sys/unix"
 8 | )
 9 | 
10 | const sysAccept = "accept4" // name of the system call that accept wraps in this build
11 | 
12 | // accept calls accept4(2) on fd with flags and returns its results unchanged.
13 | func accept(fd, flags int) (int, unix.Sockaddr, error) {
14 | 	return unix.Accept4(fd, flags)
15 | }
16 | 


--------------------------------------------------------------------------------
/netns_others.go:
--------------------------------------------------------------------------------
 1 | //go:build !linux
 2 | // +build !linux
 3 | 
 4 | package socket
 5 | 
 6 | import (
 7 | 	"fmt"
 8 | 	"runtime"
 9 | )
10 | 
11 | // withNetNS always fails on non-Linux systems with an error naming runtime.GOOS.
12 | func withNetNS(_ int, _ func() (*Conn, error)) (*Conn, error) {
13 | 	return nil, fmt.Errorf("socket: Linux network namespace support is not available on %s", runtime.GOOS)
14 | }
15 | 


--------------------------------------------------------------------------------
/export_linux_test.go:
--------------------------------------------------------------------------------
 1 | //go:build linux
 2 | // +build linux
 3 | 
 4 | package socket
 5 | 
 6 | // NetNS exports netNS to tests by embedding it, which promotes its methods.
 7 | type NetNS struct{ *netNS }
 8 | 
 9 | // ThreadNetNS exposes threadNetNS to tests, wrapping its result in a NetNS.
10 | func ThreadNetNS() (*NetNS, error) {
11 | 	inner, err := threadNetNS()
12 | 	if err != nil {
13 | 		return nil, err
14 | 	}
15 | 	wrapped := NetNS{netNS: inner}
16 | 	return &wrapped, nil
17 | }
18 | 


--------------------------------------------------------------------------------
/setbuffer_others.go:
--------------------------------------------------------------------------------
 1 | //go:build !linux
 2 | // +build !linux
 3 | 
 4 | package socket
 5 | 
 6 | import "golang.org/x/sys/unix"
 7 | 
 8 | // setReadBuffer sets the SO_RCVBUF socket option to bytes via setsockopt(2).
 9 | func (c *Conn) setReadBuffer(bytes int) error {
10 | 	return c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUF, bytes)
11 | }
12 | 
13 | // setWriteBuffer sets the SO_SNDBUF socket option to bytes via setsockopt(2).
14 | func (c *Conn) setWriteBuffer(bytes int) error {
15 | 	return c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUF, bytes)
16 | }
17 | 


--------------------------------------------------------------------------------
/accept.go:
--------------------------------------------------------------------------------
 1 | //go:build !dragonfly && !freebsd && !illumos && !linux
 2 | // +build !dragonfly,!freebsd,!illumos,!linux
 3 | 
 4 | package socket
 5 | 
 6 | import (
 7 | 	"fmt"
 8 | 	"runtime"
 9 | 
10 | 	"golang.org/x/sys/unix"
11 | )
12 | 
13 | const sysAccept = "accept" // name of the system call that accept wraps in this build
14 | 
15 | // accept wraps accept(2), which takes no flags; any non-zero flags is an error.
16 | func accept(fd, flags int) (int, unix.Sockaddr, error) {
17 | 	if flags == 0 {
18 | 		return unix.Accept(fd)
19 | 	}
20 | 
21 | 	// accept(2) on these operating systems cannot honor any flags.
22 | 	return 0, nil, fmt.Errorf("socket: Conn.Accept flags are ineffective on %s", runtime.GOOS)
23 | }
24 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
2 | github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
3 | golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14=
4 | golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
5 | golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
6 | golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
7 | golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
8 | golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
9 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - "*"
 7 |   pull_request:
 8 |     branches:
 9 |       - "*"
10 | 
11 | jobs:
12 |   build:
13 |     strategy:
14 |       fail-fast: false
15 |       matrix:
16 |         go-version: ["1.20", "1.21.0"]
17 |         os: [ubuntu-latest, macos-latest]
18 |     runs-on: ${{ matrix.os }}
19 | 
20 |     steps:
21 |       - name: Set up Go
22 |         uses: actions/setup-go@v3
23 |         with:
24 |           go-version: ${{ matrix.go-version }}
25 |         id: go
26 | 
27 |       - name: Check out code into the Go module directory
28 |         uses: actions/checkout@v3
29 | 
30 |       - name: Run tests
31 |         run: go test -race ./...
32 | 


--------------------------------------------------------------------------------
/doc.go:
--------------------------------------------------------------------------------
 1 | // Package socket provides a low-level network connection type which integrates
 2 | // with Go's runtime network poller to provide asynchronous I/O and deadline
 3 | // support.
 4 | //
 5 | // This package focuses on UNIX-like operating systems which make use of BSD
 6 | // sockets system call APIs. It is meant to be used as a foundation for the
 7 | // creation of operating system-specific socket packages, for socket families
 8 | // such as Linux's AF_NETLINK, AF_PACKET, or AF_VSOCK. This package should not
 9 | // be used directly in end user applications.
10 | //
11 | // Any use of package socket should be guarded by build tags, as one would also
12 | // use when importing the syscall or golang.org/x/sys packages.
13 | package socket
14 | 


--------------------------------------------------------------------------------
/setbuffer_linux.go:
--------------------------------------------------------------------------------
 1 | //go:build linux
 2 | // +build linux
 3 | 
 4 | package socket
 5 | 
 6 | import "golang.org/x/sys/unix"
 7 | 
 8 | // setReadBuffer tries SO_RCVBUFFORCE first and falls back to SO_RCVBUF on error.
 9 | func (c *Conn) setReadBuffer(bytes int) error {
10 | 	if c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUFFORCE, bytes) == nil {
11 | 		return nil
12 | 	}
13 | 	// The forced variant failed; report the result of the plain option instead.
14 | 	return c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUF, bytes)
15 | }
16 | 
17 | // setWriteBuffer tries SO_SNDBUFFORCE first and falls back to SO_SNDBUF on error.
18 | func (c *Conn) setWriteBuffer(bytes int) error {
19 | 	if c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUFFORCE, bytes) == nil {
20 | 		return nil
21 | 	}
22 | 	// The forced variant failed; report the result of the plain option instead.
23 | 	return c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUF, bytes)
24 | }
25 | 


--------------------------------------------------------------------------------
/.github/workflows/static-analysis.yml:
--------------------------------------------------------------------------------
 1 | name: Static Analysis
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - "*"
 7 |   pull_request:
 8 |     branches:
 9 |       - "*"
10 | 
11 | jobs:
12 |   build:
13 |     strategy:
14 |       matrix:
15 |         go-version: ["1.21.0"]
16 |     runs-on: ubuntu-latest
17 | 
18 |     steps:
19 |       - name: Set up Go
20 |         uses: actions/setup-go@v3
21 |         with:
22 |           go-version: ${{ matrix.go-version }}
23 |         id: go
24 | 
25 |       - name: Check out code into the Go module directory
26 |         uses: actions/checkout@v3
27 | 
28 |       - name: Install staticcheck
29 |         run: go install honnef.co/go/tools/cmd/staticcheck@latest
30 | 
31 |       - name: Print staticcheck version
32 |         run: staticcheck -version
33 | 
34 |       - name: Run staticcheck
35 |         run: staticcheck ./...
36 | 
37 |       - name: Run go vet
38 |         run: go vet ./...
39 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (C) 2021 Matt Layher
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 6 | 
 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 8 | 
 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # socket [![Test Status](https://github.com/mdlayher/socket/workflows/Test/badge.svg)](https://github.com/mdlayher/socket/actions) [![Go Reference](https://pkg.go.dev/badge/github.com/mdlayher/socket.svg)](https://pkg.go.dev/github.com/mdlayher/socket) [![Go Report Card](https://goreportcard.com/badge/github.com/mdlayher/socket)](https://goreportcard.com/report/github.com/mdlayher/socket)
 2 | 
 3 | Package `socket` provides a low-level network connection type which integrates
 4 | with Go's runtime network poller to provide asynchronous I/O and deadline
 5 | support. MIT Licensed.
 6 | 
 7 | This package focuses on UNIX-like operating systems which make use of BSD
 8 | sockets system call APIs. It is meant to be used as a foundation for the
 9 | creation of operating system-specific socket packages, for socket families such
10 | as Linux's `AF_NETLINK`, `AF_PACKET`, or `AF_VSOCK`. This package should not be
11 | used directly in end user applications.
12 | 
13 | Any use of package socket should be guarded by build tags, as one would also
14 | use when importing the `syscall` or `golang.org/x/sys` packages.
15 | 
16 | ## Stability
17 | 
18 | See the [CHANGELOG](./CHANGELOG.md) file for a description of changes between
19 | releases.
20 | 
21 | This package only supports the two most recent major versions of Go, mirroring
22 | Go's own release policy. Older versions of Go may lack critical features and bug
23 | fixes which are necessary for this package to function correctly.
24 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # CHANGELOG
 2 | 
 3 | ## v0.5.1
 4 | 
 5 | - [Improvement]: revert `go.mod` to Go 1.20 to [resolve an issue around Go
 6 |   module version upgrades](https://github.com/mdlayher/socket/issues/13).
 7 | 
 8 | ## v0.5.0
 9 | 
10 | **This is the first release of package socket that only supports Go 1.21+.
11 | Users on older versions of Go must use v0.4.1.**
12 | 
13 | - [Improvement]: drop support for older versions of Go.
14 | - [New API]: add `socket.Conn` wrappers for various `Getsockopt` and
15 |   `Setsockopt` system calls.
16 | 
17 | ## v0.4.1
18 | 
19 | - [Bug Fix] [commit](https://github.com/mdlayher/socket/commit/2a14ceef4da279de1f957c5761fffcc6c87bbd3b):
20 |   ensure `socket.Conn` can be used with non-socket file descriptors by handling
21 |   `ENOTSOCK` in the constructor.
22 | 
23 | ## v0.4.0
24 | 
25 | **This is the first release of package socket that only supports Go 1.18+.
26 | Users on older versions of Go must use v0.3.0.**
27 | 
28 | - [Improvement]: drop support for older versions of Go so we can begin using
29 |   modern versions of `x/sys` and other dependencies.
30 | 
31 | ## v0.3.0
32 | 
33 | **This is the last release of package socket that supports Go 1.17 and below.**
34 | 
35 | - [New API/API change] [PR](https://github.com/mdlayher/socket/pull/8):
36 |   numerous `socket.Conn` methods now support context cancelation. Future
37 |   releases will continue adding support as needed.
38 |   - New `ReadContext` and `WriteContext` methods.
39 |   - `Connect`, `Recvfrom`, `Recvmsg`, `Sendmsg`, and `Sendto` methods now accept
40 |     a context.
41 |   - `Sendto` parameter order was also fixed to match the underlying syscall.
42 | 
43 | ## v0.2.3
44 | 
45 | - [New API] [commit](https://github.com/mdlayher/socket/commit/a425d96e0f772c053164f8ce4c9c825380a98086):
46 |   `socket.Conn` has new `Pidfd*` methods for wrapping the `pidfd_*(2)` family of
47 |   system calls.
48 | 
49 | ## v0.2.2
50 | 
51 | - [New API] [commit](https://github.com/mdlayher/socket/commit/a2429f1dfe8ec2586df5a09f50ead865276cd027):
52 |   `socket.Conn` has new `IoctlKCM*` methods for wrapping `ioctl(2)` for `AF_KCM`
53 |   operations.
54 | 
55 | ## v0.2.1
56 | 
57 | - [New API] [commit](https://github.com/mdlayher/socket/commit/b18ddbe9caa0e34552b4409a3aa311cb460d2f99):
58 |   `socket.Conn` has a new `SetsockoptPacketMreq` method for wrapping
59 |   `setsockopt(2)` for `AF_PACKET` socket options.
60 | 
61 | ## v0.2.0
62 | 
63 | - [New API] [commit](https://github.com/mdlayher/socket/commit/6e912a68523c45e5fd899239f4b46c402dd856da):
64 |   `socket.FileConn` can be used to create a `socket.Conn` from an existing
65 |   `os.File`, which may be provided by systemd socket activation or another
66 |   external mechanism.
67 | - [API change] [commit](https://github.com/mdlayher/socket/commit/66d61f565188c23fe02b24099ddc856d538bf1a7):
68 |   `socket.Conn.Connect` now returns the `unix.Sockaddr` value provided by
69 |   `getpeername(2)`, since we have to invoke that system call anyway to verify
70 |   that a connection to a remote peer was successfully established.
71 | - [Bug Fix] [commit](https://github.com/mdlayher/socket/commit/b60b2dbe0ac3caff2338446a150083bde8c5c19c):
72 |   check the correct error from `unix.GetsockoptInt` in the `socket.Conn.Connect`
73 |   method. Thanks @vcabbage!
74 | 
75 | ## v0.1.2
76 | 
77 | - [Bug Fix]: `socket.Conn.Connect` now properly checks the `SO_ERROR` socket
78 |   option value after calling `connect(2)` to verify whether or not a connection
79 |   could successfully be established. This means that `Connect` should now report
80 |   an error for an `AF_INET` TCP connection refused or `AF_VSOCK` connection
81 |   reset by peer.
82 | - [New API]: add `socket.Conn.Getpeername` for use in `Connect`, but also for
83 |   use by external callers.
84 | 
85 | ## v0.1.1
86 | 
87 | - [New API]: `socket.Conn` now has `CloseRead`, `CloseWrite`, and `Shutdown`
88 |   methods.
89 | - [Improvement]: internal rework to more robustly handle various errors.
90 | 
91 | ## v0.1.0
92 | 
93 | - Initial unstable release. Most functionality has been developed and ported
94 |   from package [`netlink`](https://github.com/mdlayher/netlink).
95 | 


--------------------------------------------------------------------------------
/conn_linux.go:
--------------------------------------------------------------------------------
  1 | //go:build linux
  2 | // +build linux
  3 | 
  4 | package socket
  5 | 
  6 | import (
  7 | 	"context"
  8 | 	"os"
  9 | 	"unsafe"
 10 | 
 11 | 	"golang.org/x/net/bpf"
 12 | 	"golang.org/x/sys/unix"
 13 | )
 14 | 
 15 | // IoctlKCMClone issues the KCM clone ioctl(2) on c and wraps the resulting
 16 | // file descriptor in a new Conn that is given c's name.
 17 | func (c *Conn) IoctlKCMClone() (*Conn, error) {
 18 | 	cloned, err := controlT(c, "ioctl", unix.IoctlKCMClone)
 19 | 	if err != nil {
 20 | 		return nil, err
 21 | 	}
 22 | 	// The clone succeeded: hand its descriptor to New under the same name.
 23 | 	fd := int(cloned.Fd)
 24 | 	return New(fd, c.name)
 25 | }
 26 | 
 27 | // IoctlKCMAttach issues the KCM attach ioctl(2) for c, passing info along.
 28 | func (c *Conn) IoctlKCMAttach(info unix.KCMAttach) error {
 29 | 	// fd is the descriptor that c.control passes to the closure.
 30 | 	attach := func(fd int) error { return unix.IoctlKCMAttach(fd, info) }
 31 | 	return c.control("ioctl", attach)
 32 | }
 33 | 
 34 | // IoctlKCMUnattach issues the KCM unattach ioctl(2) for c, passing info along.
 35 | func (c *Conn) IoctlKCMUnattach(info unix.KCMUnattach) error {
 36 | 	// fd is the descriptor that c.control passes to the closure.
 37 | 	unattach := func(fd int) error { return unix.IoctlKCMUnattach(fd, info) }
 38 | 	return c.control("ioctl", unattach)
 39 | }
 40 | 
 41 | // PidfdGetfd calls pidfd_getfd(2) on the pidfd wrapped by c and returns the
 42 | // resulting descriptor as a Conn (named after c) instead of a raw integer.
 43 | func (c *Conn) PidfdGetfd(targetFD, flags int) (*Conn, error) {
 44 | 	// pidfd is the descriptor that controlT passes in for c.
 45 | 	getfd := func(pidfd int) (int, error) {
 46 | 		return unix.PidfdGetfd(pidfd, targetFD, flags)
 47 | 	}
 48 | 	got, err := controlT(c, "pidfd_getfd", getfd)
 49 | 	if err != nil {
 50 | 		return nil, err
 51 | 	}
 52 | 	return New(got, c.name)
 53 | }
 54 | 
 55 | // PidfdSendSignal calls pidfd_send_signal(2) with sig, info and flags on the
 56 | // Linux pidfd wrapped by c.
 57 | func (c *Conn) PidfdSendSignal(sig unix.Signal, info *unix.Siginfo, flags int) error {
 58 | 	// pidfd is the descriptor that c.control passes to the closure.
 59 | 	send := func(pidfd int) error { return unix.PidfdSendSignal(pidfd, sig, info, flags) }
 60 | 	return c.control("pidfd_send_signal", send)
 61 | }
 62 | 
 63 | // SetBPF attaches an assembled BPF program to a Conn; bad lengths yield EINVAL.
 64 | func (c *Conn) SetBPF(filter []bpf.RawInstruction) error {
 65 | 	// An empty filter has no first instruction to point at, and more than
 66 | 	// 0xffff instructions would be silently truncated by the uint16 Len below.
 67 | 	if n := len(filter); n == 0 || n > 0xffff {
 68 | 		return os.NewSyscallError("setsockopt", unix.EINVAL)
 69 | 	}
 70 | 
 71 | 	prog := unix.SockFprog{
 72 | 		Len:    uint16(len(filter)),
 73 | 		Filter: (*unix.SockFilter)(unsafe.Pointer(&filter[0])),
 74 | 	}
 75 | 
 76 | 	return c.SetsockoptSockFprog(unix.SOL_SOCKET, unix.SO_ATTACH_FILTER, &prog)
 77 | }
 78 | 
 79 | // RemoveBPF detaches the BPF filter from a Conn by setting SO_DETACH_FILTER.
 80 | func (c *Conn) RemoveBPF() error {
 81 | 	// The option value is ignored for SO_DETACH_FILTER, so 0 is passed.
 82 | 	return c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_DETACH_FILTER, 0)
 83 | }
 84 | 
 85 | // SetsockoptPacketMreq calls setsockopt(2) with a unix.PacketMreq option value.
 86 | func (c *Conn) SetsockoptPacketMreq(level, opt int, mreq *unix.PacketMreq) error {
 87 | 	// fd is the descriptor that c.control passes to the closure.
 88 | 	set := func(fd int) error { return unix.SetsockoptPacketMreq(fd, level, opt, mreq) }
 89 | 	return c.control("setsockopt", set)
 90 | }
 91 | 
 92 | // SetsockoptSockFprog calls setsockopt(2) with a unix.SockFprog option value.
 93 | func (c *Conn) SetsockoptSockFprog(level, opt int, fprog *unix.SockFprog) error {
 94 | 	// fd is the descriptor that c.control passes to the closure.
 95 | 	set := func(fd int) error { return unix.SetsockoptSockFprog(fd, level, opt, fprog) }
 96 | 	return c.control("setsockopt", set)
 97 | }
 98 | 
 99 | // GetsockoptTpacketStats calls getsockopt(2) and returns a unix.TpacketStats.
100 | func (c *Conn) GetsockoptTpacketStats(level, name int) (*unix.TpacketStats, error) {
101 | 	// fd is the descriptor that controlT passes to the closure.
102 | 	get := func(fd int) (*unix.TpacketStats, error) { return unix.GetsockoptTpacketStats(fd, level, name) }
103 | 	return controlT(c, "getsockopt", get)
104 | }
105 | 
106 | // GetsockoptTpacketStatsV3 calls getsockopt(2) and returns a unix.TpacketStatsV3.
107 | func (c *Conn) GetsockoptTpacketStatsV3(level, name int) (*unix.TpacketStatsV3, error) {
108 | 	// fd is the descriptor that controlT passes to the closure.
109 | 	get := func(fd int) (*unix.TpacketStatsV3, error) { return unix.GetsockoptTpacketStatsV3(fd, level, name) }
110 | 	return controlT(c, "getsockopt", get)
111 | }
112 | 
113 | // Waitid calls waitid(2), passing the descriptor behind c as the id argument.
114 | func (c *Conn) Waitid(idType int, info *unix.Siginfo, options int, rusage *unix.Rusage) error {
115 | 	// c.read supplies fd; the call runs under a background (never-canceled) context.
116 | 	wait := func(fd int) error { return unix.Waitid(idType, fd, info, options, rusage) }
117 | 	return c.read(context.Background(), "waitid", wait)
118 | }
119 | 


--------------------------------------------------------------------------------
/netns_linux.go:
--------------------------------------------------------------------------------
  1 | //go:build linux
  2 | // +build linux
  3 | 
  4 | package socket
  5 | 
  6 | import (
  7 | 	"errors"
  8 | 	"fmt"
  9 | 	"os"
 10 | 	"runtime"
 11 | 
 12 | 	"golang.org/x/sync/errgroup"
 13 | 	"golang.org/x/sys/unix"
 14 | )
 15 | 
 16 | // errNetNSDisabled is returned by netNS operations when network namespaces are
 17 | // unavailable on a given system.
 18 | var errNetNSDisabled = errors.New("socket: Linux network namespaces are not enabled on this system")
 19 | 
 20 | // withNetNS runs fn on a dedicated goroutine whose OS thread has been moved
 21 | // into the network namespace referred to by fd, then restores that thread's
 22 | // original namespace. It returns the Conn created by fn, or the error hit.
 23 | func withNetNS(fd int, fn func() (*Conn, error)) (*Conn, error) {
 24 | 	var (
 25 | 		eg   errgroup.Group
 26 | 		conn *Conn
 27 | 	)
 28 | 
 29 | 	eg.Go(func() error {
 30 | 		// Pin this goroutine to its OS thread, then capture the thread's current
 31 | 		// network namespace so it can be restored once fn has created its socket
 32 | 		// in the other namespace.
 33 | 		runtime.LockOSThread()
 34 | 
 35 | 		ns, err := threadNetNS()
 36 | 		if err != nil {
 37 | 			// Nothing thread-local has been changed yet, so unlocking is safe.
 38 | 			runtime.UnlockOSThread()
 39 | 			return err
 40 | 		}
 41 | 		defer ns.Close()
 42 | 
 43 | 		// From here on the thread's network namespace may be altered. The error
 44 | 		// returns below skip UnlockOSThread on purpose, so the caller is never
 45 | 		// handed a thread whose thread-local state has been modified.
 46 | 
 47 | 		// Move the OS thread this goroutine is locked to into the network
 48 | 		// namespace identified by fd.
 49 | 		if err := ns.Set(fd); err != nil {
 50 | 			return err
 51 | 		}
 52 | 
 53 | 		// Create the Conn, then restore the original namespace even if fn failed.
 54 | 		c, err := fn()
 55 | 		if nerr := ns.Restore(); nerr != nil {
 56 | 			// The original namespace could not be restored. Close any Conn fn
 57 | 			// created, return the error, and let the runtime terminate the thread.
 58 | 			if err == nil {
 59 | 				_ = c.Close()
 60 | 			}
 61 | 
 62 | 			return nerr
 63 | 		}
 64 | 
 65 | 		// Thread-local state is back to normal; unlock and publish the new Conn.
 66 | 		runtime.UnlockOSThread()
 67 | 		conn = c
 68 | 		return err
 69 | 	})
 70 | 
 71 | 	if err := eg.Wait(); err != nil {
 72 | 		return nil, err
 73 | 	}
 74 | 
 75 | 	return conn, nil
 76 | }
 77 | 
 78 | // A netNS is a handle that can manipulate network namespaces.
 79 | //
 80 | // Callers must invoke runtime.LockOSThread before using a netNS to change
 81 | // the network namespace of the current thread.
 82 | type netNS struct {
 83 | 	// f is the open handle to a network namespace; fileNetNS leaves it nil when disabled.
 84 | 	f *os.File
 85 | 
 86 | 	// disabled reports that network namespaces are unavailable on this system,
 87 | 	// in which case operations return errNetNSDisabled and FD returns -1.
 88 | 	disabled bool
 89 | }
 90 | 
 91 | // threadNetNS opens the calling thread's network namespace as a netNS. Call
 92 | // runtime.LockOSThread first if that namespace is not the default namespace.
 93 | func threadNetNS() (*netNS, error) {
 94 | 	path := fmt.Sprintf("/proc/self/task/%d/ns/net", unix.Gettid())
 95 | 	return fileNetNS(path)
 96 | }
 97 | 
 98 | // fileNetNS opens file and wraps the handle in a netNS. Only tests should
 99 | // call fileNetNS directly.
100 | func fileNetNS(file string) (*netNS, error) {
101 | 	handle, err := os.Open(file)
102 | 	if err == nil {
103 | 		return &netNS{f: handle}, nil
104 | 	}
105 | 
106 | 	if !os.IsNotExist(err) {
107 | 		return nil, err
108 | 	}
109 | 
110 | 	// A missing file means network namespaces are not enabled on this system.
111 | 	// Hand back a disabled netNS so later operations on it report errors.
112 | 	return &netNS{disabled: true}, nil
113 | }
114 | 
115 | // Close closes the namespace handle; a disabled netNS returns errNetNSDisabled.
116 | func (n *netNS) Close() error {
117 | 	return n.do(func() error { return n.f.Close() })
118 | }
119 | 
120 | // FD reports the descriptor of the namespace handle, or -1 when n is disabled.
121 | func (n *netNS) FD() int {
122 | 	if !n.disabled {
123 | 		return int(n.f.Fd())
124 | 	}
125 | 
126 | 	// A disabled netNS has no meaningful descriptor, so report one that
127 | 	// cannot exist.
128 | 	return -1
129 | }
130 | 
131 | // Restore switches the calling thread back to the namespace held by n's handle.
132 | func (n *netNS) Restore() error {
133 | 	return n.do(func() error { return n.Set(n.FD()) })
134 | }
135 | 
136 | // Set moves the current thread into the network namespace referred to by fd.
137 | func (n *netNS) Set(fd int) error {
138 | 	// os.NewSyscallError yields nil when setns(2) itself reports no error.
139 | 	setns := func() error { return os.NewSyscallError("setns", unix.Setns(fd, unix.CLONE_NEWNET)) }
140 | 	return n.do(setns)
141 | }
142 | 
143 | // do invokes fn unless n is disabled, in which case it returns errNetNSDisabled.
144 | func (n *netNS) do(fn func() error) error {
145 | 	if !n.disabled {
146 | 		return fn()
147 | 	}
148 | 
149 | 	return errNetNSDisabled
150 | }
151 | 


--------------------------------------------------------------------------------
/conn_linux_test.go:
--------------------------------------------------------------------------------
  1 | //go:build linux
  2 | // +build linux
  3 | 
  4 | package socket_test
  5 | 
  6 | import (
  7 | 	"context"
  8 | 	"errors"
  9 | 	"fmt"
 10 | 	"math"
 11 | 	"net"
 12 | 	"os"
 13 | 	"runtime"
 14 | 	"testing"
 15 | 
 16 | 	"github.com/google/go-cmp/cmp"
 17 | 	"github.com/mdlayher/socket"
 18 | 	"github.com/mdlayher/socket/internal/sockettest"
 19 | 	"golang.org/x/sync/errgroup"
 20 | 	"golang.org/x/sys/unix"
 21 | )
 22 | 
 23 | func TestLinuxConnBuffers(t *testing.T) {
 24 | 	t.Parallel()
 25 | 
 26 | 	// This test isn't necessarily Linux-specific but it's easiest to verify on
 27 | 	// Linux because we can rely on the kernel's documented buffer size
 28 | 	// manipulation behavior.
 29 | 	c, err := socket.Socket(unix.AF_INET, unix.SOCK_STREAM, 0, "tcpv4", nil)
 30 | 	if err != nil {
 31 | 		t.Fatalf("failed to open socket: %v", err)
 32 | 	}
 33 | 	defer c.Close()
 34 | 
 35 | 	const (
 36 | 		set = 8192
 37 | 
 38 | 		// Per socket(7):
 39 | 		//
 40 | 		// "The kernel doubles this value (to allow space for
 41 | 		// book‐keeping overhead) when it is set using setsockopt(2),
 42 | 		// and this doubled value is returned by getsockopt(2)."
 43 | 		want = set * 2
 44 | 	)
 45 | 
 46 | 	if err := c.SetReadBuffer(set); err != nil {
 47 | 		t.Fatalf("failed to set read buffer size: %v", err)
 48 | 	}
 49 | 
 50 | 	if err := c.SetWriteBuffer(set); err != nil {
 51 | 		t.Fatalf("failed to set write buffer size: %v", err)
 52 | 	}
 53 | 
 54 | 	// Now that we've set the buffers, read the sizes back through the Conn's
 55 | 	// ReadBuffer and WriteBuffer methods.
 56 | 
 57 | 	rcv, err := c.ReadBuffer()
 58 | 	if err != nil {
 59 | 		t.Fatalf("failed to get read buffer size: %v", err)
 60 | 	}
 61 | 
 62 | 	snd, err := c.WriteBuffer()
 63 | 	if err != nil {
 64 | 		t.Fatalf("failed to get write buffer size: %v", err)
 65 | 	}
 66 | 
 67 | 	if diff := cmp.Diff(want, rcv); diff != "" {
 68 | 		t.Fatalf("unexpected read buffer size (-want +got):\n%s", diff)
 69 | 	}
 70 | 	if diff := cmp.Diff(want, snd); diff != "" {
 71 | 		t.Fatalf("unexpected write buffer size (-want +got):\n%s", diff)
 72 | 	}
 73 | }
 74 | 
 75 | func TestLinuxNetworkNamespaces(t *testing.T) {
 76 | 	t.Parallel()
 77 | 
 78 | 	l, err := sockettest.Listen(0, nil)
 79 | 	if err != nil {
 80 | 		t.Fatalf("failed to create listener: %v", err)
 81 | 	}
 82 | 	defer l.Close()
 83 | 
 84 | 	addrC := make(chan net.Addr, 1)
 85 | 
 86 | 	var eg errgroup.Group
 87 | 	eg.Go(func() error {
 88 | 		// Creating a new anonymous network namespace poisons this OS thread. The
 89 | 		// thread is deliberately left locked so that the runtime kills it when
 90 | 		// the goroutine exits.
 91 | 		runtime.LockOSThread()
 92 | 
 93 | 		if err := unix.Unshare(unix.CLONE_NEWNET); err != nil {
 94 | 			// Wrap with %w so the errors.Is check below can detect permission denied.
 95 | 			return fmt.Errorf("failed to unshare network namespace: %w", err)
 96 | 		}
 97 | 
 98 | 		ns, err := socket.ThreadNetNS()
 99 | 		if err != nil {
100 | 			return fmt.Errorf("failed to get listener thread's network namespace: %v", err)
101 | 		}
102 | 
103 | 		// Because this OS thread now lives in a different network namespace, a
104 | 		// second listener on the same port is expected to succeed.
105 | 		l, err := sockettest.Listen(
106 | 			l.Addr().(*net.TCPAddr).Port,
107 | 			&socket.Config{NetNS: ns.FD()},
108 | 		)
109 | 		if err != nil {
110 | 			return fmt.Errorf("failed to create listener in network namespace: %v", err)
111 | 		}
112 | 		defer l.Close()
113 | 
114 | 		addrC <- l.Addr()
115 | 		return nil
116 | 	})
117 | 
118 | 	if err := eg.Wait(); err != nil {
119 | 		if errors.Is(err, os.ErrPermission) {
120 | 			t.Skipf("skipping, permission denied: %v", err)
121 | 		}
122 | 
123 | 		t.Fatalf("failed to run listener thread: %v", err)
124 | 	}
125 | 
126 | 	select {
127 | 	case addr := <-addrC:
128 | 		if diff := cmp.Diff(l.Addr(), addr); diff != "" {
129 | 			t.Fatalf("unexpected network address (-want +got):\n%s", diff)
130 | 		}
131 | 	default:
132 | 		t.Fatal("listener thread did not return its local address")
133 | 	}
134 | }
135 | 
136 | func TestLinuxDialVsockNoListener(t *testing.T) {
137 | 	t.Parallel()
138 | 
139 | 	// Background for this test: https://github.com/mdlayher/vsock/issues/47
140 | 	// and https://github.com/lxc/lxd/pull/9894.
141 | 	c, err := socket.Socket(unix.AF_VSOCK, unix.SOCK_STREAM, 0, "vsock", nil)
142 | 	if err != nil {
143 | 		t.Fatalf("failed to open socket: %v", err)
144 | 	}
145 | 	defer c.Close()
146 | 
147 | 	// Nothing should be listening on this local port, so the dial is expected
148 | 	// to fail with ECONNRESET.
149 | 	addr := &unix.SockaddrVM{
150 | 		CID:  unix.VMADDR_CID_LOCAL,
151 | 		Port: math.MaxUint32,
152 | 	}
153 | 	if _, err = c.Connect(context.Background(), addr); err == nil {
154 | 		// See https://github.com/mdlayher/socket/issues/4.
155 | 		t.Skipf("skipping, expected error but vsock successfully connected to local service")
156 | 	}
157 | 
158 | 	want := os.NewSyscallError("connect", unix.ECONNRESET)
159 | 	if diff := cmp.Diff(want, err); diff != "" {
160 | 		t.Fatalf("unexpected connect error (-want +got):\n%s", diff)
161 | 	}
162 | }
163 | 
164 | func TestLinuxOpenPIDFD(t *testing.T) {
165 | 	// Verify we can use regular files with socket by properly handling
166 | 	// ENOTSOCK, as is the case with pidfds.
167 | 	fd, err := unix.PidfdOpen(1, unix.PIDFD_NONBLOCK)
168 | 	if err != nil {
169 | 		t.Fatalf("failed to open pidfd for init: %v", err)
170 | 	}
171 | 
172 | 	c, err := socket.New(fd, "pidfd")
173 | 	if err != nil {
174 | 		t.Fatalf("failed to open Conn for pidfd: %v", err)
175 | 	}
176 | 	_ = c.Close()
177 | }
178 | 
179 | func TestLinuxBindToDevice(t *testing.T) {
180 | 	t.Parallel()
181 | 
182 | 	c, err := socket.Socket(unix.AF_INET, unix.SOCK_STREAM, 0, "tcpv4", nil)
183 | 	if err != nil {
184 | 		t.Fatalf("failed to open socket: %v", err)
185 | 	}
186 | 	defer c.Close()
187 | 
188 | 	// Assumes the loopback interface is always the first device on Linux
189 | 	// machines.
190 | 	const (
191 | 		name  = "lo"
192 | 		index = 1
193 | 	)
194 | 
195 | 	if err := c.SetsockoptString(unix.SOL_SOCKET, unix.SO_BINDTODEVICE, name); err != nil {
196 | 		t.Fatalf("failed to bind to device: %v", err)
197 | 	}
198 | 
199 | 	gotName, err := c.GetsockoptString(unix.SOL_SOCKET, unix.SO_BINDTODEVICE)
200 | 	if err != nil {
201 | 		t.Fatalf("failed to get bound interface name: %v", err)
202 | 	}
203 | 	if diff := cmp.Diff(name, gotName); diff != "" {
204 | 		t.Fatalf("unexpected interface name (-want +got):\n%s", diff)
205 | 	}
206 | 
207 | 	gotIndex, err := c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_BINDTOIFINDEX)
208 | 	if err != nil {
209 | 		t.Fatalf("failed to get bound interface index: %v", err)
210 | 	}
211 | 	if diff := cmp.Diff(index, gotIndex); diff != "" {
212 | 		t.Fatalf("unexpected interface index (-want +got):\n%s", diff)
213 | 	}
214 | }
215 | 


--------------------------------------------------------------------------------
/internal/sockettest/sockettest.go:
--------------------------------------------------------------------------------
  1 | // Package sockettest implements net.Listener and net.Conn types based on
  2 | // *socket.Conn for use in the package's tests.
  3 | package sockettest
  4 | 
  5 | import (
  6 | 	"context"
  7 | 	"fmt"
  8 | 	"io"
  9 | 	"net"
 10 | 	"os"
 11 | 	"time"
 12 | 
 13 | 	"github.com/mdlayher/socket"
 14 | 	"golang.org/x/sys/unix"
 15 | )
 16 | 
// A Listener is a net.Listener which can be extended with context support.
type Listener struct {
	// addr is the local address reported by Addr, captured from getsockname
	// when the Listener is created.
	addr *net.TCPAddr
	// c is the underlying listening socket.
	c *socket.Conn
	// ctx, when non-nil, is used by Accept and attached to accepted Conns.
	ctx context.Context
}
 23 | 
// Context stores ctx on the Listener for use by subsequent Accept calls and
// returns the same Listener so calls can be chained.
func (l *Listener) Context(ctx context.Context) *Listener {
	l.ctx = ctx
	return l
}
 28 | 
 29 | // Listen creates an IPv6 TCP net.Listener backed by a *socket.Conn on the
 30 | // specified port with optional configuration. Context ctx will be passed
 31 | // to accept and accepted connections.
 32 | func Listen(port int, cfg *socket.Config) (*Listener, error) {
 33 | 	c, err := socket.Socket(unix.AF_INET6, unix.SOCK_STREAM, 0, "tcpv6-server", cfg)
 34 | 	if err != nil {
 35 | 		return nil, fmt.Errorf("failed to open socket: %v", err)
 36 | 	}
 37 | 
 38 | 	// Be sure to close the Conn if any of the system calls fail before we
 39 | 	// return the Conn to the caller.
 40 | 
 41 | 	if err := c.Bind(&unix.SockaddrInet6{Port: port}); err != nil {
 42 | 		_ = c.Close()
 43 | 		return nil, fmt.Errorf("failed to bind: %v", err)
 44 | 	}
 45 | 
 46 | 	if err := c.Listen(unix.SOMAXCONN); err != nil {
 47 | 		_ = c.Close()
 48 | 		return nil, fmt.Errorf("failed to listen: %v", err)
 49 | 	}
 50 | 
 51 | 	sa, err := c.Getsockname()
 52 | 	if err != nil {
 53 | 		_ = c.Close()
 54 | 		return nil, fmt.Errorf("failed to getsockname: %v", err)
 55 | 	}
 56 | 
 57 | 	return &Listener{
 58 | 		addr: newTCPAddr(sa),
 59 | 		c:    c,
 60 | 	}, nil
 61 | }
 62 | 
 63 | // FileListener creates an IPv6 TCP net.Listener backed by a *socket.Conn from
 64 | // the input file.
 65 | func FileListener(f *os.File) (*Listener, error) {
 66 | 	c, err := socket.FileConn(f, "tcpv6-server")
 67 | 	if err != nil {
 68 | 		return nil, fmt.Errorf("failed to open file conn: %v", err)
 69 | 	}
 70 | 
 71 | 	sa, err := c.Getsockname()
 72 | 	if err != nil {
 73 | 		_ = c.Close()
 74 | 		return nil, fmt.Errorf("failed to getsockname: %v", err)
 75 | 	}
 76 | 
 77 | 	return &Listener{
 78 | 		addr: newTCPAddr(sa),
 79 | 		c:    c,
 80 | 	}, nil
 81 | }
 82 | 
 83 | func (l *Listener) Addr() net.Addr { return l.addr }
 84 | func (l *Listener) Close() error   { return l.c.Close() }
 85 | func (l *Listener) Accept() (net.Conn, error) {
 86 | 	ctx := context.Background()
 87 | 	if l.ctx != nil {
 88 | 		ctx = l.ctx
 89 | 	}
 90 | 
 91 | 	// SOCK_CLOEXEC and SOCK_NONBLOCK set automatically by Accept when possible.
 92 | 	conn, rsa, err := l.c.Accept(ctx, 0)
 93 | 	if err != nil {
 94 | 		return nil, err
 95 | 	}
 96 | 
 97 | 	lsa, err := conn.Getsockname()
 98 | 	if err != nil {
 99 | 		// Don't leak the Conn if the system call fails.
100 | 		_ = conn.Close()
101 | 		return nil, err
102 | 	}
103 | 
104 | 	c := &Conn{
105 | 		Conn:   conn,
106 | 		local:  newTCPAddr(lsa),
107 | 		remote: newTCPAddr(rsa),
108 | 	}
109 | 
110 | 	if l.ctx != nil {
111 | 		return c.Context(l.ctx), nil
112 | 	}
113 | 
114 | 	return c, nil
115 | }
116 | 
// A Conn is a net.Conn which can be extended with context support.
type Conn struct {
	// Conn is the underlying socket; it is exported so tests can reach the
	// lower-level *socket.Conn methods directly.
	Conn *socket.Conn
	// local and remote are the addresses reported by LocalAddr and RemoteAddr.
	local, remote *net.TCPAddr
	// ctx, when non-nil, makes Read and Write use the context-aware variants.
	ctx context.Context
}
123 | 
// Context stores ctx on the Conn for use by subsequent Read and Write calls
// and returns the same Conn so calls can be chained.
func (c *Conn) Context(ctx context.Context) *Conn {
	c.ctx = ctx
	return c
}
128 | 
129 | // Dial creates an IPv4 or IPv6 TCP net.Conn backed by a *socket.Conn with
130 | // optional configuration.
131 | func Dial(ctx context.Context, addr net.Addr, cfg *socket.Config) (*Conn, error) {
132 | 	ta, ok := addr.(*net.TCPAddr)
133 | 	if !ok {
134 | 		return nil, fmt.Errorf("expected *net.TCPAddr, but got: %T", addr)
135 | 	}
136 | 
137 | 	var (
138 | 		family int
139 | 		name   string
140 | 		sa     unix.Sockaddr
141 | 	)
142 | 
143 | 	if ta.IP.To16() != nil && ta.IP.To4() == nil {
144 | 		// IPv6.
145 | 		family = unix.AF_INET6
146 | 		name = "tcpv6-client"
147 | 
148 | 		var sa6 unix.SockaddrInet6
149 | 		copy(sa6.Addr[:], ta.IP)
150 | 		sa6.Port = ta.Port
151 | 
152 | 		sa = &sa6
153 | 	} else {
154 | 		// IPv4.
155 | 		family = unix.AF_INET
156 | 		name = "tcpv4-client"
157 | 
158 | 		var sa4 unix.SockaddrInet4
159 | 		copy(sa4.Addr[:], ta.IP.To4())
160 | 		sa4.Port = ta.Port
161 | 
162 | 		sa = &sa4
163 | 	}
164 | 
165 | 	c, err := socket.Socket(family, unix.SOCK_STREAM, 0, name, cfg)
166 | 	if err != nil {
167 | 		return nil, fmt.Errorf("failed to open socket: %v", err)
168 | 	}
169 | 
170 | 	// Be sure to close the Conn if any of the system calls fail before we
171 | 	// return the Conn to the caller.
172 | 
173 | 	rsa, err := c.Connect(ctx, sa)
174 | 	if err != nil {
175 | 		_ = c.Close()
176 | 		// Don't wrap, we want the raw error for tests.
177 | 		return nil, err
178 | 	}
179 | 
180 | 	lsa, err := c.Getsockname()
181 | 	if err != nil {
182 | 		_ = c.Close()
183 | 		return nil, err
184 | 	}
185 | 
186 | 	return &Conn{
187 | 		Conn:   c,
188 | 		local:  newTCPAddr(lsa),
189 | 		remote: newTCPAddr(rsa),
190 | 	}, nil
191 | }
192 | 
193 | func (c *Conn) Close() error                       { return c.Conn.Close() }
194 | func (c *Conn) CloseRead() error                   { return c.Conn.CloseRead() }
195 | func (c *Conn) CloseWrite() error                  { return c.Conn.CloseWrite() }
196 | func (c *Conn) LocalAddr() net.Addr                { return c.local }
197 | func (c *Conn) RemoteAddr() net.Addr               { return c.remote }
198 | func (c *Conn) SetDeadline(t time.Time) error      { return c.Conn.SetDeadline(t) }
199 | func (c *Conn) SetReadDeadline(t time.Time) error  { return c.Conn.SetReadDeadline(t) }
200 | func (c *Conn) SetWriteDeadline(t time.Time) error { return c.Conn.SetWriteDeadline(t) }
201 | 
202 | func (c *Conn) Read(b []byte) (int, error) {
203 | 	var (
204 | 		n   int
205 | 		err error
206 | 	)
207 | 
208 | 	if c.ctx != nil {
209 | 		n, err = c.Conn.ReadContext(c.ctx, b)
210 | 	} else {
211 | 		n, err = c.Conn.Read(b)
212 | 	}
213 | 
214 | 	return n, opError("read", err)
215 | }
216 | 
217 | func (c *Conn) Write(b []byte) (int, error) {
218 | 	var (
219 | 		n   int
220 | 		err error
221 | 	)
222 | 
223 | 	if c.ctx != nil {
224 | 		n, err = c.Conn.WriteContext(c.ctx, b)
225 | 	} else {
226 | 		n, err = c.Conn.Write(b)
227 | 	}
228 | 
229 | 	return n, opError("write", err)
230 | }
231 | 
// opError converts err into the form returned by Read and Write: nil and
// io.EOF are returned unchanged, and any other error is wrapped in a
// *net.OpError carrying op.
func opError(op string, err error) error {
	// This is still a bit simplistic but sufficient for nettest.TestConn.
	if err == nil {
		return nil
	}
	if err == io.EOF {
		return io.EOF
	}

	return &net.OpError{Op: op, Err: err}
}
243 | 
244 | func newTCPAddr(sa unix.Sockaddr) *net.TCPAddr {
245 | 	switch sa := sa.(type) {
246 | 	case *unix.SockaddrInet4:
247 | 		return &net.TCPAddr{
248 | 			IP:   sa.Addr[:],
249 | 			Port: sa.Port,
250 | 		}
251 | 	case *unix.SockaddrInet6:
252 | 		return &net.TCPAddr{
253 | 			IP:   sa.Addr[:],
254 | 			Port: sa.Port,
255 | 		}
256 | 	}
257 | 
258 | 	panic("unknown address family")
259 | }
260 | 


--------------------------------------------------------------------------------
/conn_test.go:
--------------------------------------------------------------------------------
  1 | package socket_test
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"context"
  6 | 	"errors"
  7 | 	"fmt"
  8 | 	"io"
  9 | 	"math"
 10 | 	"net"
 11 | 	"net/netip"
 12 | 	"os"
 13 | 	"runtime"
 14 | 	"sync"
 15 | 	"testing"
 16 | 	"time"
 17 | 
 18 | 	"github.com/google/go-cmp/cmp"
 19 | 	"github.com/google/go-cmp/cmp/cmpopts"
 20 | 	"github.com/mdlayher/socket/internal/sockettest"
 21 | 	"golang.org/x/net/nettest"
 22 | 	"golang.org/x/sync/errgroup"
 23 | 	"golang.org/x/sys/unix"
 24 | )
 25 | 
 26 | func TestConn(t *testing.T) {
 27 | 	t.Parallel()
 28 | 
 29 | 	tests := []struct {
 30 | 		name string
 31 | 		pipe nettest.MakePipe
 32 | 	}{
 33 | 		// Standard library plumbing.
 34 | 		{
 35 | 			name: "basic",
 36 | 			pipe: makePipe(
 37 | 				func() (net.Listener, error) {
 38 | 					return sockettest.Listen(0, nil)
 39 | 				},
 40 | 				func(addr net.Addr) (net.Conn, error) {
 41 | 					return sockettest.Dial(context.Background(), addr, nil)
 42 | 				},
 43 | 			),
 44 | 		},
 45 | 		// Our own implementations which have context cancelation support.
 46 | 		{
 47 | 			name: "context",
 48 | 			pipe: makePipe(
 49 | 				func() (net.Listener, error) {
 50 | 					l, err := sockettest.Listen(0, nil)
 51 | 					if err != nil {
 52 | 						return nil, err
 53 | 					}
 54 | 
 55 | 					return l.Context(context.Background()), nil
 56 | 				},
 57 | 				func(addr net.Addr) (net.Conn, error) {
 58 | 					ctx := context.Background()
 59 | 
 60 | 					c, err := sockettest.Dial(ctx, addr, nil)
 61 | 					if err != nil {
 62 | 						return nil, err
 63 | 					}
 64 | 
 65 | 					return c.Context(ctx), nil
 66 | 				},
 67 | 			),
 68 | 		},
 69 | 	}
 70 | 
 71 | 	for _, tt := range tests {
 72 | 		tt := tt
 73 | 		t.Run(tt.name, func(t *testing.T) {
 74 | 			t.Parallel()
 75 | 
 76 | 			nettest.TestConn(t, tt.pipe)
 77 | 
 78 | 			// Our own extensions to TestConn.
 79 | 			t.Run("CloseReadWrite", func(t *testing.T) { timeoutWrapper(t, tt.pipe, testCloseReadWrite) })
 80 | 		})
 81 | 	}
 82 | }
 83 | 
 84 | func TestDialTCPNoListener(t *testing.T) {
 85 | 	t.Parallel()
 86 | 
 87 | 	// See https://github.com/mdlayher/vsock/issues/47 and
 88 | 	// https://github.com/lxc/lxd/pull/9894 for context on this test.
 89 | 	//
 90 | 	//
 91 | 	// Given a (hopefully) non-existent listener on localhost, expect
 92 | 	// ECONNREFUSED.
 93 | 	_, err := sockettest.Dial(context.Background(), &net.TCPAddr{
 94 | 		IP:   net.IPv6loopback,
 95 | 		Port: math.MaxUint16,
 96 | 	}, nil)
 97 | 
 98 | 	want := os.NewSyscallError("connect", unix.ECONNREFUSED)
 99 | 	if diff := cmp.Diff(want, err); diff != "" {
100 | 		t.Fatalf("unexpected connect error (-want +got):\n%s", diff)
101 | 	}
102 | }
103 | 
104 | func TestDialTCPContextCanceledBefore(t *testing.T) {
105 | 	t.Parallel()
106 | 
107 | 	// Context is canceled before any dialing can take place.
108 | 	ctx, cancel := context.WithCancel(context.Background())
109 | 	cancel()
110 | 
111 | 	_, err := sockettest.Dial(ctx, &net.TCPAddr{
112 | 		IP:   net.IPv6loopback,
113 | 		Port: math.MaxUint16,
114 | 	}, nil)
115 | 
116 | 	if diff := cmp.Diff(context.Canceled, err, cmpopts.EquateErrors()); diff != "" {
117 | 		t.Fatalf("unexpected connect error (-want +got):\n%s", diff)
118 | 	}
119 | }
120 | 
// ipTests lists one IPv4 and one IPv6 destination shared by the dial tests
// which need a connect call that blocks until the context ends.
var ipTests = []struct {
	// name is used as the subtest name.
	name string
	// ip is the destination address to dial.
	ip netip.Addr
}{
	// It appears we can dial addresses in the documentation range and
	// connect will hang, which is perfect for this test case.
	{
		name: "IPv4",
		ip:   netip.MustParseAddr("192.0.2.1"),
	},
	{
		name: "IPv6",
		ip:   netip.MustParseAddr("2001:db8::1"),
	},
}
136 | 
137 | func TestDialTCPContextCanceledDuring(t *testing.T) {
138 | 	t.Parallel()
139 | 
140 | 	for _, tt := range ipTests {
141 | 		tt := tt
142 | 		t.Run(tt.name, func(t *testing.T) {
143 | 			t.Parallel()
144 | 
145 | 			// Context is canceled during a blocking operation but without an
146 | 			// explicit deadline passed on the context.
147 | 			ctx, cancel := context.WithCancel(context.Background())
148 | 			defer cancel()
149 | 
150 | 			go func() {
151 | 				time.Sleep(1 * time.Second)
152 | 				cancel()
153 | 			}()
154 | 
155 | 			_, err := sockettest.Dial(ctx, &net.TCPAddr{
156 | 				IP:   tt.ip.AsSlice(),
157 | 				Port: math.MaxUint16,
158 | 			}, nil)
159 | 			if errors.Is(err, unix.ENETUNREACH) || errors.Is(err, unix.EHOSTUNREACH) {
160 | 				t.Skipf("skipping, no outbound %s connectivity: %v", tt.name, err)
161 | 			}
162 | 
163 | 			if diff := cmp.Diff(context.Canceled, err, cmpopts.EquateErrors()); diff != "" {
164 | 				t.Fatalf("unexpected connect error (-want +got):\n%s", diff)
165 | 			}
166 | 		})
167 | 	}
168 | }
169 | 
170 | func TestDialTCPContextDeadlineExceeded(t *testing.T) {
171 | 	t.Parallel()
172 | 
173 | 	for _, tt := range ipTests {
174 | 		tt := tt
175 | 		t.Run(tt.name, func(t *testing.T) {
176 | 			t.Parallel()
177 | 
178 | 			// Dialing is canceled after the deadline passes.
179 | 			ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
180 | 			defer cancel()
181 | 
182 | 			_, err := sockettest.Dial(ctx, &net.TCPAddr{
183 | 				IP:   tt.ip.AsSlice(),
184 | 				Port: math.MaxUint16,
185 | 			}, nil)
186 | 			if errors.Is(err, unix.ENETUNREACH) || errors.Is(err, unix.EHOSTUNREACH) {
187 | 				t.Skipf("skipping, no outbound %s connectivity: %v", tt.name, err)
188 | 			}
189 | 
190 | 			if diff := cmp.Diff(context.DeadlineExceeded, err, cmpopts.EquateErrors()); diff != "" {
191 | 				t.Fatalf("unexpected connect error (-want +got):\n%s", diff)
192 | 			}
193 | 		})
194 | 	}
195 | }
196 | 
197 | func TestListenerAcceptTCPContextCanceledBefore(t *testing.T) {
198 | 	t.Parallel()
199 | 
200 | 	l, err := sockettest.Listen(0, nil)
201 | 	if err != nil {
202 | 		t.Fatalf("failed to listen: %v", err)
203 | 	}
204 | 	defer l.Close()
205 | 
206 | 	// Context is canceled before accept can take place.
207 | 	ctx, cancel := context.WithCancel(context.Background())
208 | 	cancel()
209 | 
210 | 	_, err = l.Context(ctx).Accept()
211 | 	if diff := cmp.Diff(context.Canceled, err, cmpopts.EquateErrors()); diff != "" {
212 | 		t.Fatalf("unexpected accept error (-want +got):\n%s", diff)
213 | 	}
214 | }
215 | 
216 | func TestListenerAcceptTCPContextCanceledDuring(t *testing.T) {
217 | 	t.Parallel()
218 | 
219 | 	l, err := sockettest.Listen(0, nil)
220 | 	if err != nil {
221 | 		t.Fatalf("failed to listen: %v", err)
222 | 	}
223 | 	defer l.Close()
224 | 
225 | 	// Context is canceled during a blocking operation but without an
226 | 	// explicit deadline passed on the context.
227 | 	ctx, cancel := context.WithCancel(context.Background())
228 | 	defer cancel()
229 | 
230 | 	go func() {
231 | 		time.Sleep(1 * time.Second)
232 | 		cancel()
233 | 	}()
234 | 
235 | 	_, err = l.Context(ctx).Accept()
236 | 	if diff := cmp.Diff(context.Canceled, err, cmpopts.EquateErrors()); diff != "" {
237 | 		t.Fatalf("unexpected accept error (-want +got):\n%s", diff)
238 | 	}
239 | }
240 | 
241 | func TestListenerAcceptTCPContextDeadlineExceeded(t *testing.T) {
242 | 	t.Parallel()
243 | 
244 | 	l, err := sockettest.Listen(0, nil)
245 | 	if err != nil {
246 | 		t.Fatalf("failed to listen: %v", err)
247 | 	}
248 | 	defer l.Close()
249 | 
250 | 	// Accept is canceled after the deadline passes.
251 | 	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
252 | 	defer cancel()
253 | 
254 | 	_, err = l.Context(ctx).Accept()
255 | 	if diff := cmp.Diff(context.DeadlineExceeded, err, cmpopts.EquateErrors()); diff != "" {
256 | 		t.Fatalf("unexpected accept error (-want +got):\n%s", diff)
257 | 	}
258 | }
259 | 
260 | func TestListenerConnTCPContextCanceled(t *testing.T) {
261 | 	t.Parallel()
262 | 
263 | 	l, err := sockettest.Listen(0, nil)
264 | 	if err != nil {
265 | 		t.Fatalf("failed to open listener: %v", err)
266 | 	}
267 | 	defer l.Close()
268 | 
269 | 	// Accept a single connection.
270 | 	var eg errgroup.Group
271 | 	eg.Go(func() error {
272 | 		c, err := l.Accept()
273 | 		if err != nil {
274 | 			return fmt.Errorf("failed to accept: %v", err)
275 | 		}
276 | 		defer c.Close()
277 | 
278 | 		// Context is canceled during recvfrom.
279 | 		ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
280 | 		defer cancel()
281 | 
282 | 		b := make([]byte, 1024)
283 | 		_, _, err = c.(*sockettest.Conn).Conn.Recvfrom(ctx, b, 0)
284 | 		return err
285 | 	})
286 | 
287 | 	c, err := net.Dial(l.Addr().Network(), l.Addr().String())
288 | 	if err != nil {
289 | 		t.Fatalf("failed to dial listener: %v", err)
290 | 	}
291 | 	defer c.Close()
292 | 
293 | 	// Client never sends data, so we wait until ctx cancel and errgroup return.
294 | 	if diff := cmp.Diff(context.DeadlineExceeded, eg.Wait(), cmpopts.EquateErrors()); diff != "" {
295 | 		t.Fatalf("unexpected recvfrom error (-want +got):\n%s", diff)
296 | 	}
297 | }
298 | 
299 | func TestListenerConnTCPContextDeadlineExceeded(t *testing.T) {
300 | 	t.Parallel()
301 | 
302 | 	l, err := sockettest.Listen(0, nil)
303 | 	if err != nil {
304 | 		t.Fatalf("failed to open listener: %v", err)
305 | 	}
306 | 	defer l.Close()
307 | 
308 | 	// Accept a single connection.
309 | 	var eg errgroup.Group
310 | 	eg.Go(func() error {
311 | 		c, err := l.Accept()
312 | 		if err != nil {
313 | 			return fmt.Errorf("failed to accept: %v", err)
314 | 		}
315 | 		defer c.Close()
316 | 
317 | 		// Context is canceled before recvfrom can take place.
318 | 		ctx, cancel := context.WithCancel(context.Background())
319 | 		cancel()
320 | 
321 | 		b := make([]byte, 1024)
322 | 		_, _, err = c.(*sockettest.Conn).Conn.Recvfrom(ctx, b, 0)
323 | 		return err
324 | 	})
325 | 
326 | 	c, err := net.Dial(l.Addr().Network(), l.Addr().String())
327 | 	if err != nil {
328 | 		t.Fatalf("failed to dial listener: %v", err)
329 | 	}
330 | 	defer c.Close()
331 | 
332 | 	// Client never sends data, so we wait until ctx cancel and errgroup return.
333 | 	if diff := cmp.Diff(context.Canceled, eg.Wait(), cmpopts.EquateErrors()); diff != "" {
334 | 		t.Fatalf("unexpected recvfrom error (-want +got):\n%s", diff)
335 | 	}
336 | }
337 | 
338 | func TestFileConn(t *testing.T) {
339 | 	t.Parallel()
340 | 
341 | 	// Use raw system calls to set up the socket since we assume anything being
342 | 	// passed into a FileConn is set up by another system, such as systemd's
343 | 	// socket activation.
344 | 	fd, err := unix.Socket(unix.AF_INET6, unix.SOCK_STREAM, 0)
345 | 	if err != nil {
346 | 		t.Fatalf("failed to open socket: %v", err)
347 | 	}
348 | 
349 | 	// Bind to loopback, any available port.
350 | 	sa := &unix.SockaddrInet6{Addr: [16]byte{15: 0x01}}
351 | 	if err := unix.Bind(fd, sa); err != nil {
352 | 		t.Fatalf("failed to bind: %v", err)
353 | 	}
354 | 
355 | 	if err := unix.Listen(fd, unix.SOMAXCONN); err != nil {
356 | 		t.Fatalf("failed to listen: %v", err)
357 | 	}
358 | 
359 | 	// The socket should be ready, create a blocking file which is ready to be
360 | 	// passed into FileConn via the FileListener helper.
361 | 	f := os.NewFile(uintptr(fd), "tcpv6-listener")
362 | 	defer f.Close()
363 | 
364 | 	l, err := sockettest.FileListener(f)
365 | 	if err != nil {
366 | 		t.Fatalf("failed to open file listener: %v", err)
367 | 	}
368 | 	defer l.Close()
369 | 
370 | 	// To exercise the listener, attempt to accept and then immediately close a
371 | 	// single TCPv6 connection. Dial to the listener from the main goroutine and
372 | 	// wait for everything to finish.
373 | 	var eg errgroup.Group
374 | 	eg.Go(func() error {
375 | 		c, err := l.Accept()
376 | 		if err != nil {
377 | 			return fmt.Errorf("failed to accept: %v", err)
378 | 		}
379 | 
380 | 		_ = c.Close()
381 | 		return nil
382 | 	})
383 | 
384 | 	c, err := net.Dial(l.Addr().Network(), l.Addr().String())
385 | 	if err != nil {
386 | 		t.Fatalf("failed to dial listener: %v", err)
387 | 	}
388 | 	_ = c.Close()
389 | 
390 | 	if err := eg.Wait(); err != nil {
391 | 		t.Fatalf("failed to wait for listener goroutine: %v", err)
392 | 	}
393 | }
394 | 
395 | // Use our TCP net.Listener and net.Conn implementations backed by *socket.Conn
396 | // and run compliance tests with nettest.TestConn.
397 | //
398 | // This nettest.MakePipe function is adapted from nettest's own tests:
399 | // https://github.com/golang/net/blob/master/nettest/conntest_test.go
400 | //
401 | // Copyright 2016 The Go Authors. All rights reserved. Use of this source
402 | // code is governed by a BSD-style license that can be found in the LICENSE
403 | // file.
404 | func makePipe(
405 | 	listen func() (net.Listener, error),
406 | 	dial func(addr net.Addr) (net.Conn, error),
407 | ) nettest.MakePipe {
408 | 	return func() (c1, c2 net.Conn, stop func(), err error) {
409 | 		ln, err := listen()
410 | 		if err != nil {
411 | 			return nil, nil, nil, err
412 | 		}
413 | 
414 | 		// Start a connection between two endpoints.
415 | 		var err1, err2 error
416 | 		done := make(chan bool)
417 | 		go func() {
418 | 			c2, err2 = ln.Accept()
419 | 			close(done)
420 | 		}()
421 | 		c1, err1 = dial(ln.Addr())
422 | 		<-done
423 | 
424 | 		stop = func() {
425 | 			if err1 == nil {
426 | 				c1.Close()
427 | 			}
428 | 			if err2 == nil {
429 | 				c2.Close()
430 | 			}
431 | 			ln.Close()
432 | 		}
433 | 
434 | 		switch {
435 | 		case err1 != nil:
436 | 			stop()
437 | 			return nil, nil, nil, err1
438 | 		case err2 != nil:
439 | 			stop()
440 | 			return nil, nil, nil, err2
441 | 		default:
442 | 			return c1, c2, stop, nil
443 | 		}
444 | 	}
445 | }
446 | 
// Copied from x/net/nettest, pending acceptance of:
// https://go-review.googlesource.com/c/net/+/372815
//
// A connTester is a test body which is run against both ends of a pipe.
type connTester func(t *testing.T, c1, c2 net.Conn)
450 | 
451 | func timeoutWrapper(t *testing.T, mp nettest.MakePipe, f connTester) {
452 | 	t.Helper()
453 | 	c1, c2, stop, err := mp()
454 | 	if err != nil {
455 | 		t.Fatalf("unable to make pipe: %v", err)
456 | 	}
457 | 	var once sync.Once
458 | 	defer once.Do(func() { stop() })
459 | 	timer := time.AfterFunc(time.Minute, func() {
460 | 		once.Do(func() {
461 | 			t.Error("test timed out; terminating pipe")
462 | 			stop()
463 | 		})
464 | 	})
465 | 	defer timer.Stop()
466 | 	f(t, c1, c2)
467 | }
468 | 
469 | // testCloseReadWrite tests that net.Conns which also implement the optional
470 | // CloseRead and CloseWrite methods can be half-closed correctly.
471 | func testCloseReadWrite(t *testing.T, c1, c2 net.Conn) {
472 | 	// TODO(mdlayher): investigate why Mac/Windows errors are so different.
473 | 	if runtime.GOOS != "linux" {
474 | 		t.Skip("skipping, not supported on non-Linux platforms")
475 | 	}
476 | 
477 | 	type closerConn interface {
478 | 		net.Conn
479 | 		CloseRead() error
480 | 		CloseWrite() error
481 | 	}
482 | 
483 | 	cc1, ok1 := c1.(closerConn)
484 | 	cc2, ok2 := c2.(closerConn)
485 | 	if !ok1 || !ok2 {
486 | 		// Both c1 and c2 must implement closerConn to proceed.
487 | 		return
488 | 	}
489 | 
490 | 	var wg sync.WaitGroup
491 | 	wg.Add(2)
492 | 	defer wg.Wait()
493 | 
494 | 	go func() {
495 | 		defer wg.Done()
496 | 
497 | 		// Writing succeeds at first but should result in a permanent "broken
498 | 		// pipe" error after closing the write side of the net.Conn.
499 | 		b := make([]byte, 64)
500 | 		if err := chunkedCopy(cc1, bytes.NewReader(b)); err != nil {
501 | 			t.Errorf("unexpected initial cc1.Write error: %v", err)
502 | 		}
503 | 		if err := cc1.CloseWrite(); err != nil {
504 | 			t.Errorf("unexpected cc1.CloseWrite error: %v", err)
505 | 		}
506 | 		_, err := cc1.Write(b)
507 | 		if nerr, ok := err.(net.Error); !ok || nerr.Timeout() {
508 | 			t.Errorf("unexpected final cc1.Write error: %v", err)
509 | 		}
510 | 	}()
511 | 
512 | 	go func() {
513 | 		defer wg.Done()
514 | 
515 | 		// Reading succeeds at first but should result in an EOF error after
516 | 		// closing the read side of the net.Conn.
517 | 		if err := chunkedCopy(io.Discard, cc2); err != nil {
518 | 			t.Errorf("unexpected initial cc2.Read error: %v", err)
519 | 		}
520 | 		if err := cc2.CloseRead(); err != nil {
521 | 			t.Errorf("unexpected cc2.CloseRead error: %v", err)
522 | 		}
523 | 		if _, err := cc2.Read(make([]byte, 64)); err != io.EOF {
524 | 			t.Errorf("unexpected final cc2.Read error: %v", err)
525 | 		}
526 | 	}()
527 | }
528 | 
// chunkedCopy copies everything from r to w through a 1024-byte buffer so
// that no single Write exceeds the maximum packet size of packet-based
// connections like "unixpacket".
// We assume that the maximum packet size is at least 1024.
func chunkedCopy(w io.Writer, r io.Reader) error {
	// Wrapping both ends in anonymous structs exposes only the Write and Read
	// methods, so io.CopyBuffer always goes through the supplied buffer.
	var (
		dst = struct{ io.Writer }{w}
		src = struct{ io.Reader }{r}
	)

	_, err := io.CopyBuffer(dst, src, make([]byte, 1024))
	return err
}
538 | 


--------------------------------------------------------------------------------
/conn.go:
--------------------------------------------------------------------------------
  1 | package socket
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"errors"
  6 | 	"io"
  7 | 	"os"
  8 | 	"sync"
  9 | 	"sync/atomic"
 10 | 	"syscall"
 11 | 	"time"
 12 | 
 13 | 	"golang.org/x/sys/unix"
 14 | )
 15 | 
 16 | // Lock in an expected public interface for convenience.
 17 | var _ interface {
 18 | 	io.ReadWriteCloser
 19 | 	syscall.Conn
 20 | 	SetDeadline(t time.Time) error
 21 | 	SetReadDeadline(t time.Time) error
 22 | 	SetWriteDeadline(t time.Time) error
 23 | } = &Conn{}
 24 | 
// A Conn is a low-level network connection which integrates with Go's runtime
// network poller to provide asynchronous I/O and deadline support.
//
// Many of a Conn's blocking methods support net.Conn deadlines as well as
// cancelation via context. Note that passing a context with a deadline set will
// override any of the previous deadlines set by calls to the SetDeadline family
// of methods.
type Conn struct {
	// Indicates whether or not Conn.Close has been called. Must be accessed
	// atomically. Atomics definitions must come first in the Conn struct.
	//
	// Close increments this counter on every call, so any non-zero value means
	// the Conn is closed.
	closed uint32

	// A unique name for the Conn which is also associated with derived file
	// descriptors such as those created by accept(2).
	name string

	// facts contains information we have determined about Conn to trigger
	// alternate behavior in certain functions.
	facts facts

	// Provides access to the underlying file registered with the runtime
	// network poller, and arbitrary raw I/O calls.
	fd *os.File
	rc syscall.RawConn
}
 50 | 
// facts contains facts about a Conn.
type facts struct {
	// isStream reports whether this is a streaming descriptor, as opposed to a
	// packet-based descriptor like a UDP socket. ReadContext and WriteContext
	// limit each system call to maxRW bytes when it is set.
	isStream bool

	// zeroReadIsEOF reports whether a zero byte read indicates EOF. This is
	// false for a message based socket connection.
	zeroReadIsEOF bool
}
 61 | 
// A Config contains options for a Conn.
type Config struct {
	// NetNS specifies the Linux network namespace the Conn will operate in.
	// This option is unsupported on other operating systems.
	//
	// The value is a file descriptor number; this repository's tests pass the
	// FD() of a namespace handle.
	//
	// If set (non-zero), Conn will enter the specified network namespace and an
	// error will occur in Socket if the operation fails.
	//
	// If not set (zero), a best-effort attempt will be made to enter the
	// network namespace of the calling thread: this means that any changes made
	// to the calling thread's network namespace will also be reflected in Conn.
	// If this operation fails (due to lack of permissions or because network
	// namespaces are disabled by kernel configuration), Socket will not return
	// an error, and the Conn will operate in the default network namespace of
	// the process. This enables non-privileged use of Conn in applications
	// which do not require elevated privileges.
	//
	// Entering a network namespace is a privileged operation (root or
	// CAP_SYS_ADMIN are required), and most applications should leave this set
	// to 0.
	NetNS int
}
 84 | 
 85 | // High-level methods which provide convenience over raw system calls.
 86 | 
 87 | // Close closes the underlying file descriptor for the Conn, which also causes
 88 | // all in-flight I/O operations to immediately unblock and return errors. Any
 89 | // subsequent uses of Conn will result in EBADF.
 90 | func (c *Conn) Close() error {
 91 | 	// The caller has expressed an intent to close the socket, so immediately
 92 | 	// increment s.closed to force further calls to result in EBADF before also
 93 | 	// closing the file descriptor to unblock any outstanding operations.
 94 | 	//
 95 | 	// Because other operations simply check for s.closed != 0, we will permit
 96 | 	// double Close, which would increment s.closed beyond 1.
 97 | 	if atomic.AddUint32(&c.closed, 1) != 1 {
 98 | 		// Multiple Close calls.
 99 | 		return nil
100 | 	}
101 | 
102 | 	return os.NewSyscallError("close", c.fd.Close())
103 | }
104 | 
105 | // CloseRead shuts down the reading side of the Conn. Most callers should just
106 | // use Close.
107 | func (c *Conn) CloseRead() error { return c.Shutdown(unix.SHUT_RD) }
108 | 
109 | // CloseWrite shuts down the writing side of the Conn. Most callers should just
110 | // use Close.
111 | func (c *Conn) CloseWrite() error { return c.Shutdown(unix.SHUT_WR) }
112 | 
113 | // Read reads directly from the underlying file descriptor.
114 | func (c *Conn) Read(b []byte) (int, error) { return c.fd.Read(b) }
115 | 
116 | // ReadContext reads from the underlying file descriptor with added support for
117 | // context cancelation.
118 | func (c *Conn) ReadContext(ctx context.Context, b []byte) (int, error) {
119 | 	if c.facts.isStream && len(b) > maxRW {
120 | 		b = b[:maxRW]
121 | 	}
122 | 
123 | 	n, err := readT(c, ctx, "read", func(fd int) (int, error) {
124 | 		return unix.Read(fd, b)
125 | 	})
126 | 	if n == 0 && err == nil && c.facts.zeroReadIsEOF {
127 | 		return 0, io.EOF
128 | 	}
129 | 
130 | 	return n, os.NewSyscallError("read", err)
131 | }
132 | 
133 | // Write writes directly to the underlying file descriptor.
134 | func (c *Conn) Write(b []byte) (int, error) { return c.fd.Write(b) }
135 | 
136 | // WriteContext writes to the underlying file descriptor with added support for
137 | // context cancelation.
138 | func (c *Conn) WriteContext(ctx context.Context, b []byte) (int, error) {
139 | 	var (
140 | 		n, nn int
141 | 		err   error
142 | 	)
143 | 
144 | 	doErr := c.write(ctx, "write", func(fd int) error {
145 | 		max := len(b)
146 | 		if c.facts.isStream && max-nn > maxRW {
147 | 			max = nn + maxRW
148 | 		}
149 | 
150 | 		n, err = unix.Write(fd, b[nn:max])
151 | 		if n > 0 {
152 | 			nn += n
153 | 		}
154 | 		if nn == len(b) {
155 | 			return err
156 | 		}
157 | 		if n == 0 && err == nil {
158 | 			err = io.ErrUnexpectedEOF
159 | 			return nil
160 | 		}
161 | 
162 | 		return err
163 | 	})
164 | 	if doErr != nil {
165 | 		return 0, doErr
166 | 	}
167 | 
168 | 	return nn, os.NewSyscallError("write", err)
169 | }
170 | 
171 | // SetDeadline sets both the read and write deadlines associated with the Conn.
172 | func (c *Conn) SetDeadline(t time.Time) error { return c.fd.SetDeadline(t) }
173 | 
174 | // SetReadDeadline sets the read deadline associated with the Conn.
175 | func (c *Conn) SetReadDeadline(t time.Time) error { return c.fd.SetReadDeadline(t) }
176 | 
177 | // SetWriteDeadline sets the write deadline associated with the Conn.
178 | func (c *Conn) SetWriteDeadline(t time.Time) error { return c.fd.SetWriteDeadline(t) }
179 | 
180 | // ReadBuffer gets the size of the operating system's receive buffer associated
181 | // with the Conn.
182 | func (c *Conn) ReadBuffer() (int, error) {
183 | 	return c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUF)
184 | }
185 | 
186 | // WriteBuffer gets the size of the operating system's transmit buffer
187 | // associated with the Conn.
188 | func (c *Conn) WriteBuffer() (int, error) {
189 | 	return c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUF)
190 | }
191 | 
192 | // SetReadBuffer sets the size of the operating system's receive buffer
193 | // associated with the Conn.
194 | //
195 | // When called with elevated privileges on Linux, the SO_RCVBUFFORCE option will
196 | // be used to override operating system limits. Otherwise SO_RCVBUF is used
197 | // (which obeys operating system limits).
198 | func (c *Conn) SetReadBuffer(bytes int) error { return c.setReadBuffer(bytes) }
199 | 
200 | // SetWriteBuffer sets the size of the operating system's transmit buffer
201 | // associated with the Conn.
202 | //
203 | // When called with elevated privileges on Linux, the SO_SNDBUFFORCE option will
204 | // be used to override operating system limits. Otherwise SO_SNDBUF is used
205 | // (which obeys operating system limits).
206 | func (c *Conn) SetWriteBuffer(bytes int) error { return c.setWriteBuffer(bytes) }
207 | 
208 | // SyscallConn returns a raw network connection. This implements the
209 | // syscall.Conn interface.
210 | //
211 | // SyscallConn is intended for advanced use cases, such as getting and setting
212 | // arbitrary socket options using the socket's file descriptor. If possible,
213 | // those operations should be performed using methods on Conn instead.
214 | //
215 | // Once invoked, it is the caller's responsibility to ensure that operations
216 | // performed using Conn and the syscall.RawConn do not conflict with each other.
217 | func (c *Conn) SyscallConn() (syscall.RawConn, error) {
218 | 	if atomic.LoadUint32(&c.closed) != 0 {
219 | 		return nil, os.NewSyscallError("syscallconn", unix.EBADF)
220 | 	}
221 | 
222 | 	// TODO(mdlayher): mutex or similar to enforce syscall.RawConn contract of
223 | 	// FD remaining valid for duration of calls?
224 | 	return c.rc, nil
225 | }
226 | 
227 | // Socket wraps the socket(2) system call to produce a Conn. domain, typ, and
228 | // proto are passed directly to socket(2), and name should be a unique name for
229 | // the socket type such as "netlink" or "vsock".
230 | //
231 | // The cfg parameter specifies optional configuration for the Conn. If nil, no
232 | // additional configuration will be applied.
233 | //
234 | // If the operating system supports SOCK_CLOEXEC and SOCK_NONBLOCK, they are
235 | // automatically applied to typ to mirror the standard library's socket flag
236 | // behaviors.
237 | func Socket(domain, typ, proto int, name string, cfg *Config) (*Conn, error) {
238 | 	if cfg == nil {
239 | 		cfg = &Config{}
240 | 	}
241 | 
242 | 	if cfg.NetNS == 0 {
243 | 		// Non-Linux or no network namespace.
244 | 		return socket(domain, typ, proto, name)
245 | 	}
246 | 
247 | 	// Linux only: create Conn in the specified network namespace.
248 | 	return withNetNS(cfg.NetNS, func() (*Conn, error) {
249 | 		return socket(domain, typ, proto, name)
250 | 	})
251 | }
252 | 
253 | // socket is the internal, cross-platform entry point for socket(2).
254 | func socket(domain, typ, proto int, name string) (*Conn, error) {
255 | 	var (
256 | 		fd  int
257 | 		err error
258 | 	)
259 | 
260 | 	for {
261 | 		fd, err = unix.Socket(domain, typ|socketFlags, proto)
262 | 		switch {
263 | 		case err == nil:
264 | 			// Some OSes already set CLOEXEC with typ.
265 | 			if !flagCLOEXEC {
266 | 				unix.CloseOnExec(fd)
267 | 			}
268 | 
269 | 			// No error, prepare the Conn.
270 | 			return New(fd, name)
271 | 		case !ready(err):
272 | 			// System call interrupted or not ready, try again.
273 | 			continue
274 | 		case err == unix.EINVAL, err == unix.EPROTONOSUPPORT:
275 | 			// On Linux, SOCK_NONBLOCK and SOCK_CLOEXEC were introduced in
276 | 			// 2.6.27. On FreeBSD, both flags were introduced in FreeBSD 10.
277 | 			// EINVAL and EPROTONOSUPPORT check for earlier versions of these
278 | 			// OSes respectively.
279 | 			//
280 | 			// Mirror what the standard library does when creating file
281 | 			// descriptors: avoid racing a fork/exec with the creation of new
282 | 			// file descriptors, so that child processes do not inherit socket
283 | 			// file descriptors unexpectedly.
284 | 			//
285 | 			// For a more thorough explanation, see similar work in the Go tree:
286 | 			// func sysSocket in net/sock_cloexec.go, as well as the detailed
287 | 			// comment in syscall/exec_unix.go.
288 | 			syscall.ForkLock.RLock()
289 | 			fd, err = unix.Socket(domain, typ, proto)
290 | 			if err != nil {
291 | 				syscall.ForkLock.RUnlock()
292 | 				return nil, os.NewSyscallError("socket", err)
293 | 			}
294 | 			unix.CloseOnExec(fd)
295 | 			syscall.ForkLock.RUnlock()
296 | 
297 | 			return New(fd, name)
298 | 		default:
299 | 			// Unhandled error.
300 | 			return nil, os.NewSyscallError("socket", err)
301 | 		}
302 | 	}
303 | }
304 | 
305 | // FileConn returns a copy of the network connection corresponding to the open
306 | // file. It is the caller's responsibility to close the file when finished.
307 | // Closing the Conn does not affect the File, and closing the File does not
308 | // affect the Conn.
309 | func FileConn(f *os.File, name string) (*Conn, error) {
310 | 	// First we'll try to do fctnl(2) with F_DUPFD_CLOEXEC because we can dup
311 | 	// the file descriptor and set the flag in one syscall.
312 | 	fd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0)
313 | 	switch err {
314 | 	case nil:
315 | 		// OK, ready to set up non-blocking I/O.
316 | 		return New(fd, name)
317 | 	case unix.EINVAL:
318 | 		// The kernel rejected our fcntl(2), fall back to separate dup(2) and
319 | 		// setting close on exec.
320 | 		//
321 | 		// Mirror what the standard library does when creating file descriptors:
322 | 		// avoid racing a fork/exec with the creation of new file descriptors,
323 | 		// so that child processes do not inherit socket file descriptors
324 | 		// unexpectedly.
325 | 		syscall.ForkLock.RLock()
326 | 		fd, err := unix.Dup(fd)
327 | 		if err != nil {
328 | 			syscall.ForkLock.RUnlock()
329 | 			return nil, os.NewSyscallError("dup", err)
330 | 		}
331 | 		unix.CloseOnExec(fd)
332 | 		syscall.ForkLock.RUnlock()
333 | 
334 | 		return New(fd, name)
335 | 	default:
336 | 		// Any other errors.
337 | 		return nil, os.NewSyscallError("fcntl", err)
338 | 	}
339 | }
340 | 
341 | // New wraps an existing file descriptor to create a Conn. name should be a
342 | // unique name for the socket type such as "netlink" or "vsock".
343 | //
344 | // Most callers should use Socket or FileConn to construct a Conn. New is
345 | // intended for integrating with specific system calls which provide a file
346 | // descriptor that supports asynchronous I/O. The file descriptor is immediately
347 | // set to nonblocking mode and registered with Go's runtime network poller for
348 | // future I/O operations.
349 | //
350 | // Unlike FileConn, New does not duplicate the existing file descriptor in any
351 | // way. The returned Conn takes ownership of the underlying file descriptor.
352 | func New(fd int, name string) (*Conn, error) {
353 | 	// All Conn I/O is nonblocking for integration with Go's runtime network
354 | 	// poller. Depending on the OS this might already be set but it can't hurt
355 | 	// to set it again.
356 | 	if err := unix.SetNonblock(fd, true); err != nil {
357 | 		return nil, os.NewSyscallError("setnonblock", err)
358 | 	}
359 | 
360 | 	// os.NewFile registers the non-blocking file descriptor with the runtime
361 | 	// poller, which is then used for most subsequent operations except those
362 | 	// that require raw I/O via SyscallConn.
363 | 	//
364 | 	// See also: https://golang.org/pkg/os/#NewFile
365 | 	f := os.NewFile(uintptr(fd), name)
366 | 	rc, err := f.SyscallConn()
367 | 	if err != nil {
368 | 		return nil, err
369 | 	}
370 | 
371 | 	c := &Conn{
372 | 		name: name,
373 | 		fd:   f,
374 | 		rc:   rc,
375 | 	}
376 | 
377 | 	// Probe the file descriptor for socket settings.
378 | 	sotype, err := c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_TYPE)
379 | 	switch {
380 | 	case err == nil:
381 | 		// File is a socket, check its properties.
382 | 		c.facts = facts{
383 | 			isStream:      sotype == unix.SOCK_STREAM,
384 | 			zeroReadIsEOF: sotype != unix.SOCK_DGRAM && sotype != unix.SOCK_RAW,
385 | 		}
386 | 	case errors.Is(err, unix.ENOTSOCK):
387 | 		// File is not a socket, treat it as a regular file.
388 | 		c.facts = facts{
389 | 			isStream:      true,
390 | 			zeroReadIsEOF: true,
391 | 		}
392 | 	default:
393 | 		return nil, err
394 | 	}
395 | 
396 | 	return c, nil
397 | }
398 | 
399 | // Low-level methods which provide raw system call access.
400 | 
401 | // Accept wraps accept(2) or accept4(2) depending on the operating system, but
402 | // returns a Conn for the accepted connection rather than a raw file descriptor.
403 | //
404 | // If the operating system supports accept4(2) (which allows flags),
405 | // SOCK_CLOEXEC and SOCK_NONBLOCK are automatically applied to flags to mirror
406 | // the standard library's socket flag behaviors.
407 | //
408 | // If the operating system only supports accept(2) (which does not allow flags)
409 | // and flags is not zero, an error will be returned.
410 | //
411 | // Accept obeys context cancelation and uses the deadline set on the context to
412 | // cancel accepting the next connection. If a deadline is set on ctx, this
413 | // deadline will override any previous deadlines set using SetDeadline or
414 | // SetReadDeadline. Upon return, the read deadline is cleared.
415 | func (c *Conn) Accept(ctx context.Context, flags int) (*Conn, unix.Sockaddr, error) {
416 | 	type ret struct {
417 | 		nfd int
418 | 		sa  unix.Sockaddr
419 | 	}
420 | 
421 | 	r, err := readT(c, ctx, sysAccept, func(fd int) (ret, error) {
422 | 		// Either accept(2) or accept4(2) depending on the OS.
423 | 		nfd, sa, err := accept(fd, flags|socketFlags)
424 | 		return ret{nfd, sa}, err
425 | 	})
426 | 	if err != nil {
427 | 		// internal/poll, context error, or user function error.
428 | 		return nil, nil, err
429 | 	}
430 | 
431 | 	// Successfully accepted a connection, wrap it in a Conn for use by the
432 | 	// caller.
433 | 	ac, err := New(r.nfd, c.name)
434 | 	if err != nil {
435 | 		return nil, nil, err
436 | 	}
437 | 
438 | 	return ac, r.sa, nil
439 | }
440 | 
441 | // Bind wraps bind(2).
442 | func (c *Conn) Bind(sa unix.Sockaddr) error {
443 | 	return c.control("bind", func(fd int) error { return unix.Bind(fd, sa) })
444 | }
445 | 
446 | // Connect wraps connect(2). In order to verify that the underlying socket is
447 | // connected to a remote peer, Connect calls getpeername(2) and returns the
448 | // unix.Sockaddr from that call.
449 | //
450 | // Connect obeys context cancelation and uses the deadline set on the context to
451 | // cancel connecting to a remote peer. If a deadline is set on ctx, this
452 | // deadline will override any previous deadlines set using SetDeadline or
453 | // SetWriteDeadline. Upon return, the write deadline is cleared.
454 | func (c *Conn) Connect(ctx context.Context, sa unix.Sockaddr) (unix.Sockaddr, error) {
455 | 	const op = "connect"
456 | 
457 | 	// TODO(mdlayher): it would seem that trying to connect to unbound vsock
458 | 	// listeners by calling Connect multiple times results in ECONNRESET for the
459 | 	// first and nil error for subsequent calls. Do we need to memoize the
460 | 	// error? Check what the stdlib behavior is.
461 | 
462 | 	var (
463 | 		// Track progress between invocations of the write closure. We don't
464 | 		// have an explicit WaitWrite call like internal/poll does, so we have
465 | 		// to wait until the runtime calls the closure again to indicate we can
466 | 		// write.
467 | 		progress uint32
468 | 
469 | 		// Capture closure sockaddr and error.
470 | 		rsa unix.Sockaddr
471 | 		err error
472 | 	)
473 | 
474 | 	doErr := c.write(ctx, op, func(fd int) error {
475 | 		if atomic.AddUint32(&progress, 1) == 1 {
476 | 			// First call: initiate connect.
477 | 			return unix.Connect(fd, sa)
478 | 		}
479 | 
480 | 		// Subsequent calls: the runtime network poller indicates fd is
481 | 		// writable. Check for errno.
482 | 		errno, gerr := c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_ERROR)
483 | 		if gerr != nil {
484 | 			return gerr
485 | 		}
486 | 		if errno != 0 {
487 | 			// Connection is still not ready or failed. If errno indicates
488 | 			// the socket is not ready, we will wait for the next write
489 | 			// event. Otherwise we propagate this errno back to the as a
490 | 			// permanent error.
491 | 			uerr := unix.Errno(errno)
492 | 			err = uerr
493 | 			return uerr
494 | 		}
495 | 
496 | 		// According to internal/poll, it's possible for the runtime network
497 | 		// poller to spuriously wake us and return errno 0 for SO_ERROR.
498 | 		// Make sure we are actually connected to a peer.
499 | 		peer, err := c.Getpeername()
500 | 		if err != nil {
501 | 			// internal/poll unconditionally goes back to WaitWrite.
502 | 			// Synthesize an error that will do the same for us.
503 | 			return unix.EAGAIN
504 | 		}
505 | 
506 | 		// Connection complete.
507 | 		rsa = peer
508 | 		return nil
509 | 	})
510 | 	if doErr != nil {
511 | 		// internal/poll or context error.
512 | 		return nil, doErr
513 | 	}
514 | 
515 | 	if err == unix.EISCONN {
516 | 		// TODO(mdlayher): is this block obsolete with the addition of the
517 | 		// getsockopt SO_ERROR check above?
518 | 		//
519 | 		// EISCONN is reported if the socket is already established and should
520 | 		// not be treated as an error.
521 | 		//  - Darwin reports this for at least TCP sockets
522 | 		//  - Linux reports this for at least AF_VSOCK sockets
523 | 		return rsa, nil
524 | 	}
525 | 
526 | 	return rsa, os.NewSyscallError(op, err)
527 | }
528 | 
529 | // Getsockname wraps getsockname(2).
530 | func (c *Conn) Getsockname() (unix.Sockaddr, error) {
531 | 	return controlT(c, "getsockname", unix.Getsockname)
532 | }
533 | 
534 | // Getpeername wraps getpeername(2).
535 | func (c *Conn) Getpeername() (unix.Sockaddr, error) {
536 | 	return controlT(c, "getpeername", unix.Getpeername)
537 | }
538 | 
539 | // GetsockoptICMPv6Filter wraps getsockopt(2) for *unix.ICMPv6Filter values.
540 | func (c *Conn) GetsockoptICMPv6Filter(level, opt int) (*unix.ICMPv6Filter, error) {
541 | 	return controlT(c, "getsockopt", func(fd int) (*unix.ICMPv6Filter, error) {
542 | 		return unix.GetsockoptICMPv6Filter(fd, level, opt)
543 | 	})
544 | }
545 | 
546 | // GetsockoptInt wraps getsockopt(2) for integer values.
547 | func (c *Conn) GetsockoptInt(level, opt int) (int, error) {
548 | 	return controlT(c, "getsockopt", func(fd int) (int, error) {
549 | 		return unix.GetsockoptInt(fd, level, opt)
550 | 	})
551 | }
552 | 
553 | // GetsockoptString wraps getsockopt(2) for string values.
554 | func (c *Conn) GetsockoptString(level, opt int) (string, error) {
555 | 	return controlT(c, "getsockopt", func(fd int) (string, error) {
556 | 		return unix.GetsockoptString(fd, level, opt)
557 | 	})
558 | }
559 | 
560 | // Listen wraps listen(2).
561 | func (c *Conn) Listen(n int) error {
562 | 	return c.control("listen", func(fd int) error { return unix.Listen(fd, n) })
563 | }
564 | 
565 | // Recvmsg wraps recvmsg(2).
566 | func (c *Conn) Recvmsg(ctx context.Context, p, oob []byte, flags int) (int, int, int, unix.Sockaddr, error) {
567 | 	type ret struct {
568 | 		n, oobn, recvflags int
569 | 		from               unix.Sockaddr
570 | 	}
571 | 
572 | 	r, err := readT(c, ctx, "recvmsg", func(fd int) (ret, error) {
573 | 		n, oobn, recvflags, from, err := unix.Recvmsg(fd, p, oob, flags)
574 | 		return ret{n, oobn, recvflags, from}, err
575 | 	})
576 | 	if r.n == 0 && err == nil && c.facts.zeroReadIsEOF {
577 | 		return 0, 0, 0, nil, io.EOF
578 | 	}
579 | 
580 | 	return r.n, r.oobn, r.recvflags, r.from, err
581 | }
582 | 
583 | // Recvfrom wraps recvfrom(2).
584 | func (c *Conn) Recvfrom(ctx context.Context, p []byte, flags int) (int, unix.Sockaddr, error) {
585 | 	type ret struct {
586 | 		n    int
587 | 		addr unix.Sockaddr
588 | 	}
589 | 
590 | 	out, err := readT(c, ctx, "recvfrom", func(fd int) (ret, error) {
591 | 		n, addr, err := unix.Recvfrom(fd, p, flags)
592 | 		return ret{n, addr}, err
593 | 	})
594 | 	if out.n == 0 && err == nil && c.facts.zeroReadIsEOF {
595 | 		return 0, nil, io.EOF
596 | 	}
597 | 
598 | 	return out.n, out.addr, err
599 | }
600 | 
601 | // Sendmsg wraps sendmsg(2).
602 | func (c *Conn) Sendmsg(ctx context.Context, p, oob []byte, to unix.Sockaddr, flags int) (int, error) {
603 | 	return writeT(c, ctx, "sendmsg", func(fd int) (int, error) {
604 | 		return unix.SendmsgN(fd, p, oob, to, flags)
605 | 	})
606 | }
607 | 
608 | // Sendto wraps sendto(2).
609 | func (c *Conn) Sendto(ctx context.Context, p []byte, flags int, to unix.Sockaddr) error {
610 | 	return c.write(ctx, "sendto", func(fd int) error {
611 | 		return unix.Sendto(fd, p, flags, to)
612 | 	})
613 | }
614 | 
615 | // SetsockoptICMPv6Filter wraps setsockopt(2) for *unix.ICMPv6Filter values.
616 | func (c *Conn) SetsockoptICMPv6Filter(level, opt int, filter *unix.ICMPv6Filter) error {
617 | 	return c.control("setsockopt", func(fd int) error {
618 | 		return unix.SetsockoptICMPv6Filter(fd, level, opt, filter)
619 | 	})
620 | }
621 | 
622 | // SetsockoptInt wraps setsockopt(2) for integer values.
623 | func (c *Conn) SetsockoptInt(level, opt, value int) error {
624 | 	return c.control("setsockopt", func(fd int) error {
625 | 		return unix.SetsockoptInt(fd, level, opt, value)
626 | 	})
627 | }
628 | 
629 | // SetsockoptString wraps setsockopt(2) for string values.
630 | func (c *Conn) SetsockoptString(level, opt int, value string) error {
631 | 	return c.control("setsockopt", func(fd int) error {
632 | 		return unix.SetsockoptString(fd, level, opt, value)
633 | 	})
634 | }
635 | 
636 | // Shutdown wraps shutdown(2).
637 | func (c *Conn) Shutdown(how int) error {
638 | 	return c.control("shutdown", func(fd int) error { return unix.Shutdown(fd, how) })
639 | }
640 | 
641 | // Conn low-level read/write/control functions. These functions mirror the
642 | // syscall.RawConn APIs but the input closures return errors rather than
643 | // booleans.
644 | 
645 | // read wraps readT to execute a function and capture its error result. This is
646 | // a convenience wrapper for functions which don't return any extra values.
647 | func (c *Conn) read(ctx context.Context, op string, f func(fd int) error) error {
648 | 	_, err := readT(c, ctx, op, func(fd int) (struct{}, error) {
649 | 		return struct{}{}, f(fd)
650 | 	})
651 | 	return err
652 | }
653 | 
654 | // write executes f, a write function, against the associated file descriptor.
655 | // op is used to create an *os.SyscallError if the file descriptor is closed.
656 | func (c *Conn) write(ctx context.Context, op string, f func(fd int) error) error {
657 | 	_, err := writeT(c, ctx, op, func(fd int) (struct{}, error) {
658 | 		return struct{}{}, f(fd)
659 | 	})
660 | 	return err
661 | }
662 | 
663 | // readT executes c.rc.Read for op using the input function, returning a newly
664 | // allocated result T.
665 | func readT[T any](c *Conn, ctx context.Context, op string, f func(fd int) (T, error)) (T, error) {
666 | 	return rwT(c, rwContext[T]{
667 | 		Context: ctx,
668 | 		Type:    read,
669 | 		Op:      op,
670 | 		Do:      f,
671 | 	})
672 | }
673 | 
674 | // writeT executes c.rc.Write for op using the input function, returning a newly
675 | // allocated result T.
676 | func writeT[T any](c *Conn, ctx context.Context, op string, f func(fd int) (T, error)) (T, error) {
677 | 	return rwT(c, rwContext[T]{
678 | 		Context: ctx,
679 | 		Type:    write,
680 | 		Op:      op,
681 | 		Do:      f,
682 | 	})
683 | }
684 | 
// readWrite distinguishes operations that read from operations that write.
type readWrite bool

// The two readWrite values. The zero value of readWrite is read.
const (
	read  = readWrite(false)
	write = readWrite(true)
)
693 | 
694 | // An rwContext provides arguments to rwT.
695 | type rwContext[T any] struct {
696 | 	// The caller's context passed for cancelation.
697 | 	Context context.Context
698 | 
699 | 	// The type of an operation: read or write.
700 | 	Type readWrite
701 | 
702 | 	// The name of the operation used in errors.
703 | 	Op string
704 | 
705 | 	// The actual function to perform.
706 | 	Do func(fd int) (T, error)
707 | }
708 | 
709 | // rwT executes c.rc.Read or c.rc.Write (depending on the value of rw.Type) for
710 | // rw.Op using the input function, returning a newly allocated result T.
711 | //
712 | // It obeys context cancelation and the rw.Context must not be nil.
713 | func rwT[T any](c *Conn, rw rwContext[T]) (T, error) {
714 | 	if atomic.LoadUint32(&c.closed) != 0 {
715 | 		// If the file descriptor is already closed, do nothing.
716 | 		return *new(T), os.NewSyscallError(rw.Op, unix.EBADF)
717 | 	}
718 | 
719 | 	if err := rw.Context.Err(); err != nil {
720 | 		// Early exit due to context cancel.
721 | 		return *new(T), os.NewSyscallError(rw.Op, err)
722 | 	}
723 | 
724 | 	var (
725 | 		// The read or write function used to access the runtime network poller.
726 | 		poll func(func(uintptr) bool) error
727 | 
728 | 		// The read or write function used to set the matching deadline.
729 | 		deadline func(time.Time) error
730 | 	)
731 | 
732 | 	if rw.Type == write {
733 | 		poll = c.rc.Write
734 | 		deadline = c.SetWriteDeadline
735 | 	} else {
736 | 		poll = c.rc.Read
737 | 		deadline = c.SetReadDeadline
738 | 	}
739 | 
740 | 	var (
741 | 		// Whether or not the context carried a deadline we are actively using
742 | 		// for cancelation.
743 | 		setDeadline bool
744 | 
745 | 		// Signals for the cancelation watcher goroutine.
746 | 		wg    sync.WaitGroup
747 | 		doneC = make(chan struct{})
748 | 
749 | 		// Atomic: reports whether we have to disarm the deadline.
750 | 		needDisarm atomic.Bool
751 | 	)
752 | 
753 | 	// On cancel, clean up the watcher.
754 | 	defer func() {
755 | 		close(doneC)
756 | 		wg.Wait()
757 | 	}()
758 | 
759 | 	if d, ok := rw.Context.Deadline(); ok {
760 | 		// The context has an explicit deadline. We will use it for cancelation
761 | 		// but disarm it after poll for the next call.
762 | 		if err := deadline(d); err != nil {
763 | 			return *new(T), err
764 | 		}
765 | 		setDeadline = true
766 | 		needDisarm.Store(true)
767 | 	} else {
768 | 		// The context does not have an explicit deadline. We have to watch for
769 | 		// cancelation so we can propagate that signal to immediately unblock
770 | 		// the runtime network poller.
771 | 		//
772 | 		// TODO(mdlayher): is it possible to detect a background context vs a
773 | 		// context with possible future cancel?
774 | 		wg.Add(1)
775 | 		go func() {
776 | 			defer wg.Done()
777 | 
778 | 			select {
779 | 			case <-rw.Context.Done():
780 | 				// Cancel the operation. Make the caller disarm after poll
781 | 				// returns.
782 | 				needDisarm.Store(true)
783 | 				_ = deadline(time.Unix(0, 1))
784 | 			case <-doneC:
785 | 				// Nothing to do.
786 | 			}
787 | 		}()
788 | 	}
789 | 
790 | 	var (
791 | 		t   T
792 | 		err error
793 | 	)
794 | 
795 | 	pollErr := poll(func(fd uintptr) bool {
796 | 		t, err = rw.Do(int(fd))
797 | 		return ready(err)
798 | 	})
799 | 
800 | 	if needDisarm.Load() {
801 | 		_ = deadline(time.Time{})
802 | 	}
803 | 
804 | 	if pollErr != nil {
805 | 		if rw.Context.Err() != nil || (setDeadline && errors.Is(pollErr, os.ErrDeadlineExceeded)) {
806 | 			// The caller canceled the operation or we set a deadline internally
807 | 			// and it was reached.
808 | 			//
809 | 			// Unpack a plain context error. We wait for the context to be done
810 | 			// to synchronize state externally. Otherwise we have noticed I/O
811 | 			// timeout wakeups when we set a deadline but the context was not
812 | 			// yet marked done.
813 | 			<-rw.Context.Done()
814 | 			return *new(T), os.NewSyscallError(rw.Op, rw.Context.Err())
815 | 		}
816 | 
817 | 		// Error from syscall.RawConn methods. Conventionally the standard
818 | 		// library does not wrap internal/poll errors in os.NewSyscallError.
819 | 		return *new(T), pollErr
820 | 	}
821 | 
822 | 	// Result from user function.
823 | 	return t, os.NewSyscallError(rw.Op, err)
824 | }
825 | 
826 | // control executes Conn.control for op using the input function.
827 | func (c *Conn) control(op string, f func(fd int) error) error {
828 | 	_, err := controlT(c, op, func(fd int) (struct{}, error) {
829 | 		return struct{}{}, f(fd)
830 | 	})
831 | 	return err
832 | }
833 | 
834 | // controlT executes c.rc.Control for op using the input function, returning a
835 | // newly allocated result T.
836 | func controlT[T any](c *Conn, op string, f func(fd int) (T, error)) (T, error) {
837 | 	if atomic.LoadUint32(&c.closed) != 0 {
838 | 		// If the file descriptor is already closed, do nothing.
839 | 		return *new(T), os.NewSyscallError(op, unix.EBADF)
840 | 	}
841 | 
842 | 	var (
843 | 		t   T
844 | 		err error
845 | 	)
846 | 
847 | 	doErr := c.rc.Control(func(fd uintptr) {
848 | 		// Repeatedly attempt the syscall(s) invoked by f until completion is
849 | 		// indicated by the return value of ready or the context is canceled.
850 | 		//
851 | 		// The last values for t and err are captured outside of the closure for
852 | 		// use when the loop breaks.
853 | 		for {
854 | 			t, err = f(int(fd))
855 | 			if ready(err) {
856 | 				return
857 | 			}
858 | 		}
859 | 	})
860 | 	if doErr != nil {
861 | 		// Error from syscall.RawConn methods. Conventionally the standard
862 | 		// library does not wrap internal/poll errors in os.NewSyscallError.
863 | 		return *new(T), doErr
864 | 	}
865 | 
866 | 	// Result from user function.
867 | 	return t, os.NewSyscallError(op, err)
868 | }
869 | 
870 | // ready indicates readiness based on the value of err.
871 | func ready(err error) bool {
872 | 	switch err {
873 | 	case unix.EAGAIN, unix.EINPROGRESS, unix.EINTR:
874 | 		// When a socket is in non-blocking mode, we might see a variety of errors:
875 | 		//  - EAGAIN: most common case for a socket read not being ready
876 | 		//  - EINPROGRESS: reported by some sockets when first calling connect
877 | 		//  - EINTR: system call interrupted, more frequently occurs in Go 1.14+
878 | 		//    because goroutines can be asynchronously preempted
879 | 		//
880 | 		// Return false to let the poller wait for readiness. See the source code
881 | 		// for internal/poll.FD.RawRead for more details.
882 | 		return false
883 | 	default:
884 | 		// Ready regardless of whether there was an error or no error.
885 | 		return true
886 | 	}
887 | }
888 | 
// maxRW is the largest number of bytes moved by one read or write on a stream
// descriptor.
//
// Darwin and FreeBSD cannot read or write 2GB or more at a time, even on
// 64-bit systems, and socket implementations on many systems share that
// limit. See golang.org/issue/7812 and golang.org/issue/16266.
//
// 1GB is used rather than, say, 2GB-1 so that subsequent reads stay aligned.
const maxRW = 1024 * 1024 * 1024
895 | 


--------------------------------------------------------------------------------