├── AUTHORS ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── gate ├── gate.go └── gate_test.go ├── ilist └── list.go ├── rand ├── rand.go └── rand_linux.go ├── sleep ├── commit_amd64.s ├── commit_arm64.s ├── commit_asm.go ├── commit_noasm.go ├── empty.s ├── sleep_test.go └── sleep_unsafe.go ├── tcpip ├── adapters │ └── gonet │ │ ├── gonet.go │ │ └── gonet_test.go ├── buffer │ ├── prependable.go │ ├── view.go │ └── view_test.go ├── checker │ └── checker.go ├── hash │ └── jenkins │ │ ├── jenkins.go │ │ └── jenkins_test.go ├── header │ ├── arp.go │ ├── checksum.go │ ├── eth.go │ ├── gue.go │ ├── icmpv4.go │ ├── icmpv6.go │ ├── interfaces.go │ ├── ipv4.go │ ├── ipv6.go │ ├── ipv6_fragment.go │ ├── ipversion_test.go │ ├── tcp.go │ ├── tcp_test.go │ └── udp.go ├── iptables │ ├── iptables.go │ ├── targets.go │ └── types.go ├── link │ ├── channel │ │ └── channel.go │ ├── fdbased │ │ ├── endpoint.go │ │ ├── endpoint_test.go │ │ ├── endpoint_unsafe.go │ │ ├── mmap.go │ │ ├── mmap_stub.go │ │ ├── mmap_unsafe.go │ │ └── packet_dispatchers.go │ ├── loopback │ │ └── loopback.go │ ├── muxed │ │ ├── injectable.go │ │ └── injectable_test.go │ ├── rawfile │ │ ├── blockingpoll_amd64.s │ │ ├── blockingpoll_arm64.s │ │ ├── blockingpoll_noyield_unsafe.go │ │ ├── blockingpoll_yield_unsafe.go │ │ ├── errors.go │ │ └── rawfile_unsafe.go │ ├── sharedmem │ │ ├── pipe │ │ │ ├── pipe.go │ │ │ ├── pipe_test.go │ │ │ ├── pipe_unsafe.go │ │ │ ├── rx.go │ │ │ └── tx.go │ │ ├── queue │ │ │ ├── queue_test.go │ │ │ ├── rx.go │ │ │ └── tx.go │ │ ├── rx.go │ │ ├── sharedmem.go │ │ ├── sharedmem_test.go │ │ ├── sharedmem_unsafe.go │ │ └── tx.go │ ├── sniffer │ │ ├── pcap.go │ │ └── sniffer.go │ ├── tun │ │ └── tun_unsafe.go │ └── waitable │ │ ├── waitable.go │ │ └── waitable_test.go ├── network │ ├── arp │ │ ├── arp.go │ │ └── arp_test.go │ ├── fragmentation │ │ ├── frag_heap.go │ │ ├── frag_heap_test.go │ │ ├── fragmentation.go │ │ ├── fragmentation_test.go │ │ ├── reassembler.go │ │ ├── 
reassembler_list.go │ │ └── reassembler_test.go │ ├── hash │ │ └── hash.go │ ├── ip_test.go │ ├── ipv4 │ │ ├── icmp.go │ │ ├── ipv4.go │ │ └── ipv4_test.go │ └── ipv6 │ │ ├── icmp.go │ │ ├── icmp_test.go │ │ ├── ipv6.go │ │ ├── ipv6_test.go │ │ └── ndp_test.go ├── ports │ ├── ports.go │ └── ports_test.go ├── sample │ ├── tun_tcp_connect │ │ └── main.go │ └── tun_tcp_echo │ │ └── main.go ├── seqnum │ └── seqnum.go ├── stack │ ├── icmp_rate_limit.go │ ├── linkaddrcache.go │ ├── linkaddrcache_test.go │ ├── linkaddrentry_list.go │ ├── nic.go │ ├── registration.go │ ├── route.go │ ├── stack.go │ ├── stack_test.go │ ├── transport_demuxer.go │ ├── transport_demuxer_test.go │ └── transport_test.go ├── tcpip.go ├── tcpip_test.go ├── time.s ├── time_unsafe.go └── transport │ ├── icmp │ ├── endpoint.go │ ├── icmp_packet_list.go │ └── protocol.go │ ├── raw │ ├── endpoint.go │ ├── packet_list.go │ └── protocol.go │ ├── tcp │ ├── accept.go │ ├── connect.go │ ├── cubic.go │ ├── dual_stack_test.go │ ├── endpoint.go │ ├── forwarder.go │ ├── protocol.go │ ├── rcv.go │ ├── reno.go │ ├── sack.go │ ├── sack_scoreboard.go │ ├── sack_scoreboard_test.go │ ├── segment.go │ ├── segment_heap.go │ ├── segment_queue.go │ ├── snd.go │ ├── tcp_noracedetector_test.go │ ├── tcp_sack_test.go │ ├── tcp_segment_list.go │ ├── tcp_test.go │ ├── tcp_timestamp_test.go │ ├── testing │ │ └── context │ │ │ └── context.go │ └── timer.go │ ├── tcpconntrack │ ├── tcp_conntrack.go │ └── tcp_conntrack_test.go │ └── udp │ ├── endpoint.go │ ├── forwarder.go │ ├── protocol.go │ ├── udp_packet_list.go │ └── udp_test.go ├── tmutex ├── tmutex.go └── tmutex_test.go └── waiter ├── waiter.go ├── waiter_list.go └── waiter_test.go /AUTHORS: -------------------------------------------------------------------------------- 1 | # Netstack is part of the gVisor project. 
Please see the gVisor AUTHORS file 2 | # for an official list of authors: 3 | # 4 | # https://gvisor.googlesource.com/gvisor/+/refs/heads/master/AUTHORS 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | This repository is a synthetic mirror. 2 | 3 | Contributions to netstack must be submitted by sending pull requests to 4 | [gVisor](https://www.github.com/google/gvisor), where there are complete 5 | [instructions](https://github.com/google/gvisor/blob/master/CONTRIBUTING.md). 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Netstack 2 | 3 | Netstack is a network stack written in Go. 4 | 5 | ## Getting started 6 | 7 | Try it out on Linux by installing the tun_tcp_echo demo: 8 | 9 | ``` 10 | go install github.com/google/netstack/tcpip/sample/tun_tcp_echo 11 | ``` 12 | 13 | Create a TUN device with: 14 | 15 | ``` 16 | [sudo] ip tuntap add user USERNAME mode tun DEVICE_NAME 17 | [sudo] ip link set DEVICE_NAME up 18 | [sudo] ip addr add IPV4_ADDRESS/MASK_LENGTH dev DEVICE_NAME 19 | ``` 20 | 21 | Then run with: 22 | 23 | ``` 24 | tun_tcp_echo DEVICE_NAME IPV4_ADDRESS PORT 25 | ``` 26 | 27 | ## Contributions 28 | 29 | Please see [CONTRIBUTING.md](CONTRIBUTING.md) for more details. 30 | 31 | ## Issues/Bug Reports 32 | 33 | Netstack is primarily developed as part of 34 | [gVisor](http://www.github.com/google/gvisor) and any issues/bugs should be 35 | filed against the gVisor repository as this repo is not actively monitored for 36 | bug reports. 37 | 38 | ### Disclaimer 39 | 40 | This is not an official Google product (experimental or otherwise), it is just 41 | code that happens to be owned by Google. 
42 | -------------------------------------------------------------------------------- /gate/gate.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package gate provides a usage Gate synchronization primitive. 16 | package gate 17 | 18 | import ( 19 | "sync/atomic" 20 | ) 21 | 22 | const ( 23 | // gateClosed is the bit set in the gate's user count to indicate that 24 | // it has been closed. It is the MSB of the 32-bit field; the other 31 25 | // bits carry the actual count. 26 | gateClosed = 0x80000000 27 | ) 28 | 29 | // Gate is a synchronization primitive that allows concurrent goroutines to 30 | // "enter" it as long as it hasn't been closed yet. Once it's been closed, 31 | // goroutines cannot enter it anymore, but are allowed to leave, and the closer 32 | // will be informed when all goroutines have left. 33 | // 34 | // Many goroutines are allowed to enter the gate concurrently, but only one is 35 | // allowed to close it. 36 | // 37 | // This is similar to a r/w critical section, except that goroutines "entering" 38 | // never block: they either enter immediately or fail to enter. The closer will 39 | // block waiting for all goroutines currently inside the gate to leave. 40 | // 41 | // This primitive is implemented efficiently. 
On x86, only one interlocked 42 | // operation is performed on enter, and one on leave. 43 | // 44 | // This is useful, for example, in cases when a goroutine is trying to clean up 45 | // an object for which multiple goroutines have pointers. In such a case, users 46 | // would be required to enter and leave the gates, and the cleaner would wait 47 | // until all users are gone (and no new ones are allowed) before proceeding. 48 | // 49 | // Users: 50 | // 51 | // if !g.Enter() { 52 | // // Gate is closed, we can't use the object. 53 | // return 54 | // } 55 | // 56 | // // Do something with object. 57 | // [...] 58 | // 59 | // g.Leave() 60 | // 61 | // Closer: 62 | // 63 | // // Prevent new users from using the object, and wait for the existing 64 | // // ones to complete. 65 | // g.Close() 66 | // 67 | // // Clean up the object. 68 | // [...] 69 | // 70 | type Gate struct { 71 | userCount uint32 72 | done chan struct{} 73 | } 74 | 75 | // Enter tries to enter the gate. It will succeed if it hasn't been closed yet, 76 | // in which case the caller must eventually call Leave(). It returns false if the gate is closed or if g is nil. 77 | // 78 | // This function is thread-safe. 79 | func (g *Gate) Enter() bool { 80 | if g == nil { 81 | return false 82 | } 83 | 84 | for { 85 | v := atomic.LoadUint32(&g.userCount) 86 | if v&gateClosed != 0 { 87 | return false 88 | } 89 | 90 | if atomic.CompareAndSwapUint32(&g.userCount, v, v+1) { 91 | return true 92 | } 93 | } 94 | } 95 | 96 | // Leave leaves the gate. This must only be called after a successful call to 97 | // Enter(). If the gate has been closed and this is the last one inside the 98 | // gate, it will notify the closer that the gate is done. It panics if the usage count is already zero. 99 | // 100 | // This function is thread-safe. 
101 | func (g *Gate) Leave() { 102 | for { 103 | v := atomic.LoadUint32(&g.userCount) 104 | if v&^gateClosed == 0 { 105 | panic("leaving a gate with zero usage count") 106 | } 107 | 108 | if atomic.CompareAndSwapUint32(&g.userCount, v, v-1) { 109 | if v == gateClosed+1 { 110 | close(g.done) 111 | } 112 | return 113 | } 114 | } 115 | } 116 | 117 | // Close closes the gate for entering, and waits until all goroutines [that are 118 | // currently inside the gate] leave before returning. 119 | // 120 | // Only one goroutine can call this function at a time: g.done is assigned without synchronization. Calling Close again once the gate is closed and empty returns immediately. 121 | func (g *Gate) Close() { 122 | for { 123 | v := atomic.LoadUint32(&g.userCount) 124 | if v&^gateClosed != 0 && g.done == nil { 125 | g.done = make(chan struct{}) 126 | } 127 | if atomic.CompareAndSwapUint32(&g.userCount, v, v|gateClosed) { 128 | if v&^gateClosed != 0 { 129 | <-g.done 130 | } 131 | return 132 | } 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /gate/gate_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package gate_test 16 | 17 | import ( 18 | "sync" 19 | "testing" 20 | "time" 21 | 22 | "github.com/google/netstack/gate" 23 | ) 24 | 25 | func TestBasicEnter(t *testing.T) { 26 | var g gate.Gate 27 | 28 | if !g.Enter() { 29 | t.Fatalf("Failed to enter when it should be allowed") 30 | } 31 | 32 | g.Leave() 33 | 34 | g.Close() 35 | 36 | if g.Enter() { 37 | t.Fatalf("Allowed to enter when it should fail") 38 | } 39 | } 40 | 41 | func enterFunc(t *testing.T, g *gate.Gate, enter, leave, reenter chan struct{}, done1, done2, done3 *sync.WaitGroup) { 42 | // Wait until instructed to enter. 43 | <-enter 44 | if !g.Enter() { 45 | t.Errorf("Failed to enter when it should be allowed") 46 | } 47 | 48 | done1.Done() 49 | 50 | // Wait until instructed to leave. 51 | <-leave 52 | g.Leave() 53 | 54 | done2.Done() 55 | 56 | // Wait until instructed to reenter; the caller closes the gate first, so Enter must fail. 57 | <-reenter 58 | if g.Enter() { 59 | t.Errorf("Allowed to enter when it should fail") 60 | } 61 | done3.Done() 62 | } 63 | 64 | func TestConcurrentEnter(t *testing.T) { 65 | var g gate.Gate 66 | var done1, done2, done3 sync.WaitGroup 67 | 68 | // Create 1000 worker goroutines. 69 | enter := make(chan struct{}) 70 | leave := make(chan struct{}) 71 | reenter := make(chan struct{}) 72 | done1.Add(1000) 73 | done2.Add(1000) 74 | done3.Add(1000) 75 | for i := 0; i < 1000; i++ { 76 | go enterFunc(t, &g, enter, leave, reenter, &done1, &done2, &done3) 77 | } 78 | 79 | // Tell them all to enter, then leave, waiting for each phase to finish. 80 | close(enter) 81 | done1.Wait() 82 | 83 | close(leave) 84 | done2.Wait() 85 | 86 | // Close the gate, then have the workers try to enter again. 87 | g.Close() 88 | close(reenter) 89 | done3.Wait() 90 | } 91 | 92 | func closeFunc(g *gate.Gate, done chan struct{}) { 93 | g.Close() 94 | close(done) 95 | } 96 | 97 | func TestCloseWaits(t *testing.T) { 98 | var g gate.Gate 99 | 100 | // Enter 10 times. 
101 | for i := 0; i < 10; i++ { 102 | if !g.Enter() { 103 | t.Fatalf("Failed to enter when it should be allowed") 104 | } 105 | } 106 | 107 | // Launch closer. Check that it doesn't complete while goroutines are still inside the gate. 108 | done := make(chan struct{}) 109 | go closeFunc(&g, done) 110 | 111 | for i := 0; i < 10; i++ { 112 | select { 113 | case <-done: 114 | t.Fatalf("Close function completed too soon") 115 | case <-time.After(100 * time.Millisecond): 116 | } 117 | 118 | g.Leave() 119 | } 120 | 121 | // Now the closer must complete. 122 | <-done 123 | } 124 | 125 | func TestMultipleSerialCloses(t *testing.T) { 126 | var g gate.Gate 127 | 128 | // Enter 10 times. 129 | for i := 0; i < 10; i++ { 130 | if !g.Enter() { 131 | t.Fatalf("Failed to enter when it should be allowed") 132 | } 133 | } 134 | 135 | // Launch closer. Check that it doesn't complete while goroutines are still inside the gate. 136 | done := make(chan struct{}) 137 | go closeFunc(&g, done) 138 | 139 | for i := 0; i < 10; i++ { 140 | select { 141 | case <-done: 142 | t.Fatalf("Close function completed too soon") 143 | case <-time.After(100 * time.Millisecond): 144 | } 145 | 146 | g.Leave() 147 | } 148 | 149 | // Now the closer must complete. 150 | <-done 151 | 152 | // Closing again, with the gate already closed and empty, should not block. 153 | done = make(chan struct{}) 154 | go closeFunc(&g, done) 155 | 156 | select { 157 | case <-done: 158 | case <-time.After(2 * time.Second): 159 | t.Fatalf("Second Close is blocking") 160 | } 161 | } 162 | 163 | func worker(g *gate.Gate, done *sync.WaitGroup) { 164 | for { 165 | if !g.Enter() { 166 | break 167 | } 168 | g.Leave() 169 | } 170 | done.Done() 171 | } 172 | 173 | func TestConcurrentAll(t *testing.T) { 174 | var g gate.Gate 175 | var done sync.WaitGroup 176 | 177 | // Launch 1000 goroutines to concurrently enter/leave. 178 | done.Add(1000) 179 | for i := 0; i < 1000; i++ { 180 | go worker(&g, &done) 181 | } 182 | 183 | // Wait for the goroutines to do some work, then close the gate. 
184 | time.Sleep(2 * time.Second) 185 | g.Close() 186 | 187 | // Wait for all of them to complete. 188 | done.Wait() 189 | } 190 | -------------------------------------------------------------------------------- /rand/rand.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // +build !linux 16 | 17 | // Package rand implements a cryptographically secure pseudorandom number 18 | // generator. 19 | package rand 20 | 21 | import "crypto/rand" 22 | 23 | // Reader is the default reader; in this non-Linux build it is crypto/rand's Reader. 24 | var Reader = rand.Reader 25 | 26 | // Read fills b with random bytes by delegating to crypto/rand.Read. 27 | func Read(b []byte) (int, error) { 28 | return rand.Read(b) 29 | } 30 | -------------------------------------------------------------------------------- /rand/rand_linux.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package rand implements a cryptographically secure pseudorandom number 16 | // generator. 17 | package rand 18 | 19 | import ( 20 | "crypto/rand" 21 | "io" 22 | "sync" 23 | 24 | "golang.org/x/sys/unix" 25 | ) 26 | 27 | // reader implements an io.Reader that returns pseudorandom bytes. It uses getrandom(2) unless the first call reports ENOSYS, in which case it falls back to crypto/rand. 28 | type reader struct { 29 | once sync.Once 30 | useGetrandom bool 31 | } 32 | 33 | // Read implements io.Reader.Read. The first call also probes getrandom(2) support; the probe's result is ignored and p is filled again below. 34 | func (r *reader) Read(p []byte) (int, error) { 35 | r.once.Do(func() { 36 | _, err := unix.Getrandom(p, 0) 37 | if err != unix.ENOSYS { 38 | r.useGetrandom = true 39 | } 40 | }) 41 | 42 | if r.useGetrandom { 43 | return unix.Getrandom(p, 0) 44 | } 45 | return rand.Read(p) 46 | } 47 | 48 | // Reader is the default reader. 49 | var Reader io.Reader = &reader{} 50 | 51 | // Read reads from the default reader, filling b completely or returning an error (io.ReadFull). 52 | func Read(b []byte) (int, error) { 53 | return io.ReadFull(Reader, b) 54 | } 55 | 56 | // Init can be called to make sure /dev/urandom is pre-opened on kernels that 57 | // do not support getrandom(2). It does so by reading a single byte through Read. 58 | func Init() error { 59 | p := make([]byte, 1) 60 | _, err := Read(p) 61 | return err 62 | } 63 | -------------------------------------------------------------------------------- /sleep/commit_amd64.s: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "textflag.h" 16 | 17 | #define preparingG 1 18 | 19 | // See commit_noasm.go for a description of commitSleep. 20 | // 21 | // func commitSleep(g uintptr, waitingG *uintptr) bool 22 | TEXT ·commitSleep(SB),NOSPLIT,$0-24 23 | MOVQ waitingG+8(FP), CX 24 | MOVQ g+0(FP), DX 25 | 26 | // Store the G in waitingG if it's still preparingG. If it's anything 27 | // else it means a waker has aborted the sleep. LOCK CMPXCHGQ compares AX with the word at 0(CX) and stores DX there only on a match; SETEQ then turns the outcome into the bool result. 28 | MOVQ $preparingG, AX 29 | LOCK 30 | CMPXCHGQ DX, 0(CX) 31 | 32 | SETEQ AX 33 | MOVB AX, ret+16(FP) 34 | 35 | RET 36 | -------------------------------------------------------------------------------- /sleep/commit_arm64.s: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "textflag.h" 16 | 17 | #define preparingG 1 18 | 19 | // See commit_noasm.go for a description of commitSleep. 
20 | // 21 | // func commitSleep(g uintptr, waitingG *uintptr) bool 22 | TEXT ·commitSleep(SB),NOSPLIT,$0-24 23 | MOVD waitingG+8(FP), R0 24 | MOVD $preparingG, R1 25 | MOVD G+0(FP), R2 26 | 27 | // Store the G in waitingG if it's still preparingG. If it's anything 28 | // else it means a waker has aborted the sleep. The LDAXR/STLXR pair is retried while STLXR reports a non-zero status in R3; CSET EQ then converts the CMP result into the bool return value. NOTE(review): the first argument is spelled G here but g in the Go declaration, presumably to avoid the assembler's g register name - confirm before renaming. 29 | again: 30 | LDAXR (R0), R3 31 | CMP R1, R3 32 | BNE ok 33 | STLXR R2, (R0), R3 34 | CBNZ R3, again 35 | ok: 36 | CSET EQ, R0 37 | MOVB R0, ret+16(FP) 38 | RET 39 | -------------------------------------------------------------------------------- /sleep/commit_asm.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // +build amd64 arm64 16 | 17 | package sleep 18 | 19 | // commitSleep is declared here without a body; on amd64 and arm64 it is implemented in assembly. See commit_noasm.go for a description of commitSleep. 20 | func commitSleep(g uintptr, waitingG *uintptr) bool 21 | -------------------------------------------------------------------------------- /sleep/commit_noasm.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // +build !race 16 | // +build !amd64,!arm64 17 | 18 | package sleep 19 | 20 | import "sync/atomic" 21 | 22 | // commitSleep signals to wakers that the given g is now sleeping. Wakers can 23 | // then fetch it and wake it. 24 | // 25 | // The commit may fail if wakers have been asserted after our last check, in 26 | // which case they will have set s.waitingG to zero. It returns true if g was stored in waitingG and false if the wait was aborted. 27 | // 28 | // On amd64 and arm64 it is written in assembly because it is called from g0, so it doesn't have 29 | // a race context; this Go version is built only for other architectures with the race detector off. 30 | func commitSleep(g uintptr, waitingG *uintptr) bool { 31 | for { 32 | // Check if the wait was aborted. 33 | if atomic.LoadUintptr(waitingG) == 0 { 34 | return false 35 | } 36 | 37 | // Try to store the G so that wakers know who to wake. 38 | if atomic.CompareAndSwapUintptr(waitingG, preparingG, g) { 39 | return true 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /sleep/empty.s: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Empty assembly file so empty func definitions work. 16 | -------------------------------------------------------------------------------- /tcpip/buffer/prependable.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package buffer 16 | 17 | // Prependable is a buffer that grows backwards, that is, more data can be 18 | // prepended to it. It is useful when building networking packets, where each 19 | // protocol adds its own headers to the front of the higher-level protocol 20 | // header and payload; for example, TCP would prepend its header to the payload, 21 | // then IP would prepend its own, then ethernet. 22 | type Prependable struct { 23 | // buf is the buffer backing the prependable buffer. 24 | buf View 25 | 26 | // usedIdx is the index where the used part of the buffer begins. 
27 | usedIdx int 28 | } 29 | 30 | // NewPrependable allocates a new prependable buffer with the given size; the whole buffer is initially available for prepending. 31 | func NewPrependable(size int) Prependable { 32 | return Prependable{buf: NewView(size), usedIdx: size} 33 | } 34 | 35 | // NewPrependableFromView creates an entirely-used Prependable from a View. 36 | // 37 | // NewPrependableFromView takes ownership of v. Note that since the entire 38 | // prependable is used, further attempts to call Prepend will note that size > 39 | // p.usedIdx and return nil. 40 | func NewPrependableFromView(v View) Prependable { 41 | return Prependable{buf: v, usedIdx: 0} 42 | } 43 | 44 | // View returns a View of the backing buffer that contains all prepended 45 | // data so far. 46 | func (p Prependable) View() View { 47 | return p.buf[p.usedIdx:] 48 | } 49 | 50 | // UsedLength returns the number of bytes used so far. 51 | func (p Prependable) UsedLength() int { 52 | return len(p.buf) - p.usedIdx 53 | } 54 | 55 | // AvailableLength returns the number of bytes still available for prepending. 56 | func (p Prependable) AvailableLength() int { 57 | return p.usedIdx 58 | } 59 | 60 | // TrimBack removes size bytes from the end of the backing buffer. 61 | func (p *Prependable) TrimBack(size int) { 62 | p.buf = p.buf[:len(p.buf)-size] 63 | } 64 | 65 | // Prepend reserves the requested space in front of the buffer, returning a 66 | // slice that represents the reserved space. It returns nil, leaving the buffer unchanged, if size exceeds the available length. 67 | func (p *Prependable) Prepend(size int) []byte { 68 | if size > p.usedIdx { 69 | return nil 70 | } 71 | 72 | p.usedIdx -= size 73 | return p.View()[:size:size] 74 | } 75 | -------------------------------------------------------------------------------- /tcpip/buffer/view.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package buffer provides the implementation of a buffer view. 16 | package buffer 17 | 18 | // View is a slice of a buffer, with convenience methods. 19 | type View []byte 20 | 21 | // NewView allocates a new buffer and returns an initialized view that covers 22 | // the whole buffer. 23 | func NewView(size int) View { 24 | return make(View, size) 25 | } 26 | 27 | // NewViewFromBytes allocates a new buffer and copies in the given bytes. 28 | func NewViewFromBytes(b []byte) View { 29 | return append(View(nil), b...) 30 | } 31 | 32 | // TrimFront removes the first "count" bytes from the visible section of the 33 | // buffer. It panics if count is greater than the length of the view. 34 | func (v *View) TrimFront(count int) { 35 | *v = (*v)[count:] 36 | } 37 | 38 | // CapLength irreversibly reduces the length of the visible section of the 39 | // buffer to the value specified. 40 | func (v *View) CapLength(length int) { 41 | // We also set the slice cap because if we don't, one would be able to 42 | // expand the view back to include the region just excluded. We want to 43 | // prevent that to avoid potential data leak if we have uninitialized 44 | // data in excluded region. 45 | *v = (*v)[:length:length] 46 | } 47 | 48 | // ToVectorisedView returns a VectorisedView containing the receiver. 49 | func (v View) ToVectorisedView() VectorisedView { 50 | return NewVectorisedView(len(v), []View{v}) 51 | } 52 | 53 | // VectorisedView is a vectorised version of View using non-contiguous memory. 54 | // It supports all the convenience methods supported by View. 
55 | // 56 | // +stateify savable 57 | type VectorisedView struct { 58 | views []View 59 | size int 60 | } 61 | 62 | // NewVectorisedView creates a new vectorised view from an already-allocated slice 63 | // of View and sets its size. The size is stored as given; it is not checked against the views. 64 | func NewVectorisedView(size int, views []View) VectorisedView { 65 | return VectorisedView{views: views, size: size} 66 | } 67 | 68 | // TrimFront removes the first "count" bytes of the vectorised view. If count covers the whole content, every view is removed. 69 | func (vv *VectorisedView) TrimFront(count int) { 70 | for count > 0 && len(vv.views) > 0 { 71 | if count < len(vv.views[0]) { 72 | vv.size -= count 73 | vv.views[0].TrimFront(count) 74 | return 75 | } 76 | count -= len(vv.views[0]) 77 | vv.RemoveFirst() 78 | } 79 | } 80 | 81 | // CapLength irreversibly reduces the length of the vectorised view. A negative length is treated as zero, and a length larger than the current size leaves the view unchanged. 82 | func (vv *VectorisedView) CapLength(length int) { 83 | if length < 0 { 84 | length = 0 85 | } 86 | if vv.size < length { 87 | return 88 | } 89 | vv.size = length 90 | for i := range vv.views { 91 | v := &vv.views[i] 92 | if len(*v) >= length { 93 | if length == 0 { 94 | vv.views = vv.views[:i] 95 | } else { 96 | v.CapLength(length) 97 | vv.views = vv.views[:i+1] 98 | } 99 | return 100 | } 101 | length -= len(*v) 102 | } 103 | } 104 | 105 | // Clone returns a clone of this VectorisedView. 106 | // If the buffer argument is large enough to contain all the Views of this VectorisedView, 107 | // the method will avoid allocations and use the buffer to store the Views of the clone. Only the slice of Views is copied; the underlying bytes are shared with the receiver. 108 | func (vv VectorisedView) Clone(buffer []View) VectorisedView { 109 | return VectorisedView{views: append(buffer[:0], vv.views...), size: vv.size} 110 | } 111 | 112 | // First returns the first view of the vectorised view, or nil if there are no views. 113 | func (vv VectorisedView) First() View { 114 | if len(vv.views) == 0 { 115 | return nil 116 | } 117 | return vv.views[0] 118 | } 119 | 120 | // RemoveFirst removes the first view of the vectorised view; it does nothing if there are no views. 
121 | func (vv *VectorisedView) RemoveFirst() { 122 | if len(vv.views) == 0 { 123 | return 124 | } 125 | vv.size -= len(vv.views[0]) 126 | vv.views = vv.views[1:] 127 | } 128 | 129 | // Size returns the size in bytes of the entire content stored in the vectorised view. 130 | func (vv VectorisedView) Size() int { 131 | return vv.size 132 | } 133 | 134 | // ToView returns a single view containing the content of the vectorised view. 135 | // 136 | // If the vectorised view contains a single view, that view will be returned 137 | // directly. Otherwise the contents are copied into a newly allocated buffer. 138 | func (vv VectorisedView) ToView() View { 139 | if len(vv.views) == 1 { 140 | return vv.views[0] 141 | } 142 | u := make([]byte, 0, vv.size) 143 | for _, v := range vv.views { 144 | u = append(u, v...) 145 | } 146 | return u 147 | } 148 | 149 | // Views returns the slice containing all the views. 150 | func (vv VectorisedView) Views() []View { 151 | return vv.views 152 | } 153 | 154 | // Append appends the views in a vectorised view to this vectorised view. 155 | func (vv *VectorisedView) Append(vv2 VectorisedView) { 156 | vv.views = append(vv.views, vv2.views...) 157 | vv.size += vv2.size 158 | } 159 | -------------------------------------------------------------------------------- /tcpip/hash/jenkins/jenkins.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// Sum32 holds the running state of Jenkins's one_at_a_time hash.
//
// Using the Sum32 type directly (rather than New32 below) avoids
// allocations.
type Sum32 uint32

// New32 returns a new 32-bit Jenkins's one_at_a_time hash.Hash.
//
// Its Sum method lays the value out in big-endian byte order.
func New32() hash.Hash32 {
	return new(Sum32)
}

// Reset resets the hash to its initial state.
func (s *Sum32) Reset() {
	*s = 0
}

// Sum32 returns the hash value. The final mixing steps are applied to a copy,
// so the running state is left unchanged.
func (s *Sum32) Sum32() uint32 {
	v := uint32(*s)

	v += v << 3
	v ^= v >> 11
	v += v << 15

	return v
}

// Write adds more data to the running hash.
//
// It never returns an error.
func (s *Sum32) Write(data []byte) (int, error) {
	acc := uint32(*s)
	for i := range data {
		acc += uint32(data[i])
		acc += acc << 10
		acc ^= acc >> 6
	}
	*s = Sum32(acc)
	return len(data), nil
}

// Size returns the number of bytes Sum will return.
func (s *Sum32) Size() int {
	return 4
}

// BlockSize returns the hash's underlying block size.
func (s *Sum32) BlockSize() int {
	return 1
}

// Sum appends the current hash to in, most significant byte first, and
// returns the resulting slice.
//
// It does not change the underlying hash state.
func (s *Sum32) Sum(in []byte) []byte {
	v := s.Sum32()
	var be [4]byte
	be[0] = byte(v >> 24)
	be[1] = byte(v >> 16)
	be[2] = byte(v >> 8)
	be[3] = byte(v)
	return append(in, be[:]...)
}
h.Write(data) 58 | sum := h.Sum(nil) 59 | 60 | if size := h.Size(); size != len(sum) { 61 | t.Fatalf("Size()=%d but len(Sum())=%d", size, len(sum)) 62 | } 63 | 64 | if a := h.Sum(nil); !bytes.Equal(sum, a) { 65 | t.Fatalf("first Sum()=0x%x, second Sum()=0x%x", sum, a) 66 | } 67 | 68 | h.Reset() 69 | h.Write(data) 70 | if a := h.Sum(nil); !bytes.Equal(sum, a) { 71 | t.Fatalf("Sum()=0x%x, but after Reset() Sum()=0x%x", sum, a) 72 | } 73 | 74 | h.Reset() 75 | h.Write(data[:2]) 76 | h.Write(data[2:]) 77 | if a := h.Sum(nil); !bytes.Equal(sum, a) { 78 | t.Fatalf("Sum()=0x%x, but with partial writes, Sum()=0x%x", sum, a) 79 | } 80 | 81 | sum32 := h.(hash.Hash32).Sum32() 82 | if sum32 != binary.BigEndian.Uint32(sum) { 83 | t.Fatalf("Sum()=0x%x, but Sum32()=0x%x", sum, sum32) 84 | } 85 | } 86 | 87 | func BenchmarkJenkins32KB(b *testing.B) { 88 | h := New32() 89 | 90 | b.SetBytes(1024) 91 | data := make([]byte, 1024) 92 | for i := range data { 93 | data[i] = byte(i) 94 | } 95 | in := make([]byte, 0, h.Size()) 96 | 97 | b.ResetTimer() 98 | for i := 0; i < b.N; i++ { 99 | h.Reset() 100 | h.Write(data) 101 | h.Sum(in) 102 | } 103 | } 104 | 105 | func BenchmarkFnv32(b *testing.B) { 106 | arr := make([]int64, 1000) 107 | for i := 0; i < b.N; i++ { 108 | var payload [8]byte 109 | binary.BigEndian.PutUint32(payload[:4], uint32(i)) 110 | binary.BigEndian.PutUint32(payload[4:], uint32(i)) 111 | 112 | h := fnv.New32() 113 | h.Write(payload[:]) 114 | idx := int(h.Sum32()) % len(arr) 115 | arr[idx]++ 116 | } 117 | b.StopTimer() 118 | c := 0 119 | if b.N > 1000000 { 120 | for i := 0; i < len(arr)-1; i++ { 121 | if math.Abs(float64(arr[i]-arr[i+1]))/float64(arr[i]) > float64(0.1) { 122 | if c == 0 { 123 | b.Logf("i %d val[i] %d val[i+1] %d b.N %b\n", i, arr[i], arr[i+1], b.N) 124 | } 125 | c++ 126 | } 127 | } 128 | if c > 0 { 129 | b.Logf("Unbalanced buckets: %d", c) 130 | } 131 | } 132 | } 133 | 134 | func BenchmarkSum32(b *testing.B) { 135 | arr := make([]int64, 1000) 136 | for i := 0; 
i < b.N; i++ { 137 | var payload [8]byte 138 | binary.BigEndian.PutUint32(payload[:4], uint32(i)) 139 | binary.BigEndian.PutUint32(payload[4:], uint32(i)) 140 | h := Sum32(0) 141 | h.Write(payload[:]) 142 | idx := int(h.Sum32()) % len(arr) 143 | arr[idx]++ 144 | } 145 | b.StopTimer() 146 | if b.N > 1000000 { 147 | for i := 0; i < len(arr)-1; i++ { 148 | if math.Abs(float64(arr[i]-arr[i+1]))/float64(arr[i]) > float64(0.1) { 149 | b.Logf("val[%3d]=%8d\tval[%3d]=%8d\tb.N=%b\n", i, arr[i], i+1, arr[i+1], b.N) 150 | break 151 | } 152 | } 153 | } 154 | } 155 | 156 | func BenchmarkNew32(b *testing.B) { 157 | arr := make([]int64, 1000) 158 | for i := 0; i < b.N; i++ { 159 | var payload [8]byte 160 | binary.BigEndian.PutUint32(payload[:4], uint32(i)) 161 | binary.BigEndian.PutUint32(payload[4:], uint32(i)) 162 | h := New32() 163 | h.Write(payload[:]) 164 | idx := int(h.Sum32()) % len(arr) 165 | arr[idx]++ 166 | } 167 | b.StopTimer() 168 | if b.N > 1000000 { 169 | for i := 0; i < len(arr)-1; i++ { 170 | if math.Abs(float64(arr[i]-arr[i+1]))/float64(arr[i]) > float64(0.1) { 171 | b.Logf("val[%3d]=%8d\tval[%3d]=%8d\tb.N=%b\n", i, arr[i], i+1, arr[i+1], b.N) 172 | break 173 | } 174 | } 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /tcpip/header/arp.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package header 16 | 17 | import "github.com/google/netstack/tcpip" 18 | 19 | const ( 20 | // ARPProtocolNumber is the ARP network protocol number. 21 | ARPProtocolNumber tcpip.NetworkProtocolNumber = 0x0806 22 | 23 | // ARPSize is the size of an IPv4-over-Ethernet ARP packet. 24 | ARPSize = 2 + 2 + 1 + 1 + 2 + 2*6 + 2*4 25 | ) 26 | 27 | // ARPOp is an ARP opcode. 28 | type ARPOp uint16 29 | 30 | // Typical ARP opcodes defined in RFC 826. 31 | const ( 32 | ARPRequest ARPOp = 1 33 | ARPReply ARPOp = 2 34 | ) 35 | 36 | // ARP is an ARP packet stored in a byte array as described in RFC 826. 37 | type ARP []byte 38 | 39 | func (a ARP) hardwareAddressSpace() uint16 { return uint16(a[0])<<8 | uint16(a[1]) } 40 | func (a ARP) protocolAddressSpace() uint16 { return uint16(a[2])<<8 | uint16(a[3]) } 41 | func (a ARP) hardwareAddressSize() int { return int(a[4]) } 42 | func (a ARP) protocolAddressSize() int { return int(a[5]) } 43 | 44 | // Op is the ARP opcode. 45 | func (a ARP) Op() ARPOp { return ARPOp(a[6])<<8 | ARPOp(a[7]) } 46 | 47 | // SetOp sets the ARP opcode. 48 | func (a ARP) SetOp(op ARPOp) { 49 | a[6] = uint8(op >> 8) 50 | a[7] = uint8(op) 51 | } 52 | 53 | // SetIPv4OverEthernet configures the ARP packet for IPv4-over-Ethernet. 54 | func (a ARP) SetIPv4OverEthernet() { 55 | a[0], a[1] = 0, 1 // htypeEthernet 56 | a[2], a[3] = 0x08, 0x00 // IPv4ProtocolNumber 57 | a[4] = 6 // macSize 58 | a[5] = uint8(IPv4AddressSize) 59 | } 60 | 61 | // HardwareAddressSender is the link address of the sender. 62 | // It is a view on to the ARP packet so it can be used to set the value. 63 | func (a ARP) HardwareAddressSender() []byte { 64 | const s = 8 65 | return a[s : s+6] 66 | } 67 | 68 | // ProtocolAddressSender is the protocol address of the sender. 69 | // It is a view on to the ARP packet so it can be used to set the value. 
70 | func (a ARP) ProtocolAddressSender() []byte { 71 | const s = 8 + 6 72 | return a[s : s+4] 73 | } 74 | 75 | // HardwareAddressTarget is the link address of the target. 76 | // It is a view on to the ARP packet so it can be used to set the value. 77 | func (a ARP) HardwareAddressTarget() []byte { 78 | const s = 8 + 6 + 4 79 | return a[s : s+6] 80 | } 81 | 82 | // ProtocolAddressTarget is the protocol address of the target. 83 | // It is a view on to the ARP packet so it can be used to set the value. 84 | func (a ARP) ProtocolAddressTarget() []byte { 85 | const s = 8 + 6 + 4 + 6 86 | return a[s : s+4] 87 | } 88 | 89 | // IsValid reports whether this is an ARP packet for IPv4 over Ethernet. 90 | func (a ARP) IsValid() bool { 91 | if len(a) < ARPSize { 92 | return false 93 | } 94 | const htypeEthernet = 1 95 | const macSize = 6 96 | return a.hardwareAddressSpace() == htypeEthernet && 97 | a.protocolAddressSpace() == uint16(IPv4ProtocolNumber) && 98 | a.hardwareAddressSize() == macSize && 99 | a.protocolAddressSize() == IPv4AddressSize 100 | } 101 | -------------------------------------------------------------------------------- /tcpip/header/checksum.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | // Package header provides the implementation of the encoding and decoding of 16 | // network protocol headers. 17 | package header 18 | 19 | import ( 20 | "encoding/binary" 21 | 22 | "github.com/google/netstack/tcpip" 23 | "github.com/google/netstack/tcpip/buffer" 24 | ) 25 | 26 | func calculateChecksum(buf []byte, initial uint32) uint16 { 27 | v := initial 28 | 29 | l := len(buf) 30 | if l&1 != 0 { 31 | l-- 32 | v += uint32(buf[l]) << 8 33 | } 34 | 35 | for i := 0; i < l; i += 2 { 36 | v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) 37 | } 38 | 39 | return ChecksumCombine(uint16(v), uint16(v>>16)) 40 | } 41 | 42 | // Checksum calculates the checksum (as defined in RFC 1071) of the bytes in the 43 | // given byte array. 44 | // 45 | // The initial checksum must have been computed on an even number of bytes. 46 | func Checksum(buf []byte, initial uint16) uint16 { 47 | return calculateChecksum(buf, uint32(initial)) 48 | } 49 | 50 | // ChecksumVV calculates the checksum (as defined in RFC 1071) of the bytes in 51 | // the given VectorizedView. 52 | // 53 | // The initial checksum must have been computed on an even number of bytes. 54 | func ChecksumVV(vv buffer.VectorisedView, initial uint16) uint16 { 55 | var odd bool 56 | sum := initial 57 | for _, v := range vv.Views() { 58 | if len(v) == 0 { 59 | continue 60 | } 61 | s := uint32(sum) 62 | if odd { 63 | s += uint32(v[0]) 64 | v = v[1:] 65 | } 66 | odd = len(v)&1 != 0 67 | sum = calculateChecksum(v, s) 68 | } 69 | return sum 70 | } 71 | 72 | // ChecksumCombine combines the two uint16 to form their checksum. This is done 73 | // by adding them and the carry. 74 | // 75 | // Note that checksum a must have been computed on an even number of bytes. 76 | func ChecksumCombine(a, b uint16) uint16 { 77 | v := uint32(a) + uint32(b) 78 | return uint16(v + v>>16) 79 | } 80 | 81 | // PseudoHeaderChecksum calculates the pseudo-header checksum for the given 82 | // destination protocol and network address. 
Pseudo-headers are needed by 83 | // transport layers when calculating their own checksum. 84 | func PseudoHeaderChecksum(protocol tcpip.TransportProtocolNumber, srcAddr tcpip.Address, dstAddr tcpip.Address, totalLen uint16) uint16 { 85 | xsum := Checksum([]byte(srcAddr), 0) 86 | xsum = Checksum([]byte(dstAddr), xsum) 87 | 88 | // Add the length portion of the checksum to the pseudo-checksum. 89 | tmp := make([]byte, 2) 90 | binary.BigEndian.PutUint16(tmp, totalLen) 91 | xsum = Checksum(tmp, xsum) 92 | 93 | return Checksum([]byte{0, uint8(protocol)}, xsum) 94 | } 95 | -------------------------------------------------------------------------------- /tcpip/header/eth.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package header 16 | 17 | import ( 18 | "encoding/binary" 19 | 20 | "github.com/google/netstack/tcpip" 21 | ) 22 | 23 | const ( 24 | dstMAC = 0 25 | srcMAC = 6 26 | ethType = 12 27 | ) 28 | 29 | // EthernetFields contains the fields of an ethernet frame header. It is used to 30 | // describe the fields of a frame that needs to be encoded. 31 | type EthernetFields struct { 32 | // SrcAddr is the "MAC source" field of an ethernet frame header. 33 | SrcAddr tcpip.LinkAddress 34 | 35 | // DstAddr is the "MAC destination" field of an ethernet frame header. 
36 | DstAddr tcpip.LinkAddress 37 | 38 | // Type is the "ethertype" field of an ethernet frame header. 39 | Type tcpip.NetworkProtocolNumber 40 | } 41 | 42 | // Ethernet represents an ethernet frame header stored in a byte array. 43 | type Ethernet []byte 44 | 45 | const ( 46 | // EthernetMinimumSize is the minimum size of a valid ethernet frame. 47 | EthernetMinimumSize = 14 48 | 49 | // EthernetAddressSize is the size, in bytes, of an ethernet address. 50 | EthernetAddressSize = 6 51 | ) 52 | 53 | // SourceAddress returns the "MAC source" field of the ethernet frame header. 54 | func (b Ethernet) SourceAddress() tcpip.LinkAddress { 55 | return tcpip.LinkAddress(b[srcMAC:][:EthernetAddressSize]) 56 | } 57 | 58 | // DestinationAddress returns the "MAC destination" field of the ethernet frame 59 | // header. 60 | func (b Ethernet) DestinationAddress() tcpip.LinkAddress { 61 | return tcpip.LinkAddress(b[dstMAC:][:EthernetAddressSize]) 62 | } 63 | 64 | // Type returns the "ethertype" field of the ethernet frame header. 65 | func (b Ethernet) Type() tcpip.NetworkProtocolNumber { 66 | return tcpip.NetworkProtocolNumber(binary.BigEndian.Uint16(b[ethType:])) 67 | } 68 | 69 | // Encode encodes all the fields of the ethernet frame header. 70 | func (b Ethernet) Encode(e *EthernetFields) { 71 | binary.BigEndian.PutUint16(b[ethType:], uint16(e.Type)) 72 | copy(b[srcMAC:][:EthernetAddressSize], e.SrcAddr) 73 | copy(b[dstMAC:][:EthernetAddressSize], e.DstAddr) 74 | } 75 | -------------------------------------------------------------------------------- /tcpip/header/gue.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// Byte offsets of the fields within a GUE header.
const (
	typeHLen   = 0
	encapProto = 1
)

// GUEFields contains the fields of a GUE packet. It is used to describe the
// fields of a packet that needs to be encoded.
type GUEFields struct {
	// Type is the "type" field of the GUE header.
	Type uint8

	// Control is the "control" field of the GUE header.
	Control bool

	// HeaderLength is the "header length" field of the GUE header. It must
	// be at least 4 octets, and a multiple of 4 as well.
	HeaderLength uint8

	// Protocol is the "protocol" field of the GUE header. This is one of
	// the IPPROTO_* values.
	Protocol uint8
}

// GUE represents a Generic UDP Encapsulation header stored in a byte array, the
// fields are described in https://tools.ietf.org/html/draft-ietf-nvo3-gue-01.
type GUE []byte

const (
	// GUEMinimumSize is the minimum size of a valid GUE packet.
	GUEMinimumSize = 4
)

// TypeAndControl returns the GUE packet type (top 3 bits of the first byte,
// which includes the control bit).
func (b GUE) TypeAndControl() uint8 {
	first := b[typeHLen]
	return first >> 5
}

// HeaderLength returns the total length of the GUE header: the low 5 bits of
// the first byte count 4-octet words beyond the initial 4 octets.
func (b GUE) HeaderLength() uint8 {
	words := b[typeHLen] & 0x1f
	return (words + 1) * 4
}

// Protocol returns the protocol field of the GUE header.
func (b GUE) Protocol() uint8 {
	return b[encapProto]
}

// Encode encodes all the fields of the GUE header. Only the first two bytes
// are written.
func (b GUE) Encode(i *GUEFields) {
	// Type occupies the top two bits, the control flag is bit 5, and the
	// header length (in 4-octet words past the first) fills the low bits.
	first := i.Type<<6 | (i.HeaderLength-4)/4
	if i.Control {
		first |= 1 << 5
	}
	b[typeHLen] = first
	b[encapProto] = i.Protocol
}
const (
	// MaxIPPacketSize is the maximum supported IP packet size, excluding
	// jumbograms. The maximum IPv4 packet size is 64k-1 (total size must fit
	// in 16 bits). For IPv6, the payload max size (excluding jumbograms) is
	// 64k-1 (also needs to fit in 16 bits). So we use 64k - 1 + 2 * m, where
	// m is the minimum IPv6 header size; we leave room for some potential
	// IP options.
	MaxIPPacketSize = 0xffff + 2*IPv6MinimumSize
)

// Transport offers generic methods to query and/or update the fields of the
// header of a transport protocol buffer.
type Transport interface {
	// SourcePort returns the value of the "source port" field.
	SourcePort() uint16

	// DestinationPort returns the value of the "destination port" field.
	DestinationPort() uint16

	// Checksum returns the value of the "checksum" field.
	Checksum() uint16

	// SetSourcePort sets the value of the "source port" field.
	SetSourcePort(uint16)

	// SetDestinationPort sets the value of the "destination port" field.
	SetDestinationPort(uint16)

	// SetChecksum sets the value of the "checksum" field.
	SetChecksum(uint16)

	// Payload returns the data carried in the transport buffer.
	Payload() []byte
}

// Network offers generic methods to query and/or update the fields of the
// header of a network protocol buffer.
type Network interface {
	// SourceAddress returns the value of the "source address" field.
	SourceAddress() tcpip.Address

	// DestinationAddress returns the value of the "destination address"
	// field.
	DestinationAddress() tcpip.Address

	// Checksum returns the value of the "checksum" field.
	Checksum() uint16

	// SetSourceAddress sets the value of the "source address" field.
	SetSourceAddress(tcpip.Address)

	// SetDestinationAddress sets the value of the "destination address"
	// field.
	SetDestinationAddress(tcpip.Address)

	// SetChecksum sets the value of the "checksum" field.
	SetChecksum(uint16)

	// TransportProtocol returns the number of the transport protocol
	// stored in the payload.
	TransportProtocol() tcpip.TransportProtocolNumber

	// Payload returns a byte slice containing the payload of the network
	// packet.
	Payload() []byte

	// TOS returns the values of the "type of service" and "flow label" fields.
	TOS() (uint8, uint32)

	// SetTOS sets the values of the "type of service" and "flow label" fields.
	SetTOS(t uint8, l uint32)
}
49 | // Always call IsValid() to validate an instance of IPv6Fragment before using other methods. 50 | type IPv6Fragment []byte 51 | 52 | const ( 53 | // IPv6FragmentHeader header is the number used to specify that the next 54 | // header is a fragment header, per RFC 2460. 55 | IPv6FragmentHeader = 44 56 | 57 | // IPv6FragmentHeaderSize is the size of the fragment header. 58 | IPv6FragmentHeaderSize = 8 59 | ) 60 | 61 | // Encode encodes all the fields of the ipv6 fragment. 62 | func (b IPv6Fragment) Encode(i *IPv6FragmentFields) { 63 | b[nextHdrFrag] = i.NextHeader 64 | binary.BigEndian.PutUint16(b[fragOff:], i.FragmentOffset<<3) 65 | if i.M { 66 | b[more] |= 1 67 | } 68 | binary.BigEndian.PutUint32(b[idV6:], i.Identification) 69 | } 70 | 71 | // IsValid performs basic validation on the fragment header. 72 | func (b IPv6Fragment) IsValid() bool { 73 | return len(b) >= IPv6FragmentHeaderSize 74 | } 75 | 76 | // NextHeader returns the value of the "next header" field of the ipv6 fragment. 77 | func (b IPv6Fragment) NextHeader() uint8 { 78 | return b[nextHdrFrag] 79 | } 80 | 81 | // FragmentOffset returns the "fragment offset" field of the ipv6 fragment. 82 | func (b IPv6Fragment) FragmentOffset() uint16 { 83 | return binary.BigEndian.Uint16(b[fragOff:]) >> 3 84 | } 85 | 86 | // More returns the "more" field of the ipv6 fragment. 87 | func (b IPv6Fragment) More() bool { 88 | return b[more]&1 > 0 89 | } 90 | 91 | // Payload implements Network.Payload. 92 | func (b IPv6Fragment) Payload() []byte { 93 | return b[IPv6FragmentHeaderSize:] 94 | } 95 | 96 | // ID returns the value of the identifier field of the ipv6 fragment. 97 | func (b IPv6Fragment) ID() uint32 { 98 | return binary.BigEndian.Uint32(b[idV6:]) 99 | } 100 | 101 | // TransportProtocol implements Network.TransportProtocol. 
102 | func (b IPv6Fragment) TransportProtocol() tcpip.TransportProtocolNumber { 103 | return tcpip.TransportProtocolNumber(b.NextHeader()) 104 | } 105 | 106 | // The functions below have been added only to satisfy the Network interface. 107 | 108 | // Checksum is not supported by IPv6Fragment. 109 | func (b IPv6Fragment) Checksum() uint16 { 110 | panic("not supported") 111 | } 112 | 113 | // SourceAddress is not supported by IPv6Fragment. 114 | func (b IPv6Fragment) SourceAddress() tcpip.Address { 115 | panic("not supported") 116 | } 117 | 118 | // DestinationAddress is not supported by IPv6Fragment. 119 | func (b IPv6Fragment) DestinationAddress() tcpip.Address { 120 | panic("not supported") 121 | } 122 | 123 | // SetSourceAddress is not supported by IPv6Fragment. 124 | func (b IPv6Fragment) SetSourceAddress(tcpip.Address) { 125 | panic("not supported") 126 | } 127 | 128 | // SetDestinationAddress is not supported by IPv6Fragment. 129 | func (b IPv6Fragment) SetDestinationAddress(tcpip.Address) { 130 | panic("not supported") 131 | } 132 | 133 | // SetChecksum is not supported by IPv6Fragment. 134 | func (b IPv6Fragment) SetChecksum(uint16) { 135 | panic("not supported") 136 | } 137 | 138 | // TOS is not supported by IPv6Fragment. 139 | func (b IPv6Fragment) TOS() (uint8, uint32) { 140 | panic("not supported") 141 | } 142 | 143 | // SetTOS is not supported by IPv6Fragment. 144 | func (b IPv6Fragment) SetTOS(t uint8, l uint32) { 145 | panic("not supported") 146 | } 147 | -------------------------------------------------------------------------------- /tcpip/header/ipversion_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package header_test 16 | 17 | import ( 18 | "testing" 19 | 20 | "github.com/google/netstack/tcpip/header" 21 | ) 22 | 23 | func TestIPv4(t *testing.T) { 24 | b := header.IPv4(make([]byte, header.IPv4MinimumSize)) 25 | b.Encode(&header.IPv4Fields{}) 26 | 27 | const want = header.IPv4Version 28 | if v := header.IPVersion(b); v != want { 29 | t.Fatalf("Bad version, want %v, got %v", want, v) 30 | } 31 | } 32 | 33 | func TestIPv6(t *testing.T) { 34 | b := header.IPv6(make([]byte, header.IPv6MinimumSize)) 35 | b.Encode(&header.IPv6Fields{}) 36 | 37 | const want = header.IPv6Version 38 | if v := header.IPVersion(b); v != want { 39 | t.Fatalf("Bad version, want %v, got %v", want, v) 40 | } 41 | } 42 | 43 | func TestOtherVersion(t *testing.T) { 44 | const want = header.IPv4Version + header.IPv6Version 45 | b := make([]byte, 1) 46 | b[0] = want << 4 47 | 48 | if v := header.IPVersion(b); v != want { 49 | t.Fatalf("Bad version, want %v, got %v", want, v) 50 | } 51 | } 52 | 53 | func TestTooShort(t *testing.T) { 54 | b := make([]byte, 1) 55 | b[0] = (header.IPv4Version + header.IPv6Version) << 4 56 | 57 | // Get the version of a zero-length slice. 58 | const want = -1 59 | if v := header.IPVersion(b[:0]); v != want { 60 | t.Fatalf("Bad version, want %v, got %v", want, v) 61 | } 62 | 63 | // Get the version of a nil slice. 
64 | if v := header.IPVersion(nil); v != want { 65 | t.Fatalf("Bad version, want %v, got %v", want, v) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /tcpip/header/udp.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package header 16 | 17 | import ( 18 | "encoding/binary" 19 | 20 | "github.com/google/netstack/tcpip" 21 | ) 22 | 23 | const ( 24 | udpSrcPort = 0 25 | udpDstPort = 2 26 | udpLength = 4 27 | udpChecksum = 6 28 | ) 29 | 30 | const ( 31 | // UDPMaximumPacketSize is the largest possible UDP packet. 32 | UDPMaximumPacketSize = 0xffff 33 | ) 34 | 35 | // UDPFields contains the fields of a UDP packet. It is used to describe the 36 | // fields of a packet that needs to be encoded. 37 | type UDPFields struct { 38 | // SrcPort is the "source port" field of a UDP packet. 39 | SrcPort uint16 40 | 41 | // DstPort is the "destination port" field of a UDP packet. 42 | DstPort uint16 43 | 44 | // Length is the "length" field of a UDP packet. 45 | Length uint16 46 | 47 | // Checksum is the "checksum" field of a UDP packet. 48 | Checksum uint16 49 | } 50 | 51 | // UDP represents a UDP header stored in a byte array. 52 | type UDP []byte 53 | 54 | const ( 55 | // UDPMinimumSize is the minimum size of a valid UDP packet. 
56 | UDPMinimumSize = 8 57 | 58 | // UDPProtocolNumber is UDP's transport protocol number. 59 | UDPProtocolNumber tcpip.TransportProtocolNumber = 17 60 | ) 61 | 62 | // SourcePort returns the "source port" field of the udp header. 63 | func (b UDP) SourcePort() uint16 { 64 | return binary.BigEndian.Uint16(b[udpSrcPort:]) 65 | } 66 | 67 | // DestinationPort returns the "destination port" field of the udp header. 68 | func (b UDP) DestinationPort() uint16 { 69 | return binary.BigEndian.Uint16(b[udpDstPort:]) 70 | } 71 | 72 | // Length returns the "length" field of the udp header. 73 | func (b UDP) Length() uint16 { 74 | return binary.BigEndian.Uint16(b[udpLength:]) 75 | } 76 | 77 | // Payload returns the data contained in the UDP datagram. 78 | func (b UDP) Payload() []byte { 79 | return b[UDPMinimumSize:] 80 | } 81 | 82 | // Checksum returns the "checksum" field of the udp header. 83 | func (b UDP) Checksum() uint16 { 84 | return binary.BigEndian.Uint16(b[udpChecksum:]) 85 | } 86 | 87 | // SetSourcePort sets the "source port" field of the udp header. 88 | func (b UDP) SetSourcePort(port uint16) { 89 | binary.BigEndian.PutUint16(b[udpSrcPort:], port) 90 | } 91 | 92 | // SetDestinationPort sets the "destination port" field of the udp header. 93 | func (b UDP) SetDestinationPort(port uint16) { 94 | binary.BigEndian.PutUint16(b[udpDstPort:], port) 95 | } 96 | 97 | // SetChecksum sets the "checksum" field of the udp header. 98 | func (b UDP) SetChecksum(checksum uint16) { 99 | binary.BigEndian.PutUint16(b[udpChecksum:], checksum) 100 | } 101 | 102 | // CalculateChecksum calculates the checksum of the udp packet, given the 103 | // checksum of the network-layer pseudo-header and the checksum of the payload. 104 | func (b UDP) CalculateChecksum(partialChecksum uint16) uint16 { 105 | // Calculate the rest of the checksum. 106 | return Checksum(b[:UDPMinimumSize], partialChecksum) 107 | } 108 | 109 | // Encode encodes all the fields of the udp header. 
110 | func (b UDP) Encode(u *UDPFields) { 111 | binary.BigEndian.PutUint16(b[udpSrcPort:], u.SrcPort) 112 | binary.BigEndian.PutUint16(b[udpDstPort:], u.DstPort) 113 | binary.BigEndian.PutUint16(b[udpLength:], u.Length) 114 | binary.BigEndian.PutUint16(b[udpChecksum:], u.Checksum) 115 | } 116 | -------------------------------------------------------------------------------- /tcpip/iptables/iptables.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The gVisor authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package iptables supports packet filtering and manipulation via the iptables 16 | // tool. 17 | package iptables 18 | 19 | const ( 20 | tablenameNat = "nat" 21 | tablenameMangle = "mangle" 22 | ) 23 | 24 | // Chain names as defined by net/ipv4/netfilter/ip_tables.c. 25 | const ( 26 | chainNamePrerouting = "PREROUTING" 27 | chainNameInput = "INPUT" 28 | chainNameForward = "FORWARD" 29 | chainNameOutput = "OUTPUT" 30 | chainNamePostrouting = "POSTROUTING" 31 | ) 32 | 33 | // DefaultTables returns a default set of tables. Each chain is set to accept 34 | // all packets. 
35 | func DefaultTables() IPTables { 36 | return IPTables{ 37 | Tables: map[string]Table{ 38 | tablenameNat: Table{ 39 | BuiltinChains: map[Hook]Chain{ 40 | Prerouting: unconditionalAcceptChain(chainNamePrerouting), 41 | Input: unconditionalAcceptChain(chainNameInput), 42 | Output: unconditionalAcceptChain(chainNameOutput), 43 | Postrouting: unconditionalAcceptChain(chainNamePostrouting), 44 | }, 45 | DefaultTargets: map[Hook]Target{ 46 | Prerouting: UnconditionalAcceptTarget{}, 47 | Input: UnconditionalAcceptTarget{}, 48 | Output: UnconditionalAcceptTarget{}, 49 | Postrouting: UnconditionalAcceptTarget{}, 50 | }, 51 | UserChains: map[string]Chain{}, 52 | }, 53 | tablenameMangle: Table{ 54 | BuiltinChains: map[Hook]Chain{ 55 | Prerouting: unconditionalAcceptChain(chainNamePrerouting), 56 | Output: unconditionalAcceptChain(chainNameOutput), 57 | }, 58 | DefaultTargets: map[Hook]Target{ 59 | Prerouting: UnconditionalAcceptTarget{}, 60 | Output: UnconditionalAcceptTarget{}, 61 | }, 62 | UserChains: map[string]Chain{}, 63 | }, 64 | }, 65 | Priorities: map[Hook][]string{ 66 | Prerouting: []string{tablenameMangle, tablenameNat}, 67 | Output: []string{tablenameMangle, tablenameNat}, 68 | }, 69 | } 70 | } 71 | 72 | func unconditionalAcceptChain(name string) Chain { 73 | return Chain{ 74 | Name: name, 75 | Rules: []Rule{ 76 | Rule{ 77 | Target: UnconditionalAcceptTarget{}, 78 | }, 79 | }, 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /tcpip/iptables/targets.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // This file contains various Targets. 16 | 17 | package iptables 18 | 19 | import "github.com/google/netstack/tcpip/buffer" 20 | 21 | // UnconditionalAcceptTarget accepts all packets. 22 | type UnconditionalAcceptTarget struct{} 23 | 24 | // Action implements Target.Action. 25 | func (UnconditionalAcceptTarget) Action(packet buffer.VectorisedView) (Verdict, string) { 26 | return Accept, "" 27 | } 28 | 29 | // UnconditionalDropTarget denies all packets. 30 | type UnconditionalDropTarget struct{} 31 | 32 | // Action implements Target.Action. 33 | func (UnconditionalDropTarget) Action(packet buffer.VectorisedView) (Verdict, string) { 34 | return Drop, "" 35 | } 36 | -------------------------------------------------------------------------------- /tcpip/link/channel/channel.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | // Package channel provides the implementation of channel-based data-link layer 16 | // endpoints. Such endpoints allow injection of inbound packets and store 17 | // outbound packets in a channel. 18 | package channel 19 | 20 | import ( 21 | "github.com/google/netstack/tcpip" 22 | "github.com/google/netstack/tcpip/buffer" 23 | "github.com/google/netstack/tcpip/stack" 24 | ) 25 | 26 | // PacketInfo holds all the information about an outbound packet. 27 | type PacketInfo struct { 28 | Header buffer.View 29 | Payload buffer.View 30 | Proto tcpip.NetworkProtocolNumber 31 | GSO *stack.GSO 32 | } 33 | 34 | // Endpoint is a link-layer endpoint that stores outbound packets in a channel 35 | // and allows injection of inbound packets. 36 | type Endpoint struct { 37 | dispatcher stack.NetworkDispatcher 38 | mtu uint32 39 | linkAddr tcpip.LinkAddress 40 | GSO bool 41 | 42 | // C is where outbound packets are queued. 43 | C chan PacketInfo 44 | } 45 | 46 | // New creates a new channel endpoint whose outbound queue can buffer size packets. 47 | func New(size int, mtu uint32, linkAddr tcpip.LinkAddress) *Endpoint { 48 | return &Endpoint{ 49 | C: make(chan PacketInfo, size), 50 | mtu: mtu, 51 | linkAddr: linkAddr, 52 | } 53 | } 54 | 55 | // Drain removes, without blocking, all outbound packets currently queued in the channel and returns how many were removed. 56 | func (e *Endpoint) Drain() int { 57 | c := 0 58 | for { 59 | select { 60 | case <-e.C: 61 | c++ 62 | default: 63 | return c 64 | } 65 | } 66 | } 67 | 68 | // Inject injects an inbound packet with an empty remote link address. 69 | func (e *Endpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) { 70 | e.InjectLinkAddr(protocol, "", vv) 71 | } 72 | 73 | // InjectLinkAddr injects an inbound packet with a remote link address. The packet is cloned before it is handed to the dispatcher saved by Attach.
74 | func (e *Endpoint) InjectLinkAddr(protocol tcpip.NetworkProtocolNumber, remote tcpip.LinkAddress, vv buffer.VectorisedView) { 75 | e.dispatcher.DeliverNetworkPacket(e, remote, "" /* local */, protocol, vv.Clone(nil)) 76 | } 77 | 78 | // Attach saves the stack network-layer dispatcher for use later when packets 79 | // are injected. 80 | func (e *Endpoint) Attach(dispatcher stack.NetworkDispatcher) { 81 | e.dispatcher = dispatcher 82 | } 83 | 84 | // IsAttached implements stack.LinkEndpoint.IsAttached. It reports whether a dispatcher has been saved by Attach. 85 | func (e *Endpoint) IsAttached() bool { 86 | return e.dispatcher != nil 87 | } 88 | 89 | // MTU implements stack.LinkEndpoint.MTU. It returns the value initialized 90 | // during construction. 91 | func (e *Endpoint) MTU() uint32 { 92 | return e.mtu 93 | } 94 | 95 | // Capabilities implements stack.LinkEndpoint.Capabilities. It advertises stack.CapabilityGSO only when the GSO field is set. 96 | func (e *Endpoint) Capabilities() stack.LinkEndpointCapabilities { 97 | caps := stack.LinkEndpointCapabilities(0) 98 | if e.GSO { 99 | caps |= stack.CapabilityGSO 100 | } 101 | return caps 102 | } 103 | 104 | // GSOMaxSize returns the maximum GSO packet size, fixed at 1 << 15 (32768) bytes. 105 | func (*Endpoint) GSOMaxSize() uint32 { 106 | return 1 << 15 107 | } 108 | 109 | // MaxHeaderLength returns the maximum size of the link layer header. Given it 110 | // doesn't have a header, it just returns 0. 111 | func (*Endpoint) MaxHeaderLength() uint16 { 112 | return 0 113 | } 114 | 115 | // LinkAddress returns the link address of this endpoint. 116 | func (e *Endpoint) LinkAddress() tcpip.LinkAddress { 117 | return e.linkAddr 118 | } 119 | 120 | // WritePacket stores outbound packets into the channel. The send is non-blocking: if the channel is full the packet is dropped, and nil is returned either way.
121 | func (e *Endpoint) WritePacket(_ *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error { 122 | p := PacketInfo{ 123 | Header: hdr.View(), 124 | Proto: protocol, 125 | Payload: payload.ToView(), 126 | GSO: gso, 127 | } 128 | 129 | select { 130 | case e.C <- p: 131 | default: 132 | } 133 | 134 | return nil 135 | } 136 | 137 | // Wait implements stack.LinkEndpoint.Wait. 138 | func (*Endpoint) Wait() {} 139 | -------------------------------------------------------------------------------- /tcpip/link/fdbased/endpoint_unsafe.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | // +build linux 16 | 17 | package fdbased 18 | 19 | import ( 20 | "reflect" 21 | "unsafe" 22 | ) 23 | 24 | const virtioNetHdrSize = int(unsafe.Sizeof(virtioNetHdr{})) 25 | 26 | func vnetHdrToByteSlice(hdr *virtioNetHdr) (slice []byte) { 27 | sh := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) 28 | sh.Data = uintptr(unsafe.Pointer(hdr)) 29 | sh.Len = virtioNetHdrSize 30 | sh.Cap = virtioNetHdrSize 31 | return 32 | } 33 | -------------------------------------------------------------------------------- /tcpip/link/fdbased/mmap_stub.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // +build !linux !amd64,!arm64 16 | 17 | package fdbased 18 | 19 | // Stubbed out version for non-linux/non-amd64/non-arm64 platforms. 20 | 21 | func newPacketMMapDispatcher(fd int, e *endpoint) (linkDispatcher, error) { 22 | return nil, nil 23 | } 24 | -------------------------------------------------------------------------------- /tcpip/link/fdbased/mmap_unsafe.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // +build linux,amd64 linux,arm64 16 | 17 | package fdbased 18 | 19 | import ( 20 | "fmt" 21 | "sync/atomic" 22 | "syscall" 23 | "unsafe" 24 | 25 | "golang.org/x/sys/unix" 26 | ) 27 | 28 | // tPacketHdrlen mirrors the TPACKET_HDRLEN macro defined in the Linux header linux/if_packet.h. 29 | var tPacketHdrlen = tPacketAlign(unsafe.Sizeof(tPacketHdr{}) + unsafe.Sizeof(syscall.RawSockaddrLinklayer{})) 30 | 31 | // tpStatus returns the frame status field. 32 | // The status is concurrently updated by the kernel; as a result, we must 33 | // use atomic operations to prevent races. 34 | func (t tPacketHdr) tpStatus() uint32 { 35 | hdr := unsafe.Pointer(&t[0]) 36 | statusPtr := unsafe.Pointer(uintptr(hdr) + uintptr(tpStatusOffset)) 37 | return atomic.LoadUint32((*uint32)(statusPtr)) 38 | } 39 | 40 | // setTPStatus sets the frame status to the provided status. 41 | // The status is concurrently updated by the kernel; as a result, we must 42 | // use atomic operations to prevent races.
43 | func (t tPacketHdr) setTPStatus(status uint32) { 44 | hdr := unsafe.Pointer(&t[0]) 45 | statusPtr := unsafe.Pointer(uintptr(hdr) + uintptr(tpStatusOffset)) 46 | atomic.StoreUint32((*uint32)(statusPtr), status) 47 | } 48 | 49 | func newPacketMMapDispatcher(fd int, e *endpoint) (linkDispatcher, error) { 50 | d := &packetMMapDispatcher{ 51 | fd: fd, 52 | e: e, 53 | } 54 | pageSize := unix.Getpagesize() 55 | if tpBlockSize%pageSize != 0 { 56 | return nil, fmt.Errorf("tpBlockSize: %d is not page aligned, pagesize: %d", tpBlockSize, pageSize) 57 | } 58 | tReq := tPacketReq{ 59 | tpBlockSize: uint32(tpBlockSize), 60 | tpBlockNR: uint32(tpBlockNR), 61 | tpFrameSize: uint32(tpFrameSize), 62 | tpFrameNR: uint32(tpFrameNR), 63 | } 64 | // Setup PACKET_RX_RING. 65 | if err := setsockopt(d.fd, syscall.SOL_PACKET, syscall.PACKET_RX_RING, unsafe.Pointer(&tReq), unsafe.Sizeof(tReq)); err != nil { 66 | return nil, fmt.Errorf("failed to enable PACKET_RX_RING: %v", err) 67 | } 68 | // Let's mmap the blocks. 69 | sz := tpBlockSize * tpBlockNR 70 | buf, err := syscall.Mmap(d.fd, 0, sz, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) 71 | if err != nil { 72 | return nil, fmt.Errorf("syscall.Mmap(...,0, %v, ...) failed = %v", sz, err) 73 | } 74 | d.ringBuffer = buf 75 | return d, nil 76 | } 77 | 78 | func setsockopt(fd, level, name int, val unsafe.Pointer, vallen uintptr) error { 79 | if _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, uintptr(fd), uintptr(level), uintptr(name), uintptr(val), vallen, 0); errno != 0 { 80 | return error(errno) 81 | } 82 | 83 | return nil 84 | } 85 | -------------------------------------------------------------------------------- /tcpip/link/loopback/loopback.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package loopback provides the implementation of loopback data-link layer 16 | // endpoints. Such endpoints just turn outbound packets into inbound ones. 17 | // 18 | // Loopback endpoints can be used in the networking stack by calling New() to 19 | // create a new endpoint, and then passing it as an argument to 20 | // Stack.CreateNIC(). 21 | package loopback 22 | 23 | import ( 24 | "github.com/google/netstack/tcpip" 25 | "github.com/google/netstack/tcpip/buffer" 26 | "github.com/google/netstack/tcpip/stack" 27 | ) 28 | 29 | type endpoint struct { 30 | dispatcher stack.NetworkDispatcher 31 | } 32 | 33 | // New creates a new loopback endpoint. This link-layer endpoint just turns 34 | // outbound packets into inbound packets. 35 | func New() stack.LinkEndpoint { 36 | return &endpoint{} 37 | } 38 | 39 | // Attach implements stack.LinkEndpoint.Attach. It just saves the stack network- 40 | // layer dispatcher for later use when packets need to be dispatched. 41 | func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) { 42 | e.dispatcher = dispatcher 43 | } 44 | 45 | // IsAttached implements stack.LinkEndpoint.IsAttached. It reports whether a dispatcher has been saved by Attach. 46 | func (e *endpoint) IsAttached() bool { 47 | return e.dispatcher != nil 48 | } 49 | 50 | // MTU implements stack.LinkEndpoint.MTU. It returns a constant that matches the 51 | // Linux loopback interface. 52 | func (*endpoint) MTU() uint32 { 53 | return 65536 54 | } 55 | 56 | // Capabilities implements stack.LinkEndpoint.Capabilities.
Loopback advertises 57 | // itself as supporting checksum offload, but in reality it's just omitted. 58 | func (*endpoint) Capabilities() stack.LinkEndpointCapabilities { 59 | return stack.CapabilityRXChecksumOffload | stack.CapabilityTXChecksumOffload | stack.CapabilitySaveRestore | stack.CapabilityLoopback 60 | } 61 | 62 | // MaxHeaderLength implements stack.LinkEndpoint.MaxHeaderLength. Given that the 63 | // loopback interface doesn't have a header, it just returns 0. 64 | func (*endpoint) MaxHeaderLength() uint16 { 65 | return 0 66 | } 67 | 68 | // LinkAddress returns the link address of this endpoint. 69 | func (*endpoint) LinkAddress() tcpip.LinkAddress { 70 | return "" 71 | } 72 | 73 | // WritePacket implements stack.LinkEndpoint.WritePacket. It delivers outbound 74 | // packets to the network-layer dispatcher. 75 | func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error { 76 | views := make([]buffer.View, 1, 1+len(payload.Views())) 77 | views[0] = hdr.View() 78 | views = append(views, payload.Views()...) 79 | vv := buffer.NewVectorisedView(len(views[0])+payload.Size(), views) 80 | 81 | // Because we're immediately turning around and writing the packet back to the 82 | // rx path, we intentionally don't preserve the remote and local link 83 | // addresses from the stack.Route we're passed. 84 | e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv) 85 | 86 | return nil 87 | } 88 | 89 | // Wait implements stack.LinkEndpoint.Wait. 90 | func (*endpoint) Wait() {} 91 | -------------------------------------------------------------------------------- /tcpip/link/muxed/injectable.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The gVisor Authors. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package muxed provides muxed link endpoints. 16 | package muxed 17 | 18 | import ( 19 | "github.com/google/netstack/tcpip" 20 | "github.com/google/netstack/tcpip/buffer" 21 | "github.com/google/netstack/tcpip/stack" 22 | ) 23 | 24 | // InjectableEndpoint is an injectable multi endpoint. The endpoint has 25 | // trivial routing rules that determine which InjectableLinkEndpoint a given packet 26 | // will be written to. Note that HandleLocal works differently for this 27 | // endpoint (see WritePacket). 28 | type InjectableEndpoint struct { 29 | routes map[tcpip.Address]stack.InjectableLinkEndpoint 30 | dispatcher stack.NetworkDispatcher 31 | } 32 | 33 | // MTU implements stack.LinkEndpoint. It returns the smallest MTU among the routed endpoints, or the maximum uint32 value when there are no routes. 34 | func (m *InjectableEndpoint) MTU() uint32 { 35 | minMTU := ^uint32(0) 36 | for _, endpoint := range m.routes { 37 | if endpointMTU := endpoint.MTU(); endpointMTU < minMTU { 38 | minMTU = endpointMTU 39 | } 40 | } 41 | return minMTU 42 | } 43 | 44 | // Capabilities implements stack.LinkEndpoint. It returns the bitwise AND of the capabilities of all routed endpoints; with no routes every bit stays set. 45 | func (m *InjectableEndpoint) Capabilities() stack.LinkEndpointCapabilities { 46 | minCapabilities := stack.LinkEndpointCapabilities(^uint(0)) 47 | for _, endpoint := range m.routes { 48 | minCapabilities &= endpoint.Capabilities() 49 | } 50 | return minCapabilities 51 | } 52 | 53 | // MaxHeaderLength implements stack.LinkEndpoint. NOTE(review): it returns the smallest, not the largest, MaxHeaderLength among the routed endpoints (the maximum uint16 value when there are no routes) - confirm this is intended.
54 | func (m *InjectableEndpoint) MaxHeaderLength() uint16 { 55 | minHeaderLen := ^uint16(0) 56 | for _, endpoint := range m.routes { 57 | if headerLen := endpoint.MaxHeaderLength(); headerLen < minHeaderLen { 58 | minHeaderLen = headerLen 59 | } 60 | } 61 | return minHeaderLen 62 | } 63 | 64 | // LinkAddress implements stack.LinkEndpoint. It always returns the empty link address. 65 | func (m *InjectableEndpoint) LinkAddress() tcpip.LinkAddress { 66 | return "" 67 | } 68 | 69 | // Attach implements stack.LinkEndpoint. It attaches the dispatcher to every routed endpoint and then saves it on m. 70 | func (m *InjectableEndpoint) Attach(dispatcher stack.NetworkDispatcher) { 71 | for _, endpoint := range m.routes { 72 | endpoint.Attach(dispatcher) 73 | } 74 | m.dispatcher = dispatcher 75 | } 76 | 77 | // IsAttached implements stack.LinkEndpoint. It reports whether a dispatcher has been saved by Attach. 78 | func (m *InjectableEndpoint) IsAttached() bool { 79 | return m.dispatcher != nil 80 | } 81 | 82 | // Inject implements stack.InjectableLinkEndpoint. The packet is delivered to the saved dispatcher with empty remote and local link addresses. 83 | func (m *InjectableEndpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) { 84 | m.dispatcher.DeliverNetworkPacket(m, "" /* remote */, "" /* local */, protocol, vv) 85 | } 86 | 87 | // WritePacket writes outbound packets to the appropriate InjectableLinkEndpoint 88 | // based on the RemoteAddress. HandleLocal only works if r.RemoteAddress has a 89 | // route registered in this endpoint. The GSO argument is ignored, and tcpip.ErrNoRoute is returned when no route matches. 90 | func (m *InjectableEndpoint) WritePacket(r *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error { 91 | if endpoint, ok := m.routes[r.RemoteAddress]; ok { 92 | return endpoint.WritePacket(r, nil /* gso */, hdr, payload, protocol) 93 | } 94 | return tcpip.ErrNoRoute 95 | } 96 | 97 | // WriteRawPacket writes outbound packets to the appropriate 98 | // InjectableLinkEndpoint based on the dest address. It returns tcpip.ErrNoRoute when dest has no registered route.
99 | func (m *InjectableEndpoint) WriteRawPacket(dest tcpip.Address, packet []byte) *tcpip.Error { 100 | endpoint, ok := m.routes[dest] 101 | if !ok { 102 | return tcpip.ErrNoRoute 103 | } 104 | return endpoint.WriteRawPacket(dest, packet) 105 | } 106 | 107 | // Wait implements stack.LinkEndpoint.Wait. 108 | func (m *InjectableEndpoint) Wait() { 109 | for _, ep := range m.routes { 110 | ep.Wait() 111 | } 112 | } 113 | 114 | // NewInjectableEndpoint creates a new multi-endpoint injectable endpoint. 115 | func NewInjectableEndpoint(routes map[tcpip.Address]stack.InjectableLinkEndpoint) *InjectableEndpoint { 116 | return &InjectableEndpoint{ 117 | routes: routes, 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /tcpip/link/muxed/injectable_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package muxed 16 | 17 | import ( 18 | "bytes" 19 | "net" 20 | "os" 21 | "syscall" 22 | "testing" 23 | 24 | "github.com/google/netstack/tcpip" 25 | "github.com/google/netstack/tcpip/buffer" 26 | "github.com/google/netstack/tcpip/link/fdbased" 27 | "github.com/google/netstack/tcpip/network/ipv4" 28 | "github.com/google/netstack/tcpip/stack" 29 | ) 30 | 31 | func TestInjectableEndpointRawDispatch(t *testing.T) { 32 | endpoint, sock, dstIP := makeTestInjectableEndpoint(t) 33 | 34 | endpoint.WriteRawPacket(dstIP, []byte{0xFA}) 35 | 36 | buf := make([]byte, ipv4.MaxTotalSize) 37 | bytesRead, err := sock.Read(buf) 38 | if err != nil { 39 | t.Fatalf("Unable to read from socketpair: %v", err) 40 | } 41 | if got, want := buf[:bytesRead], []byte{0xFA}; !bytes.Equal(got, want) { 42 | t.Fatalf("Read %v from the socketpair, wanted %v", got, want) 43 | } 44 | } 45 | 46 | func TestInjectableEndpointDispatch(t *testing.T) { 47 | endpoint, sock, dstIP := makeTestInjectableEndpoint(t) 48 | 49 | hdr := buffer.NewPrependable(1) 50 | hdr.Prepend(1)[0] = 0xFA 51 | packetRoute := stack.Route{RemoteAddress: dstIP} 52 | 53 | endpoint.WritePacket(&packetRoute, nil /* gso */, hdr, 54 | buffer.NewViewFromBytes([]byte{0xFB}).ToVectorisedView(), ipv4.ProtocolNumber) 55 | 56 | buf := make([]byte, 6500) 57 | bytesRead, err := sock.Read(buf) 58 | if err != nil { 59 | t.Fatalf("Unable to read from socketpair: %v", err) 60 | } 61 | if got, want := buf[:bytesRead], []byte{0xFA, 0xFB}; !bytes.Equal(got, want) { 62 | t.Fatalf("Read %v from the socketpair, wanted %v", got, want) 63 | } 64 | } 65 | 66 | func TestInjectableEndpointDispatchHdrOnly(t *testing.T) { 67 | endpoint, sock, dstIP := makeTestInjectableEndpoint(t) 68 | hdr := buffer.NewPrependable(1) 69 | hdr.Prepend(1)[0] = 0xFA 70 | packetRoute := stack.Route{RemoteAddress: dstIP} 71 | endpoint.WritePacket(&packetRoute, nil /* gso */, hdr, 72 | buffer.NewView(0).ToVectorisedView(), ipv4.ProtocolNumber) 73 | buf := make([]byte, 6500) 74 | 
bytesRead, err := sock.Read(buf) 75 | if err != nil { 76 | t.Fatalf("Unable to read from socketpair: %v", err) 77 | } 78 | if got, want := buf[:bytesRead], []byte{0xFA}; !bytes.Equal(got, want) { 79 | t.Fatalf("Read %v from the socketpair, wanted %v", got, want) 80 | } 81 | } 82 | 83 | func makeTestInjectableEndpoint(t *testing.T) (*InjectableEndpoint, *os.File, tcpip.Address) { 84 | dstIP := tcpip.Address(net.ParseIP("1.2.3.4").To4()) 85 | pair, err := syscall.Socketpair(syscall.AF_UNIX, 86 | syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC|syscall.SOCK_NONBLOCK, 0) 87 | if err != nil { 88 | t.Fatal("Failed to create socket pair:", err) 89 | } 90 | underlyingEndpoint := fdbased.NewInjectable(pair[1], 6500, stack.CapabilityNone) 91 | routes := map[tcpip.Address]stack.InjectableLinkEndpoint{dstIP: underlyingEndpoint} 92 | endpoint := NewInjectableEndpoint(routes) 93 | return endpoint, os.NewFile(uintptr(pair[0]), "test route end"), dstIP 94 | } 95 | -------------------------------------------------------------------------------- /tcpip/link/rawfile/blockingpoll_amd64.s: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "textflag.h" 16 | 17 | // BlockingPoll makes the ppoll() syscall while calling the version of 18 | // entersyscall that relinquishes the P so that other Gs can run. 
This is meant 19 | // to be called in cases when the syscall is expected to block. 20 | // 21 | // func BlockingPoll(fds *PollEvent, nfds int, timeout *syscall.Timespec) (n int, err syscall.Errno) 22 | TEXT ·BlockingPoll(SB),NOSPLIT,$0-40 23 | CALL ·callEntersyscallblock(SB) 24 | MOVQ fds+0(FP), DI 25 | MOVQ nfds+8(FP), SI 26 | MOVQ timeout+16(FP), DX 27 | MOVQ $0x0, R10 // sigmask parameter which isn't used here 28 | MOVQ $0x10f, AX // SYS_PPOLL 29 | SYSCALL 30 | CMPQ AX, $0xfffffffffffff001 31 | JLS ok 32 | MOVQ $-1, n+24(FP) 33 | NEGQ AX 34 | MOVQ AX, err+32(FP) 35 | CALL ·callExitsyscall(SB) 36 | RET 37 | ok: 38 | MOVQ AX, n+24(FP) 39 | MOVQ $0, err+32(FP) 40 | CALL ·callExitsyscall(SB) 41 | RET 42 | -------------------------------------------------------------------------------- /tcpip/link/rawfile/blockingpoll_arm64.s: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "textflag.h" 16 | 17 | // BlockingPoll makes the ppoll() syscall while calling the version of 18 | // entersyscall that relinquishes the P so that other Gs can run. This is meant 19 | // to be called in cases when the syscall is expected to block. 
20 | // 21 | // func BlockingPoll(fds *PollEvent, nfds int, timeout *syscall.Timespec) (n int, err syscall.Errno) 22 | TEXT ·BlockingPoll(SB),NOSPLIT,$0-40 23 | BL ·callEntersyscallblock(SB) 24 | MOVD fds+0(FP), R0 25 | MOVD nfds+8(FP), R1 26 | MOVD timeout+16(FP), R2 27 | MOVD $0x0, R3 // sigmask parameter which isn't used here 28 | MOVD $0x49, R8 // SYS_PPOLL 29 | SVC 30 | CMP $0xfffffffffffff001, R0 31 | BLS ok 32 | MOVD $-1, R1 33 | MOVD R1, n+24(FP) 34 | NEG R0, R0 35 | MOVD R0, err+32(FP) 36 | BL ·callExitsyscall(SB) 37 | RET 38 | ok: 39 | MOVD R0, n+24(FP) 40 | MOVD $0, err+32(FP) 41 | BL ·callExitsyscall(SB) 42 | RET 43 | -------------------------------------------------------------------------------- /tcpip/link/rawfile/blockingpoll_noyield_unsafe.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // +build linux,!amd64,!arm64 16 | 17 | package rawfile 18 | 19 | import ( 20 | "syscall" 21 | "unsafe" 22 | ) 23 | 24 | // BlockingPoll is just a stub function that forwards to the ppoll() system call 25 | // on non-amd64 and non-arm64 platforms. 
26 | func BlockingPoll(fds *PollEvent, nfds int, timeout *syscall.Timespec) (int, syscall.Errno) { 27 | n, _, e := syscall.Syscall6(syscall.SYS_PPOLL, uintptr(unsafe.Pointer(fds)), 28 | uintptr(nfds), uintptr(unsafe.Pointer(timeout)), 0, 0, 0) 29 | 30 | return int(n), e 31 | } 32 | -------------------------------------------------------------------------------- /tcpip/link/rawfile/blockingpoll_yield_unsafe.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // +build linux,amd64 linux,arm64 16 | // +build go1.12 17 | // +build !go1.14 18 | 19 | // Check go:linkname function signatures when updating Go version. 20 | 21 | package rawfile 22 | 23 | import ( 24 | "syscall" 25 | _ "unsafe" // for go:linkname 26 | ) 27 | 28 | // BlockingPoll on amd64/arm64 makes the ppoll() syscall while calling the 29 | // version of entersyscall that relinquishes the P so that other Gs can 30 | // run. This is meant to be called in cases when the syscall is expected to 31 | // block. On non amd64/arm64 platforms it just forwards to the ppoll() system 32 | // call. 33 | // 34 | //go:noescape 35 | func BlockingPoll(fds *PollEvent, nfds int, timeout *syscall.Timespec) (int, syscall.Errno) 36 | 37 | // Use go:linkname to call into the runtime. 
As of Go 1.12 this has to 38 | // be done from Go code so that we make an ABIInternal call to an 39 | // ABIInternal function; see https://golang.org/issue/27539. 40 | 41 | // We need to call both entersyscallblock and exitsyscall this way so 42 | // that the runtime's check on the stack pointer lines up. 43 | 44 | // Note that calling an unexported function in the runtime package is 45 | // unsafe and this hack is likely to break in future Go releases. 46 | 47 | //go:linkname entersyscallblock runtime.entersyscallblock 48 | func entersyscallblock() 49 | 50 | //go:linkname exitsyscall runtime.exitsyscall 51 | func exitsyscall() 52 | 53 | // These forwarding functions must be nosplit because 1) we must 54 | // disallow preemption between entersyscallblock and exitsyscall, and 55 | // 2) we have an untyped assembly frame on the stack which can not be 56 | // grown or moved. 57 | 58 | //go:nosplit 59 | func callEntersyscallblock() { 60 | entersyscallblock() 61 | } 62 | 63 | //go:nosplit 64 | func callExitsyscall() { 65 | exitsyscall() 66 | } 67 | -------------------------------------------------------------------------------- /tcpip/link/rawfile/errors.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | // +build linux 16 | 17 | package rawfile 18 | 19 | import ( 20 | "fmt" 21 | "syscall" 22 | 23 | "github.com/google/netstack/tcpip" 24 | ) 25 | 26 | const maxErrno = 134 27 | 28 | var translations [maxErrno]*tcpip.Error 29 | 30 | // TranslateErrno translates an errno from the syscall package into a 31 | // *tcpip.Error. 32 | // 33 | // Valid, but unrecognized errnos will be translated to 34 | // tcpip.ErrInvalidEndpointState (EINVAL). Panics if e >= maxErrno (outside the translation table). 35 | func TranslateErrno(e syscall.Errno) *tcpip.Error { 36 | if err := translations[e]; err != nil { 37 | return err 38 | } 39 | return tcpip.ErrInvalidEndpointState 40 | } 41 | 42 | func addTranslation(host syscall.Errno, trans *tcpip.Error) { 43 | if translations[host] != nil { 44 | panic(fmt.Sprintf("duplicate translation for host errno %q (%d)", host.Error(), host)) 45 | } 46 | translations[host] = trans 47 | } 48 | 49 | func init() { 50 | addTranslation(syscall.EEXIST, tcpip.ErrDuplicateAddress) 51 | addTranslation(syscall.ENETUNREACH, tcpip.ErrNoRoute) 52 | addTranslation(syscall.EINVAL, tcpip.ErrInvalidEndpointState) 53 | addTranslation(syscall.EALREADY, tcpip.ErrAlreadyConnecting) 54 | addTranslation(syscall.EISCONN, tcpip.ErrAlreadyConnected) 55 | addTranslation(syscall.EADDRINUSE, tcpip.ErrPortInUse) 56 | addTranslation(syscall.EADDRNOTAVAIL, tcpip.ErrBadLocalAddress) 57 | addTranslation(syscall.EPIPE, tcpip.ErrClosedForSend) 58 | addTranslation(syscall.EWOULDBLOCK, tcpip.ErrWouldBlock) 59 | addTranslation(syscall.ECONNREFUSED, tcpip.ErrConnectionRefused) 60 | addTranslation(syscall.ETIMEDOUT, tcpip.ErrTimeout) 61 | addTranslation(syscall.EINPROGRESS, tcpip.ErrConnectStarted) 62 | addTranslation(syscall.EDESTADDRREQ, tcpip.ErrDestinationRequired) 63 | addTranslation(syscall.ENOTSUP, tcpip.ErrNotSupported) 64 | addTranslation(syscall.ENOTTY, tcpip.ErrQueueSizeNotSupported) 65 | addTranslation(syscall.ENOTCONN, tcpip.ErrNotConnected) 66 | addTranslation(syscall.ECONNRESET,
tcpip.ErrConnectionReset) 67 | addTranslation(syscall.ECONNABORTED, tcpip.ErrConnectionAborted) 68 | addTranslation(syscall.EMSGSIZE, tcpip.ErrMessageTooLong) 69 | addTranslation(syscall.ENOBUFS, tcpip.ErrNoBufferSpace) 70 | } 71 | -------------------------------------------------------------------------------- /tcpip/link/sharedmem/pipe/pipe.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package pipe implements a shared memory ring buffer on which a single reader 16 | // and a single writer can operate (read/write) concurrently. The ring buffer 17 | // allows for data of different sizes to be written, and preserves the boundary 18 | // of the written data. 19 | // 20 | // Example usage is as follows: 21 | // 22 | // wb := t.Push(20) 23 | // // Write data to wb. 24 | // t.Flush() 25 | // 26 | // rb := r.Pull() 27 | // // Do something with data in rb. 
28 | // r.Flush() 29 | package pipe 30 | 31 | import ( 32 | "math" 33 | ) 34 | 35 | const ( 36 | jump uint64 = math.MaxUint32 + 1 37 | offsetMask uint64 = math.MaxUint32 38 | revolutionMask uint64 = ^offsetMask 39 | 40 | sizeOfSlotHeader = 8 // sizeof(uint64) 41 | slotFree uint64 = 1 << 63 42 | slotSizeMask uint64 = math.MaxUint32 43 | ) 44 | 45 | // payloadToSlotSize calculates the total size of a slot based on its payload 46 | // size. The total size is the header size, plus the payload size, plus padding 47 | // if necessary to make the total size a multiple of sizeOfSlotHeader. 48 | func payloadToSlotSize(payloadSize uint64) uint64 { 49 | s := sizeOfSlotHeader + payloadSize 50 | return (s + sizeOfSlotHeader - 1) &^ (sizeOfSlotHeader - 1) 51 | } 52 | 53 | // slotToPayloadSize calculates the payload size of a slot based on the total 54 | // size of the slot. This is only meant to be used when creating slots that 55 | // don't carry information (e.g., free slots or wrap slots). 56 | func slotToPayloadSize(offset uint64) uint64 { 57 | return offset - sizeOfSlotHeader 58 | } 59 | 60 | // pipe is a basic data structure used by both (transmit & receive) ends of a 61 | // pipe. Indices into this pipe are split into two fields: offset, which counts 62 | // the number of bytes from the beginning of the buffer, and revolution, which 63 | // counts the number of times the index has wrapped around. 64 | type pipe struct { 65 | buffer []byte 66 | } 67 | 68 | // init initializes the pipe buffer such that its size is a multiple of the size 69 | // of the slot header. 70 | func (p *pipe) init(b []byte) { 71 | p.buffer = b[:len(b)&^(sizeOfSlotHeader-1)] 72 | } 73 | 74 | // data returns a section of the buffer starting at the given index (which may 75 | // include revolution information) and with the given size.
76 | func (p *pipe) data(idx uint64, size uint64) []byte { 77 | return p.buffer[(idx&offsetMask)+sizeOfSlotHeader:][:size] 78 | } 79 | -------------------------------------------------------------------------------- /tcpip/link/sharedmem/pipe/pipe_unsafe.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package pipe 16 | 17 | import ( 18 | "sync/atomic" 19 | "unsafe" 20 | ) 21 | 22 | func (p *pipe) write(idx uint64, v uint64) { 23 | ptr := (*uint64)(unsafe.Pointer(&p.buffer[idx&offsetMask:][:8][0])) 24 | *ptr = v 25 | } 26 | 27 | func (p *pipe) writeAtomic(idx uint64, v uint64) { 28 | ptr := (*uint64)(unsafe.Pointer(&p.buffer[idx&offsetMask:][:8][0])) 29 | atomic.StoreUint64(ptr, v) 30 | } 31 | 32 | func (p *pipe) readAtomic(idx uint64) uint64 { 33 | ptr := (*uint64)(unsafe.Pointer(&p.buffer[idx&offsetMask:][:8][0])) 34 | return atomic.LoadUint64(ptr) 35 | } 36 | -------------------------------------------------------------------------------- /tcpip/link/sharedmem/pipe/rx.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package pipe 16 | 17 | // Rx is the receive side of the shared memory ring buffer. 18 | type Rx struct { 19 | p pipe 20 | 21 | tail uint64 22 | head uint64 23 | } 24 | 25 | // Init initializes the receive end of the pipe. In the initial state, the next 26 | // slot to be inspected is the very first one. 27 | func (r *Rx) Init(b []byte) { 28 | r.p.init(b) 29 | r.tail = 0xfffffffe * jump 30 | r.head = r.tail 31 | } 32 | 33 | // Pull reads the next buffer from the pipe, returning nil if there isn't one 34 | // currently available. 35 | // 36 | // The returned slice is available until Flush() is next called. After that, it 37 | // must not be touched. 38 | func (r *Rx) Pull() []byte { 39 | if r.head == r.tail+jump { 40 | // We've already pulled the whole pipe. 41 | return nil 42 | } 43 | 44 | header := r.p.readAtomic(r.head) 45 | if header&slotFree != 0 { 46 | // The next slot is free, we can't pull it yet. 47 | return nil 48 | } 49 | 50 | payloadSize := header & slotSizeMask 51 | newHead := r.head + payloadToSlotSize(payloadSize) 52 | headWrap := (r.head & revolutionMask) | uint64(len(r.p.buffer)) 53 | 54 | // Check if this is a wrapping slot. If that's the case, it carries no 55 | // data, so we just skip it and try again from the first slot. 
56 | if int64(newHead-headWrap) >= 0 { 57 | if int64(newHead-headWrap) > int64(jump) || newHead&offsetMask != 0 { 58 | return nil 59 | } 60 | 61 | if r.tail == r.head { 62 | // If this is the first pull since the last Flush() 63 | // call, we flush the state so that the sender can use 64 | // this space if it needs to. 65 | r.p.writeAtomic(r.head, slotFree|slotToPayloadSize(newHead-r.head)) 66 | r.tail = newHead 67 | } 68 | 69 | r.head = newHead 70 | return r.Pull() 71 | } 72 | 73 | // Grab the buffer before updating r.head. 74 | b := r.p.data(r.head, payloadSize) 75 | r.head = newHead 76 | return b 77 | } 78 | 79 | // Flush tells the transmitter that all buffers pulled since the last Flush() 80 | // have been used, so the transmitter is free to use their slots for further 81 | // transmission. It is a no-op when there is nothing pending to release (head == tail). 82 | func (r *Rx) Flush() { 83 | if r.head == r.tail { 84 | return 85 | } 86 | r.p.writeAtomic(r.tail, slotFree|slotToPayloadSize(r.head-r.tail)) 87 | r.tail = r.head 88 | } 89 | 90 | // Bytes returns the byte slice on which the pipe operates. 91 | func (r *Rx) Bytes() []byte { 92 | return r.p.buffer 93 | } 94 | -------------------------------------------------------------------------------- /tcpip/link/sharedmem/queue/tx.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | package queue 16 | 17 | import ( 18 | "encoding/binary" 19 | 20 | "github.com/google/netstack/tcpip/link/sharedmem/pipe" 21 | "log" 22 | ) 23 | 24 | const ( 25 | // Offsets within a packet header. 26 | packetID = 0 27 | packetSize = 8 28 | packetReserved = 12 29 | 30 | sizeOfPacketHeader = 16 31 | 32 | // Offsets within a buffer descriptor. 33 | bufferOffset = 0 34 | bufferSize = 8 35 | 36 | sizeOfBufferDescriptor = 12 37 | ) 38 | 39 | // TxBuffer is the descriptor of a transmit buffer. 40 | type TxBuffer struct { 41 | Next *TxBuffer 42 | Offset uint64 43 | Size uint32 44 | } 45 | 46 | // Tx is a transmit queue. It is implemented with one tx and one rx pipe: the 47 | // tx pipe is used to request the transmission of packets, while the rx pipe 48 | // is used to learn which transmissions have completed. 49 | // 50 | // This struct is thread-compatible. 51 | type Tx struct { 52 | tx pipe.Tx 53 | rx pipe.Rx 54 | } 55 | 56 | // Init initializes the transmit queue with the given pipes. 57 | func (t *Tx) Init(tx, rx []byte) { 58 | t.tx.Init(tx) 59 | t.rx.Init(rx) 60 | } 61 | 62 | // Enqueue queues the given linked list of buffers for transmission as one 63 | // packet. While they are queued, the caller must not modify them. 64 | func (t *Tx) Enqueue(id uint64, totalDataLen, bufferCount uint32, buffer *TxBuffer) bool { 65 | // Reserve room in the tx pipe. 66 | totalLen := sizeOfPacketHeader + uint64(bufferCount)*sizeOfBufferDescriptor 67 | 68 | b := t.tx.Push(totalLen) 69 | if b == nil { 70 | return false 71 | } 72 | 73 | // Initialize the packet and buffer descriptors.
74 | binary.LittleEndian.PutUint64(b[packetID:], id) 75 | binary.LittleEndian.PutUint32(b[packetSize:], totalDataLen) 76 | binary.LittleEndian.PutUint32(b[packetReserved:], 0) 77 | 78 | offset := sizeOfPacketHeader 79 | for i := bufferCount; i != 0; i-- { 80 | binary.LittleEndian.PutUint64(b[offset+bufferOffset:], buffer.Offset) 81 | binary.LittleEndian.PutUint32(b[offset+bufferSize:], buffer.Size) 82 | offset += sizeOfBufferDescriptor 83 | buffer = buffer.Next 84 | } 85 | 86 | t.tx.Flush() 87 | 88 | return true 89 | } 90 | 91 | // CompletedPacket pulls the next completion from the rx pipe and returns the 92 | // id it carries, which refers to a value passed on a previous call to 93 | // Enqueue(). Completions that are not exactly 8 bytes long are logged and skipped; ok is false when the pipe has nothing to pull. 94 | func (t *Tx) CompletedPacket() (id uint64, ok bool) { 95 | for { 96 | b := t.rx.Pull() 97 | if b == nil { 98 | return 0, false 99 | } 100 | 101 | if len(b) != 8 { 102 | t.rx.Flush() 103 | log.Printf("Ignoring completed packet: size (%v) differs from expected (%v)", len(b), 8) 104 | continue 105 | } 106 | 107 | v := binary.LittleEndian.Uint64(b) 108 | 109 | t.rx.Flush() 110 | 111 | return v, true 112 | } 113 | } 114 | 115 | // Bytes returns the byte slices on which the queue operates. 116 | func (t *Tx) Bytes() (tx, rx []byte) { 117 | return t.tx.Bytes(), t.rx.Bytes() 118 | } 119 | 120 | // TxPacketInfo holds information about a packet sent on a tx queue. 121 | type TxPacketInfo struct { 122 | ID uint64 123 | Size uint32 124 | Reserved uint32 125 | BufferCount int 126 | } 127 | 128 | // DecodeTxPacketHeader decodes the header of a packet sent over a tx queue.
129 | func DecodeTxPacketHeader(b []byte) TxPacketInfo { 130 | return TxPacketInfo{ 131 | ID: binary.LittleEndian.Uint64(b[packetID:]), 132 | Size: binary.LittleEndian.Uint32(b[packetSize:]), 133 | Reserved: binary.LittleEndian.Uint32(b[packetReserved:]), 134 | BufferCount: (len(b) - sizeOfPacketHeader) / sizeOfBufferDescriptor, 135 | } 136 | } 137 | 138 | // DecodeTxBufferHeader decodes the header of the i-th buffer of a packet sent 139 | // over a tx queue. 140 | func DecodeTxBufferHeader(b []byte, i int) TxBuffer { 141 | b = b[sizeOfPacketHeader+i*sizeOfBufferDescriptor:] 142 | return TxBuffer{ 143 | Offset: binary.LittleEndian.Uint64(b[bufferOffset:]), 144 | Size: binary.LittleEndian.Uint32(b[bufferSize:]), 145 | } 146 | } 147 | 148 | // EncodeTxCompletion encodes a tx completion header. 149 | func EncodeTxCompletion(b []byte, id uint64) { 150 | binary.LittleEndian.PutUint64(b, id) 151 | } 152 | -------------------------------------------------------------------------------- /tcpip/link/sharedmem/rx.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | // +build linux 16 | 17 | package sharedmem 18 | 19 | import ( 20 | "sync/atomic" 21 | "syscall" 22 | 23 | "github.com/google/netstack/tcpip/link/rawfile" 24 | "github.com/google/netstack/tcpip/link/sharedmem/queue" 25 | ) 26 | 27 | // rx holds all state associated with an rx queue. 28 | type rx struct { 29 | data []byte 30 | sharedData []byte 31 | q queue.Rx 32 | eventFD int 33 | } 34 | 35 | // init initializes all state needed by the rx queue based on the information 36 | // provided. 37 | // 38 | // The caller always retains ownership of all file descriptors passed in. The 39 | // queue implementation will duplicate any that it may need in the future. 40 | func (r *rx) init(mtu uint32, c *QueueConfig) error { 41 | // Map in all buffers. 42 | txPipe, err := getBuffer(c.TxPipeFD) 43 | if err != nil { 44 | return err 45 | } 46 | 47 | rxPipe, err := getBuffer(c.RxPipeFD) 48 | if err != nil { 49 | syscall.Munmap(txPipe) 50 | return err 51 | } 52 | 53 | data, err := getBuffer(c.DataFD) 54 | if err != nil { 55 | syscall.Munmap(txPipe) 56 | syscall.Munmap(rxPipe) 57 | return err 58 | } 59 | 60 | sharedData, err := getBuffer(c.SharedDataFD) 61 | if err != nil { 62 | syscall.Munmap(txPipe) 63 | syscall.Munmap(rxPipe) 64 | syscall.Munmap(data) 65 | return err 66 | } 67 | 68 | // Duplicate the eventFD so that caller can close it but we can still 69 | // use it. 70 | efd, err := syscall.Dup(c.EventFD) 71 | if err != nil { 72 | syscall.Munmap(txPipe) 73 | syscall.Munmap(rxPipe) 74 | syscall.Munmap(data) 75 | syscall.Munmap(sharedData) 76 | return err 77 | } 78 | 79 | // Set the eventfd as non-blocking. 80 | if err := syscall.SetNonblock(efd, true); err != nil { 81 | syscall.Munmap(txPipe) 82 | syscall.Munmap(rxPipe) 83 | syscall.Munmap(data) 84 | syscall.Munmap(sharedData) 85 | syscall.Close(efd) 86 | return err 87 | } 88 | 89 | // Initialize state based on buffers. 
90 | r.q.Init(txPipe, rxPipe, sharedDataPointer(sharedData)) 91 | r.data = data 92 | r.eventFD = efd 93 | r.sharedData = sharedData 94 | 95 | return nil 96 | } 97 | 98 | // cleanup releases all resources allocated during init(). It must only be 99 | // called if init() has previously succeeded. 100 | func (r *rx) cleanup() { 101 | a, b := r.q.Bytes() 102 | syscall.Munmap(a) 103 | syscall.Munmap(b) 104 | 105 | syscall.Munmap(r.data) 106 | syscall.Munmap(r.sharedData) 107 | syscall.Close(r.eventFD) 108 | } 109 | 110 | // postAndReceive posts the provided buffers (if any), and then tries to read 111 | // from the receive queue. 112 | // 113 | // Capacity permitting, it reuses the posted buffer slice to store the buffers 114 | // that were read as well. 115 | // 116 | // This function will block if there aren't any available packets. 117 | func (r *rx) postAndReceive(b []queue.RxBuffer, stopRequested *uint32) ([]queue.RxBuffer, uint32) { 118 | // Post the buffers first. If we cannot post, sleep until we can. We 119 | // never post more than will fit concurrently, so it's safe to wait 120 | // until enough room is available. 121 | if len(b) != 0 && !r.q.PostBuffers(b) { 122 | r.q.EnableNotification() 123 | for !r.q.PostBuffers(b) { 124 | var tmp [8]byte 125 | rawfile.BlockingRead(r.eventFD, tmp[:]) 126 | if atomic.LoadUint32(stopRequested) != 0 { 127 | r.q.DisableNotification() 128 | return nil, 0 129 | } 130 | } 131 | r.q.DisableNotification() 132 | } 133 | 134 | // Read the next set of descriptors. 135 | b, n := r.q.Dequeue(b[:0]) 136 | if len(b) != 0 { 137 | return b, n 138 | } 139 | 140 | // Data isn't immediately available. Enable eventfd notifications. 141 | r.q.EnableNotification() 142 | for { 143 | b, n = r.q.Dequeue(b) 144 | if len(b) != 0 { 145 | break 146 | } 147 | 148 | // Wait for notification. 
149 | var tmp [8]byte 150 | rawfile.BlockingRead(r.eventFD, tmp[:]) 151 | if atomic.LoadUint32(stopRequested) != 0 { 152 | r.q.DisableNotification() 153 | return nil, 0 154 | } 155 | } 156 | r.q.DisableNotification() 157 | 158 | return b, n 159 | } 160 | -------------------------------------------------------------------------------- /tcpip/link/sharedmem/sharedmem_unsafe.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package sharedmem 16 | 17 | import ( 18 | "unsafe" 19 | ) 20 | 21 | // sharedDataPointer converts the shared data slice into a pointer so that it 22 | // can be used in atomic operations. 23 | func sharedDataPointer(sharedData []byte) *uint32 { 24 | return (*uint32)(unsafe.Pointer(&sharedData[0:4][0])) 25 | } 26 | -------------------------------------------------------------------------------- /tcpip/link/sniffer/pcap.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package sniffer 16 | 17 | import "time" 18 | 19 | type pcapHeader struct { 20 | // MagicNumber is the file magic number. 21 | MagicNumber uint32 22 | 23 | // VersionMajor is the major version number. 24 | VersionMajor uint16 25 | 26 | // VersionMinor is the minor version number. 27 | VersionMinor uint16 28 | 29 | // Thiszone is the GMT to local correction. 30 | Thiszone int32 31 | 32 | // Sigfigs is the accuracy of timestamps. 33 | Sigfigs uint32 34 | 35 | // Snaplen is the max length of captured packets, in octets. 36 | Snaplen uint32 37 | 38 | // Network is the data link type. 39 | Network uint32 40 | } 41 | 42 | const pcapPacketHeaderLen = 16 43 | 44 | type pcapPacketHeader struct { 45 | // Seconds is the timestamp seconds. 46 | Seconds uint32 47 | 48 | // Microseconds is the timestamp microseconds. 49 | Microseconds uint32 50 | 51 | // IncludedLength is the number of octets of packet saved in file. 52 | IncludedLength uint32 53 | 54 | // OriginalLength is the actual length of packet. 
55 | OriginalLength uint32 56 | } 57 | 58 | func newPCAPPacketHeader(incLen, orgLen uint32) pcapPacketHeader { 59 | now := time.Now() 60 | return pcapPacketHeader{ 61 | Seconds: uint32(now.Unix()), 62 | Microseconds: uint32(now.Nanosecond() / 1000), 63 | IncludedLength: incLen, 64 | OriginalLength: orgLen, 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /tcpip/link/tun/tun_unsafe.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // +build linux 16 | 17 | // Package tun contains methods to open TAP and TUN devices. 18 | package tun 19 | 20 | import ( 21 | "syscall" 22 | "unsafe" 23 | ) 24 | 25 | // Open opens the specified TUN device, sets it to non-blocking mode, and 26 | // returns its file descriptor. 27 | func Open(name string) (int, error) { 28 | return open(name, syscall.IFF_TUN|syscall.IFF_NO_PI) 29 | } 30 | 31 | // OpenTAP opens the specified TAP device, sets it to non-blocking mode, and 32 | // returns its file descriptor. 
33 | func OpenTAP(name string) (int, error) { 34 | return open(name, syscall.IFF_TAP|syscall.IFF_NO_PI) 35 | } 36 | 37 | func open(name string, flags uint16) (int, error) { 38 | fd, err := syscall.Open("/dev/net/tun", syscall.O_RDWR, 0) 39 | if err != nil { 40 | return -1, err 41 | } 42 | 43 | var ifr struct { 44 | name [16]byte 45 | flags uint16 46 | _ [22]byte 47 | } 48 | 49 | copy(ifr.name[:], name) 50 | ifr.flags = flags 51 | _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.TUNSETIFF, uintptr(unsafe.Pointer(&ifr))) 52 | if errno != 0 { 53 | syscall.Close(fd) 54 | return -1, errno 55 | } 56 | 57 | if err = syscall.SetNonblock(fd, true); err != nil { 58 | syscall.Close(fd) 59 | return -1, err 60 | } 61 | 62 | return fd, nil 63 | } 64 | -------------------------------------------------------------------------------- /tcpip/link/waitable/waitable.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package waitable provides the implementation of data-link layer endpoints 16 | // that wrap other endpoints, and can wait for inflight calls to WritePacket or 17 | // DeliverNetworkPacket to finish (and new ones to be prevented). 
18 | // 19 | // Waitable endpoints can be used in the networking stack by calling New(eID) to 20 | // create a new endpoint, where eID is the ID of the endpoint being wrapped, 21 | // and then passing it as an argument to Stack.CreateNIC(). 22 | package waitable 23 | 24 | import ( 25 | "github.com/google/netstack/gate" 26 | "github.com/google/netstack/tcpip" 27 | "github.com/google/netstack/tcpip/buffer" 28 | "github.com/google/netstack/tcpip/stack" 29 | ) 30 | 31 | // Endpoint is a waitable link-layer endpoint. 32 | type Endpoint struct { 33 | dispatchGate gate.Gate 34 | dispatcher stack.NetworkDispatcher 35 | 36 | writeGate gate.Gate 37 | lower stack.LinkEndpoint 38 | } 39 | 40 | // New creates a new waitable link-layer endpoint. It wraps around another 41 | // endpoint and allows the caller to block new write/dispatch calls and wait for 42 | // the inflight ones to finish before returning. 43 | func New(lower stack.LinkEndpoint) *Endpoint { 44 | return &Endpoint{ 45 | lower: lower, 46 | } 47 | } 48 | 49 | // DeliverNetworkPacket implements stack.NetworkDispatcher.DeliverNetworkPacket. 50 | // It is called by the link-layer endpoint being wrapped when a packet arrives, 51 | // and only forwards to the actual dispatcher if Wait or WaitDispatch haven't 52 | // been called. 53 | func (e *Endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) { 54 | if !e.dispatchGate.Enter() { 55 | return 56 | } 57 | 58 | e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, vv) 59 | e.dispatchGate.Leave() 60 | } 61 | 62 | // Attach implements stack.LinkEndpoint.Attach. It saves the dispatcher and 63 | // registers with the lower endpoint as its dispatcher so that "e" is called 64 | // for inbound packets. 
65 | func (e *Endpoint) Attach(dispatcher stack.NetworkDispatcher) { 66 | e.dispatcher = dispatcher 67 | e.lower.Attach(e) 68 | } 69 | 70 | // IsAttached implements stack.LinkEndpoint.IsAttached. 71 | func (e *Endpoint) IsAttached() bool { 72 | return e.dispatcher != nil 73 | } 74 | 75 | // MTU implements stack.LinkEndpoint.MTU. It just forwards the request to the 76 | // lower endpoint. 77 | func (e *Endpoint) MTU() uint32 { 78 | return e.lower.MTU() 79 | } 80 | 81 | // Capabilities implements stack.LinkEndpoint.Capabilities. It just forwards the 82 | // request to the lower endpoint. 83 | func (e *Endpoint) Capabilities() stack.LinkEndpointCapabilities { 84 | return e.lower.Capabilities() 85 | } 86 | 87 | // MaxHeaderLength implements stack.LinkEndpoint.MaxHeaderLength. It just 88 | // forwards the request to the lower endpoint. 89 | func (e *Endpoint) MaxHeaderLength() uint16 { 90 | return e.lower.MaxHeaderLength() 91 | } 92 | 93 | // LinkAddress implements stack.LinkEndpoint.LinkAddress. It just forwards the 94 | // request to the lower endpoint. 95 | func (e *Endpoint) LinkAddress() tcpip.LinkAddress { 96 | return e.lower.LinkAddress() 97 | } 98 | 99 | // WritePacket implements stack.LinkEndpoint.WritePacket. It is called by 100 | // higher-level protocols to write packets. It only forwards packets to the 101 | // lower endpoint if Wait or WaitWrite haven't been called. 102 | func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error { 103 | if !e.writeGate.Enter() { 104 | return nil 105 | } 106 | 107 | err := e.lower.WritePacket(r, gso, hdr, payload, protocol) 108 | e.writeGate.Leave() 109 | return err 110 | } 111 | 112 | // WaitWrite prevents new calls to WritePacket from reaching the lower endpoint, 113 | // and waits for inflight ones to finish before returning. 
114 | func (e *Endpoint) WaitWrite() { 115 | e.writeGate.Close() 116 | } 117 | 118 | // WaitDispatch prevents new calls to DeliverNetworkPacket from reaching the 119 | // actual dispatcher, and waits for inflight ones to finish before returning. 120 | func (e *Endpoint) WaitDispatch() { 121 | e.dispatchGate.Close() 122 | } 123 | 124 | // Wait implements stack.LinkEndpoint.Wait. 125 | func (e *Endpoint) Wait() {} 126 | -------------------------------------------------------------------------------- /tcpip/network/arp/arp_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package arp_test 16 | 17 | import ( 18 | "strconv" 19 | "testing" 20 | "time" 21 | 22 | "github.com/google/netstack/tcpip" 23 | "github.com/google/netstack/tcpip/buffer" 24 | "github.com/google/netstack/tcpip/header" 25 | "github.com/google/netstack/tcpip/link/channel" 26 | "github.com/google/netstack/tcpip/link/sniffer" 27 | "github.com/google/netstack/tcpip/network/arp" 28 | "github.com/google/netstack/tcpip/network/ipv4" 29 | "github.com/google/netstack/tcpip/stack" 30 | "github.com/google/netstack/tcpip/transport/icmp" 31 | ) 32 | 33 | const ( 34 | stackLinkAddr = tcpip.LinkAddress("\x0a\x0a\x0b\x0b\x0c\x0c") 35 | stackAddr1 = tcpip.Address("\x0a\x00\x00\x01") 36 | stackAddr2 = tcpip.Address("\x0a\x00\x00\x02") 37 | stackAddrBad = tcpip.Address("\x0a\x00\x00\x03") 38 | ) 39 | 40 | type testContext struct { 41 | t *testing.T 42 | linkEP *channel.Endpoint 43 | s *stack.Stack 44 | } 45 | 46 | func newTestContext(t *testing.T) *testContext { 47 | s := stack.New(stack.Options{ 48 | NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), arp.NewProtocol()}, 49 | TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol4()}, 50 | }) 51 | 52 | const defaultMTU = 65536 53 | ep := channel.New(256, defaultMTU, stackLinkAddr) 54 | wep := stack.LinkEndpoint(ep) 55 | 56 | if testing.Verbose() { 57 | wep = sniffer.New(ep) 58 | } 59 | if err := s.CreateNIC(1, wep); err != nil { 60 | t.Fatalf("CreateNIC failed: %v", err) 61 | } 62 | 63 | if err := s.AddAddress(1, ipv4.ProtocolNumber, stackAddr1); err != nil { 64 | t.Fatalf("AddAddress for ipv4 failed: %v", err) 65 | } 66 | if err := s.AddAddress(1, ipv4.ProtocolNumber, stackAddr2); err != nil { 67 | t.Fatalf("AddAddress for ipv4 failed: %v", err) 68 | } 69 | if err := s.AddAddress(1, arp.ProtocolNumber, arp.ProtocolAddress); err != nil { 70 | t.Fatalf("AddAddress for arp failed: %v", err) 71 | } 72 | 73 | s.SetRouteTable([]tcpip.Route{{ 74 | Destination: header.IPv4EmptySubnet, 75 | NIC: 1, 76 | }}) 77 | 78 
| return &testContext{ 79 | t: t, 80 | s: s, 81 | linkEP: ep, 82 | } 83 | } 84 | 85 | func (c *testContext) cleanup() { 86 | close(c.linkEP.C) 87 | } 88 | 89 | func TestDirectRequest(t *testing.T) { 90 | c := newTestContext(t) 91 | defer c.cleanup() 92 | 93 | const senderMAC = "\x01\x02\x03\x04\x05\x06" 94 | const senderIPv4 = "\x0a\x00\x00\x02" 95 | 96 | v := make(buffer.View, header.ARPSize) 97 | h := header.ARP(v) 98 | h.SetIPv4OverEthernet() 99 | h.SetOp(header.ARPRequest) 100 | copy(h.HardwareAddressSender(), senderMAC) 101 | copy(h.ProtocolAddressSender(), senderIPv4) 102 | 103 | inject := func(addr tcpip.Address) { 104 | copy(h.ProtocolAddressTarget(), addr) 105 | c.linkEP.Inject(arp.ProtocolNumber, v.ToVectorisedView()) 106 | } 107 | 108 | for i, address := range []tcpip.Address{stackAddr1, stackAddr2} { 109 | t.Run(strconv.Itoa(i), func(t *testing.T) { 110 | inject(address) 111 | pkt := <-c.linkEP.C 112 | if pkt.Proto != arp.ProtocolNumber { 113 | t.Fatalf("expected ARP response, got network protocol number %d", pkt.Proto) 114 | } 115 | rep := header.ARP(pkt.Header) 116 | if !rep.IsValid() { 117 | t.Fatalf("invalid ARP response len(pkt.Header)=%d", len(pkt.Header)) 118 | } 119 | if got, want := tcpip.LinkAddress(rep.HardwareAddressSender()), stackLinkAddr; got != want { 120 | t.Errorf("got HardwareAddressSender = %s, want = %s", got, want) 121 | } 122 | if got, want := tcpip.Address(rep.ProtocolAddressSender()), tcpip.Address(h.ProtocolAddressTarget()); got != want { 123 | t.Errorf("got ProtocolAddressSender = %s, want = %s", got, want) 124 | } 125 | if got, want := tcpip.LinkAddress(rep.HardwareAddressTarget()), tcpip.LinkAddress(h.HardwareAddressSender()); got != want { 126 | t.Errorf("got HardwareAddressTarget = %s, want = %s", got, want) 127 | } 128 | if got, want := tcpip.Address(rep.ProtocolAddressTarget()), tcpip.Address(h.ProtocolAddressSender()); got != want { 129 | t.Errorf("got ProtocolAddressTarget = %s, want = %s", got, want) 130 | } 131 | }) 
132 | } 133 | 134 | inject(stackAddrBad) 135 | select { 136 | case pkt := <-c.linkEP.C: 137 | t.Errorf("stackAddrBad: unexpected packet sent, Proto=%v", pkt.Proto) 138 | case <-time.After(100 * time.Millisecond): 139 | // Sleep tests are gross, but this will only potentially flake 140 | // if there's a bug. If there is no bug this will reliably 141 | // succeed. 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /tcpip/network/fragmentation/frag_heap.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package fragmentation 16 | 17 | import ( 18 | "container/heap" 19 | "fmt" 20 | 21 | "github.com/google/netstack/tcpip/buffer" 22 | ) 23 | 24 | type fragment struct { 25 | offset uint16 26 | vv buffer.VectorisedView 27 | } 28 | 29 | type fragHeap []fragment 30 | 31 | func (h *fragHeap) Len() int { 32 | return len(*h) 33 | } 34 | 35 | func (h *fragHeap) Less(i, j int) bool { 36 | return (*h)[i].offset < (*h)[j].offset 37 | } 38 | 39 | func (h *fragHeap) Swap(i, j int) { 40 | (*h)[i], (*h)[j] = (*h)[j], (*h)[i] 41 | } 42 | 43 | func (h *fragHeap) Push(x interface{}) { 44 | *h = append(*h, x.(fragment)) 45 | } 46 | 47 | func (h *fragHeap) Pop() interface{} { 48 | old := *h 49 | n := len(old) 50 | x := old[n-1] 51 | *h = old[:n-1] 52 | return x 53 | } 54 | 55 | // reassemble drains the heap and returns a VectorisedView containing a 56 | // reassembled version of the fragments inside the heap. A fragment that 57 | // starts before the end of the data gathered so far has the overlap trimmed 58 | // from its front before being appended. 59 | // 60 | // It returns an error if the heap is empty, if the lowest offset is not 0, or 61 | // if there is a gap between consecutive fragments; on error the remaining 62 | // fragments are left in the heap. 63 | func (h *fragHeap) reassemble() (buffer.VectorisedView, error) { 64 | if h.Len() == 0 { 65 | // heap.Pop would index out of range on an empty heap. 66 | return buffer.VectorisedView{}, fmt.Errorf("no fragments to reassemble") 67 | } 68 | 69 | curr := heap.Pop(h).(fragment) 70 | views := curr.vv.Views() 71 | size := curr.vv.Size() 72 | 73 | if curr.offset != 0 { 74 | return buffer.VectorisedView{}, fmt.Errorf("offset of the first packet is != 0 (%d)", curr.offset) 75 | } 76 | 77 | for h.Len() > 0 { 78 | curr := heap.Pop(h).(fragment) 79 | if int(curr.offset) < size { 80 | curr.vv.TrimFront(size - int(curr.offset)) 81 | } else if int(curr.offset) > size { 82 | return buffer.VectorisedView{}, fmt.Errorf("packet has a hole, expected offset %d, got %d", size, curr.offset) 83 | } 84 | size += curr.vv.Size() 85 | views = append(views, curr.vv.Views()...) 86 | } 87 | return buffer.NewVectorisedView(size, views), nil 88 | } 89 | -------------------------------------------------------------------------------- /tcpip/network/fragmentation/frag_heap_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package fragmentation 16 | 17 | import ( 18 | "container/heap" 19 | "reflect" 20 | "testing" 21 | 22 | "github.com/google/netstack/tcpip/buffer" 23 | ) 24 | 25 | var reassambleTestCases = []struct { 26 | comment string 27 | in []fragment 28 | want buffer.VectorisedView 29 | }{ 30 | { 31 | comment: "Non-overlapping in-order", 32 | in: []fragment{ 33 | {offset: 0, vv: vv(1, "0")}, 34 | {offset: 1, vv: vv(1, "1")}, 35 | }, 36 | want: vv(2, "0", "1"), 37 | }, 38 | { 39 | comment: "Non-overlapping out-of-order", 40 | in: []fragment{ 41 | {offset: 1, vv: vv(1, "1")}, 42 | {offset: 0, vv: vv(1, "0")}, 43 | }, 44 | want: vv(2, "0", "1"), 45 | }, 46 | { 47 | comment: "Duplicated packets", 48 | in: []fragment{ 49 | {offset: 0, vv: vv(1, "0")}, 50 | {offset: 0, vv: vv(1, "0")}, 51 | }, 52 | want: vv(1, "0"), 53 | }, 54 | { 55 | comment: "Overlapping in-order", 56 | in: []fragment{ 57 | {offset: 0, vv: vv(2, "01")}, 58 | {offset: 1, vv: vv(2, "12")}, 59 | }, 60 | want: vv(3, "01", "2"), 61 | }, 62 | { 63 | comment: "Overlapping out-of-order", 64 | in: []fragment{ 65 | {offset: 1, vv: vv(2, "12")}, 66 | {offset: 0, vv: vv(2, "01")}, 67 | }, 68 | want: vv(3, "01", "2"), 69 | }, 70 | { 71 | comment: "Overlapping subset in-order", 72 | in: []fragment{ 73 | {offset: 0, vv: vv(3, "012")}, 74 | {offset: 1, vv: vv(1, "1")}, 75 | }, 76 | want: vv(3, "012"), 77 | }, 78 | { 79 | 
comment: "Overlapping subset out-of-order", 80 | in: []fragment{ 81 | {offset: 1, vv: vv(1, "1")}, 82 | {offset: 0, vv: vv(3, "012")}, 83 | }, 84 | want: vv(3, "012"), 85 | }, 86 | } 87 | 88 | func TestReassamble(t *testing.T) { 89 | for _, c := range reassambleTestCases { 90 | t.Run(c.comment, func(t *testing.T) { 91 | h := make(fragHeap, 0, 8) 92 | heap.Init(&h) 93 | for _, f := range c.in { 94 | heap.Push(&h, f) 95 | } 96 | got, err := h.reassemble() 97 | if err != nil { 98 | t.Fatal(err) 99 | } 100 | if !reflect.DeepEqual(got, c.want) { 101 | t.Errorf("got reassemble(%+v) = %v, want = %v", c.in, got, c.want) 102 | } 103 | }) 104 | } 105 | } 106 | 107 | func TestReassambleFailsForNonZeroOffset(t *testing.T) { 108 | h := make(fragHeap, 0, 8) 109 | heap.Init(&h) 110 | heap.Push(&h, fragment{offset: 1, vv: vv(1, "0")}) 111 | _, err := h.reassemble() 112 | if err == nil { 113 | t.Errorf("reassemble() did not fail when the first packet had offset != 0") 114 | } 115 | } 116 | 117 | func TestReassambleFailsForHoles(t *testing.T) { 118 | h := make(fragHeap, 0, 8) 119 | heap.Init(&h) 120 | heap.Push(&h, fragment{offset: 0, vv: vv(1, "0")}) 121 | heap.Push(&h, fragment{offset: 2, vv: vv(1, "1")}) 122 | _, err := h.reassemble() 123 | if err == nil { 124 | t.Errorf("reassemble() did not fail when there was a hole in the packet") 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /tcpip/network/fragmentation/fragmentation.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package fragmentation contains the implementation of IP fragmentation. 16 | // It is based on RFC 791 and RFC 815. 17 | package fragmentation 18 | 19 | import ( 20 | "log" 21 | "sync" 22 | "time" 23 | 24 | "github.com/google/netstack/tcpip/buffer" 25 | ) 26 | 27 | // DefaultReassembleTimeout is based on the linux stack: net.ipv4.ipfrag_time. 28 | const DefaultReassembleTimeout = 30 * time.Second 29 | 30 | // HighFragThreshold is the threshold at which we start trimming old 31 | // fragmented packets. Linux uses a default value of 4 MB. See 32 | // net.ipv4.ipfrag_high_thresh for more information. 33 | const HighFragThreshold = 4 << 20 // 4MB 34 | 35 | // LowFragThreshold is the level that memory usage is brought down to once we 36 | // start dropping older fragmented packets. It's important that we keep enough 37 | // room for newer packets to be re-assembled. Hence, this needs to be 38 | // sufficiently lower than HighFragThreshold. Linux uses a default value of 3 MB. See 39 | // net.ipv4.ipfrag_low_thresh for more information. 40 | const LowFragThreshold = 3 << 20 // 3MB 41 | 42 | // Fragmentation is the main structure that other modules 43 | // of the stack should use to implement IP Fragmentation. 44 | type Fragmentation struct { 45 | mu sync.Mutex // held by Process while it touches reassemblers, rList and size 46 | highLimit int // eviction starts once size exceeds this 47 | lowLimit int // eviction stops once size is no longer above this 48 | reassemblers map[uint32]*reassembler // in-progress reassemblies keyed by fragmentation id 49 | rList reassemblerList // the same reassemblers, newest at the front; evicted from the back 50 | size int // running total of the bytes reported as consumed by reassemblers 51 | timeout time.Duration // age after which a reassembler is discarded when its id is seen again 52 | } 53 | 54 | // NewFragmentation creates a new Fragmentation.
55 | // 56 | // highMemoryLimit specifies the limit on the memory consumed 57 | // by the fragments stored by Fragmentation (overhead of internal data-structures 58 | // is not accounted). Fragments are dropped when the limit is exceeded. 59 | // 60 | // lowMemoryLimit specifies the level that memory usage is reduced to by dropping 61 | // fragments after exceeding highMemoryLimit. 62 | // 63 | // reassemblingTimeout specifies the maximum time allowed to reassemble a packet. 64 | // Fragments are lazily evicted only when a new packet with an 65 | // already existing fragmentation-id arrives after the timeout. 66 | func NewFragmentation(highMemoryLimit, lowMemoryLimit int, reassemblingTimeout time.Duration) *Fragmentation { 67 | if lowMemoryLimit >= highMemoryLimit { 68 | lowMemoryLimit = highMemoryLimit 69 | } 70 | 71 | if lowMemoryLimit < 0 { 72 | lowMemoryLimit = 0 73 | } 74 | 75 | return &Fragmentation{ 76 | reassemblers: make(map[uint32]*reassembler), 77 | highLimit: highMemoryLimit, 78 | lowLimit: lowMemoryLimit, 79 | timeout: reassemblingTimeout, 80 | } 81 | } 82 | 83 | // Process processes an incoming fragment belonging to an ID and returns a complete 84 | // packet, with the bool result set to true, once all the fragments belonging to that ID have been received. 85 | func (f *Fragmentation) Process(id uint32, first, last uint16, more bool, vv buffer.VectorisedView) (buffer.VectorisedView, bool) { 86 | f.mu.Lock() 87 | r, ok := f.reassemblers[id] 88 | if ok && r.tooOld(f.timeout) { 89 | // This is very likely to be an id-collision or someone performing a slow-rate attack. 90 | f.release(r) 91 | ok = false 92 | } 93 | if !ok { 94 | r = newReassembler(id) 95 | f.reassemblers[id] = r 96 | f.rList.PushFront(r) 97 | } 98 | f.mu.Unlock() 99 | 100 | res, done, consumed := r.process(first, last, more, vv) 101 | 102 | f.mu.Lock() 103 | f.size += consumed 104 | if done { 105 | f.release(r) 106 | } 107 | // Evict reassemblers if we are consuming more memory than highLimit until 108 | // we reach lowLimit.
109 | if f.size > f.highLimit { 110 | for tail := f.rList.Back(); f.size > f.lowLimit && tail != nil; { 111 | prev := tail.Prev() // read before release so the walk never uses a removed node's links 112 | f.release(tail) 113 | tail = prev 114 | } 115 | } 116 | f.mu.Unlock() 117 | return res, done 118 | } 119 | 120 | func (f *Fragmentation) release(r *reassembler) { 121 | // Before releasing a fragment we need to check if r is already marked as done. 122 | // Otherwise, we would delete it twice. 123 | if r.checkDoneOrMark() { 124 | return 125 | } 126 | 127 | delete(f.reassemblers, r.id) 128 | f.rList.Remove(r) 129 | f.size -= r.size 130 | if f.size < 0 { 131 | log.Printf("memory counter < 0 (%d), this is an accounting bug that requires investigation", f.size) 132 | f.size = 0 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /tcpip/network/fragmentation/fragmentation_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package fragmentation 16 | 17 | import ( 18 | "reflect" 19 | "testing" 20 | "time" 21 | 22 | "github.com/google/netstack/tcpip/buffer" 23 | ) 24 | 25 | // vv is a helper to build VectorisedView from different strings.
26 | func vv(size int, pieces ...string) buffer.VectorisedView { 27 | views := make([]buffer.View, len(pieces)) 28 | for i, p := range pieces { 29 | views[i] = []byte(p) 30 | } 31 | 32 | return buffer.NewVectorisedView(size, views) 33 | } 34 | 35 | type processInput struct { 36 | id uint32 37 | first uint16 38 | last uint16 39 | more bool 40 | vv buffer.VectorisedView 41 | } 42 | 43 | type processOutput struct { 44 | vv buffer.VectorisedView 45 | done bool 46 | } 47 | 48 | var processTestCases = []struct { 49 | comment string 50 | in []processInput 51 | out []processOutput 52 | }{ 53 | { 54 | comment: "One ID", 55 | in: []processInput{ 56 | {id: 0, first: 0, last: 1, more: true, vv: vv(2, "01")}, 57 | {id: 0, first: 2, last: 3, more: false, vv: vv(2, "23")}, 58 | }, 59 | out: []processOutput{ 60 | {vv: buffer.VectorisedView{}, done: false}, 61 | {vv: vv(4, "01", "23"), done: true}, 62 | }, 63 | }, 64 | { 65 | comment: "Two IDs", 66 | in: []processInput{ 67 | {id: 0, first: 0, last: 1, more: true, vv: vv(2, "01")}, 68 | {id: 1, first: 0, last: 1, more: true, vv: vv(2, "ab")}, 69 | {id: 1, first: 2, last: 3, more: false, vv: vv(2, "cd")}, 70 | {id: 0, first: 2, last: 3, more: false, vv: vv(2, "23")}, 71 | }, 72 | out: []processOutput{ 73 | {vv: buffer.VectorisedView{}, done: false}, 74 | {vv: buffer.VectorisedView{}, done: false}, 75 | {vv: vv(4, "ab", "cd"), done: true}, 76 | {vv: vv(4, "01", "23"), done: true}, 77 | }, 78 | }, 79 | } 80 | 81 | func TestFragmentationProcess(t *testing.T) { 82 | for _, c := range processTestCases { 83 | t.Run(c.comment, func(t *testing.T) { 84 | f := NewFragmentation(1024, 512, DefaultReassembleTimeout) 85 | for i, in := range c.in { 86 | vv, done := f.Process(in.id, in.first, in.last, in.more, in.vv) 87 | if !reflect.DeepEqual(vv, c.out[i].vv) { 88 | t.Errorf("got Process(%d) = %+v, want = %+v", i, vv, c.out[i].vv) 89 | } 90 | if done != c.out[i].done { 91 | t.Errorf("got Process(%d) = %+v, want = %+v", i, done, c.out[i].done) 92 | 
} 93 | if c.out[i].done { 94 | if _, ok := f.reassemblers[in.id]; ok { 95 | t.Errorf("Process(%d) did not remove buffer from reassemblers", i) 96 | } 97 | for n := f.rList.Front(); n != nil; n = n.Next() { 98 | if n.id == in.id { 99 | t.Errorf("Process(%d) did not remove buffer from rList", i) 100 | } 101 | } 102 | } 103 | } 104 | }) 105 | } 106 | } 107 | 108 | func TestReassemblingTimeout(t *testing.T) { 109 | timeout := time.Millisecond 110 | f := NewFragmentation(1024, 512, timeout) 111 | // Send first fragment with id = 0, first = 0, last = 0, and more = true. 112 | f.Process(0, 0, 0, true, vv(1, "0")) 113 | // Sleep more than the timeout. 114 | time.Sleep(2 * timeout) 115 | // Send another fragment that completes a packet. 116 | // However, no packet should be reassembled because the fragment arrived after the timeout. 117 | _, done := f.Process(0, 1, 1, false, vv(1, "1")) 118 | if done { 119 | t.Errorf("Fragmentation does not respect the reassembling timeout.") 120 | } 121 | } 122 | 123 | func TestMemoryLimits(t *testing.T) { 124 | f := NewFragmentation(3, 1, DefaultReassembleTimeout) 125 | // Send first fragment with id = 0. 126 | f.Process(0, 0, 0, true, vv(1, "0")) 127 | // Send first fragment with id = 1. 128 | f.Process(1, 0, 0, true, vv(1, "1")) 129 | // Send first fragment with id = 2. 130 | f.Process(2, 0, 0, true, vv(1, "2")) 131 | 132 | // Send first fragment with id = 3. This should cause id = 0 and id = 1 to be 133 | // evicted.
134 | f.Process(3, 0, 0, true, vv(1, "3")) 135 | 136 | if _, ok := f.reassemblers[0]; ok { 137 | t.Errorf("Memory limits are not respected: id=0 has not been evicted.") 138 | } 139 | if _, ok := f.reassemblers[1]; ok { 140 | t.Errorf("Memory limits are not respected: id=1 has not been evicted.") 141 | } 142 | if _, ok := f.reassemblers[3]; !ok { 143 | t.Errorf("Implementation of memory limits is wrong: id=3 is not present.") 144 | } 145 | } 146 | 147 | func TestMemoryLimitsIgnoresDuplicates(t *testing.T) { 148 | f := NewFragmentation(1, 0, DefaultReassembleTimeout) 149 | // Send first fragment with id = 0. 150 | f.Process(0, 0, 0, true, vv(1, "0")) 151 | // Send the same packet again. 152 | f.Process(0, 0, 0, true, vv(1, "0")) 153 | 154 | got := f.size 155 | want := 1 156 | if got != want { 157 | t.Errorf("Wrong size, duplicates are not handled correctly: got=%d, want=%d.", got, want) 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /tcpip/network/fragmentation/reassembler.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package fragmentation 16 | 17 | import ( 18 | "container/heap" 19 | "fmt" 20 | "math" 21 | "sync" 22 | "time" 23 | 24 | "github.com/google/netstack/tcpip/buffer" 25 | ) 26 | 27 | type hole struct { 28 | first uint16 29 | last uint16 30 | deleted bool 31 | } 32 | 33 | type reassembler struct { 34 | reassemblerEntry 35 | id uint32 36 | size int 37 | mu sync.Mutex 38 | holes []hole 39 | deleted int 40 | heap fragHeap 41 | done bool 42 | creationTime time.Time 43 | } 44 | 45 | func newReassembler(id uint32) *reassembler { 46 | r := &reassembler{ 47 | id: id, 48 | holes: make([]hole, 0, 16), 49 | deleted: 0, 50 | heap: make(fragHeap, 0, 8), 51 | creationTime: time.Now(), 52 | } 53 | r.holes = append(r.holes, hole{ 54 | first: 0, 55 | last: math.MaxUint16, 56 | deleted: false}) 57 | return r 58 | } 59 | 60 | // updateHoles updates the list of holes for an incoming fragment and 61 | // returns true iff the fragment filled at least part of an existing hole. 62 | func (r *reassembler) updateHoles(first, last uint16, more bool) bool { 63 | used := false 64 | for i := range r.holes { 65 | if r.holes[i].deleted || first > r.holes[i].last || last < r.holes[i].first { 66 | continue 67 | } 68 | used = true 69 | r.deleted++ 70 | r.holes[i].deleted = true 71 | if first > r.holes[i].first { 72 | r.holes = append(r.holes, hole{r.holes[i].first, first - 1, false}) 73 | } 74 | if last < r.holes[i].last && more { 75 | r.holes = append(r.holes, hole{last + 1, r.holes[i].last, false}) 76 | } 77 | } 78 | return used 79 | } 80 | 81 | // process records the fragment covering bytes first through last and, once no 82 | // hole is left, returns the reassembled packet together with true. The int 83 | // result is the number of bytes stored for this call; it is 0 when the 84 | // fragment filled no hole or r is already done. 85 | func (r *reassembler) process(first, last uint16, more bool, vv buffer.VectorisedView) (buffer.VectorisedView, bool, int) { 86 | r.mu.Lock() 87 | defer r.mu.Unlock() 88 | consumed := 0 89 | if r.done { 90 | // A concurrent goroutine might have already reassembled 91 | // the packet and emptied the heap while this goroutine 92 | // was waiting on the mutex. We don't have to do anything in this case.
93 | return buffer.VectorisedView{}, false, consumed 94 | } 95 | if r.updateHoles(first, last, more) { 96 | // We store the incoming packet only if it filled some holes. 97 | heap.Push(&r.heap, fragment{offset: first, vv: vv.Clone(nil)}) 98 | consumed = vv.Size() 99 | r.size += consumed 100 | } 101 | // Check if all the holes have been deleted and we are ready to reassemble. 102 | // No holes left but an empty heap means the packet was already reassembled 103 | // by an earlier call and r has not been marked done yet; reassembling again 104 | // would pop from an empty heap, so report "not done" instead. 105 | if r.deleted < len(r.holes) || len(r.heap) == 0 { 106 | return buffer.VectorisedView{}, false, consumed 107 | } 108 | res, err := r.heap.reassemble() 109 | if err != nil { 110 | panic(fmt.Sprintf("reassemble failed with: %v. There is probably a bug in the code handling the holes.", err)) 111 | } 112 | return res, true, consumed 113 | } 114 | 115 | // tooOld reports whether more than timeout has elapsed since r was created. 116 | func (r *reassembler) tooOld(timeout time.Duration) bool { 117 | return time.Since(r.creationTime) > timeout 118 | } 119 | 120 | // checkDoneOrMark marks r as done and returns whether it had already been 121 | // marked before this call. 122 | func (r *reassembler) checkDoneOrMark() bool { 123 | r.mu.Lock() 124 | prev := r.done 125 | r.done = true 126 | r.mu.Unlock() 127 | return prev 128 | } 129 | -------------------------------------------------------------------------------- /tcpip/network/fragmentation/reassembler_list.go: -------------------------------------------------------------------------------- 1 | package fragmentation 2 | 3 | // ElementMapper provides an identity mapping by default. 4 | // 5 | // This can be replaced to provide a struct that maps elements to linker 6 | // objects, if they are not the same. An ElementMapper is not typically 7 | // required if: Linker is left as is, Element is left as is, or Linker and 8 | // Element are the same type. 9 | type reassemblerElementMapper struct{} 10 | 11 | // linkerFor maps an Element to a Linker. 12 | // 13 | // This default implementation should be inlined. 14 | // 15 | //go:nosplit 16 | func (reassemblerElementMapper) linkerFor(elem *reassembler) *reassembler { return elem } 17 | 18 | // List is an intrusive list.
Entries can be added to or removed from the list 19 | // in O(1) time and with no additional memory allocations. 20 | // 21 | // The zero value for List is an empty list ready to use. 22 | // 23 | // To iterate over a list (where l is a List): 24 | // for e := l.Front(); e != nil; e = e.Next() { 25 | // // do something with e. 26 | // } 27 | // 28 | // +stateify savable 29 | type reassemblerList struct { 30 | head *reassembler 31 | tail *reassembler 32 | } 33 | 34 | // Reset resets list l to the empty state. 35 | func (l *reassemblerList) Reset() { 36 | l.head = nil 37 | l.tail = nil 38 | } 39 | 40 | // Empty returns true iff the list is empty. 41 | func (l *reassemblerList) Empty() bool { 42 | return l.head == nil 43 | } 44 | 45 | // Front returns the first element of list l or nil. 46 | func (l *reassemblerList) Front() *reassembler { 47 | return l.head 48 | } 49 | 50 | // Back returns the last element of list l or nil. 51 | func (l *reassemblerList) Back() *reassembler { 52 | return l.tail 53 | } 54 | 55 | // PushFront inserts the element e at the front of list l. 56 | func (l *reassemblerList) PushFront(e *reassembler) { 57 | reassemblerElementMapper{}.linkerFor(e).SetNext(l.head) 58 | reassemblerElementMapper{}.linkerFor(e).SetPrev(nil) 59 | 60 | if l.head != nil { 61 | reassemblerElementMapper{}.linkerFor(l.head).SetPrev(e) 62 | } else { 63 | l.tail = e 64 | } 65 | 66 | l.head = e 67 | } 68 | 69 | // PushBack inserts the element e at the back of list l. 70 | func (l *reassemblerList) PushBack(e *reassembler) { 71 | reassemblerElementMapper{}.linkerFor(e).SetNext(nil) 72 | reassemblerElementMapper{}.linkerFor(e).SetPrev(l.tail) 73 | 74 | if l.tail != nil { 75 | reassemblerElementMapper{}.linkerFor(l.tail).SetNext(e) 76 | } else { 77 | l.head = e 78 | } 79 | 80 | l.tail = e 81 | } 82 | 83 | // PushBackList inserts list m at the end of list l, emptying m. 
84 | func (l *reassemblerList) PushBackList(m *reassemblerList) { 85 | if l.head == nil { 86 | l.head = m.head 87 | l.tail = m.tail 88 | } else if m.head != nil { 89 | reassemblerElementMapper{}.linkerFor(l.tail).SetNext(m.head) 90 | reassemblerElementMapper{}.linkerFor(m.head).SetPrev(l.tail) 91 | 92 | l.tail = m.tail 93 | } 94 | 95 | m.head = nil 96 | m.tail = nil 97 | } 98 | 99 | // InsertAfter inserts e after b. 100 | func (l *reassemblerList) InsertAfter(b, e *reassembler) { 101 | a := reassemblerElementMapper{}.linkerFor(b).Next() 102 | reassemblerElementMapper{}.linkerFor(e).SetNext(a) 103 | reassemblerElementMapper{}.linkerFor(e).SetPrev(b) 104 | reassemblerElementMapper{}.linkerFor(b).SetNext(e) 105 | 106 | if a != nil { 107 | reassemblerElementMapper{}.linkerFor(a).SetPrev(e) 108 | } else { 109 | l.tail = e 110 | } 111 | } 112 | 113 | // InsertBefore inserts e before a. 114 | func (l *reassemblerList) InsertBefore(a, e *reassembler) { 115 | b := reassemblerElementMapper{}.linkerFor(a).Prev() 116 | reassemblerElementMapper{}.linkerFor(e).SetNext(a) 117 | reassemblerElementMapper{}.linkerFor(e).SetPrev(b) 118 | reassemblerElementMapper{}.linkerFor(a).SetPrev(e) 119 | 120 | if b != nil { 121 | reassemblerElementMapper{}.linkerFor(b).SetNext(e) 122 | } else { 123 | l.head = e 124 | } 125 | } 126 | 127 | // Remove removes e from l. 128 | func (l *reassemblerList) Remove(e *reassembler) { 129 | prev := reassemblerElementMapper{}.linkerFor(e).Prev() 130 | next := reassemblerElementMapper{}.linkerFor(e).Next() 131 | 132 | if prev != nil { 133 | reassemblerElementMapper{}.linkerFor(prev).SetNext(next) 134 | } else { 135 | l.head = next 136 | } 137 | 138 | if next != nil { 139 | reassemblerElementMapper{}.linkerFor(next).SetPrev(prev) 140 | } else { 141 | l.tail = prev 142 | } 143 | } 144 | 145 | // Entry is a default implementation of Linker. 
Users can add anonymous fields 146 | // of this type to their structs to make them automatically implement the 147 | // methods needed by List. 148 | // 149 | // +stateify savable 150 | type reassemblerEntry struct { 151 | next *reassembler 152 | prev *reassembler 153 | } 154 | 155 | // Next returns the entry that follows e in the list. 156 | func (e *reassemblerEntry) Next() *reassembler { 157 | return e.next 158 | } 159 | 160 | // Prev returns the entry that precedes e in the list. 161 | func (e *reassemblerEntry) Prev() *reassembler { 162 | return e.prev 163 | } 164 | 165 | // SetNext assigns 'entry' as the entry that follows e in the list. 166 | func (e *reassemblerEntry) SetNext(elem *reassembler) { 167 | e.next = elem 168 | } 169 | 170 | // SetPrev assigns 'entry' as the entry that precedes e in the list. 171 | func (e *reassemblerEntry) SetPrev(elem *reassembler) { 172 | e.prev = elem 173 | } 174 | -------------------------------------------------------------------------------- /tcpip/network/fragmentation/reassembler_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package fragmentation 16 | 17 | import ( 18 | "math" 19 | "reflect" 20 | "testing" 21 | ) 22 | 23 | type updateHolesInput struct { 24 | first uint16 25 | last uint16 26 | more bool 27 | } 28 | 29 | var holesTestCases = []struct { 30 | comment string 31 | in []updateHolesInput 32 | want []hole 33 | }{ 34 | { 35 | comment: "No fragments. Expected holes: {[0 -> inf]}.", 36 | in: []updateHolesInput{}, 37 | want: []hole{{first: 0, last: math.MaxUint16, deleted: false}}, 38 | }, 39 | { 40 | comment: "One fragment at beginning. Expected holes: {[2, inf]}.", 41 | in: []updateHolesInput{{first: 0, last: 1, more: true}}, 42 | want: []hole{ 43 | {first: 0, last: math.MaxUint16, deleted: true}, 44 | {first: 2, last: math.MaxUint16, deleted: false}, 45 | }, 46 | }, 47 | { 48 | comment: "One fragment in the middle. Expected holes: {[0, 0], [3, inf]}.", 49 | in: []updateHolesInput{{first: 1, last: 2, more: true}}, 50 | want: []hole{ 51 | {first: 0, last: math.MaxUint16, deleted: true}, 52 | {first: 0, last: 0, deleted: false}, 53 | {first: 3, last: math.MaxUint16, deleted: false}, 54 | }, 55 | }, 56 | { 57 | comment: "One fragment at the end. Expected holes: {[0, 0]}.", 58 | in: []updateHolesInput{{first: 1, last: 2, more: false}}, 59 | want: []hole{ 60 | {first: 0, last: math.MaxUint16, deleted: true}, 61 | {first: 0, last: 0, deleted: false}, 62 | }, 63 | }, 64 | { 65 | comment: "One fragment completing a packet. Expected holes: {}.", 66 | in: []updateHolesInput{{first: 0, last: 1, more: false}}, 67 | want: []hole{ 68 | {first: 0, last: math.MaxUint16, deleted: true}, 69 | }, 70 | }, 71 | { 72 | comment: "Two non-overlapping fragments completing a packet. 
Expected holes: {}.", 73 | in: []updateHolesInput{ 74 | {first: 0, last: 1, more: true}, 75 | {first: 2, last: 3, more: false}, 76 | }, 77 | want: []hole{ 78 | {first: 0, last: math.MaxUint16, deleted: true}, 79 | {first: 2, last: math.MaxUint16, deleted: true}, 80 | }, 81 | }, 82 | { 83 | comment: "Two overlapping fragments completing a packet. Expected holes: {}.", 84 | in: []updateHolesInput{ 85 | {first: 0, last: 2, more: true}, 86 | {first: 2, last: 3, more: false}, 87 | }, 88 | want: []hole{ 89 | {first: 0, last: math.MaxUint16, deleted: true}, 90 | {first: 3, last: math.MaxUint16, deleted: true}, 91 | }, 92 | }, 93 | } 94 | 95 | func TestUpdateHoles(t *testing.T) { 96 | for _, c := range holesTestCases { 97 | r := newReassembler(0) 98 | for _, i := range c.in { 99 | r.updateHoles(i.first, i.last, i.more) 100 | } 101 | if !reflect.DeepEqual(r.holes, c.want) { 102 | t.Errorf("Test \"%s\" produced unexepetced holes. Got %v. Want %v", c.comment, r.holes, c.want) 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /tcpip/network/hash/hash.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package hash contains utility functions for hashing. 
16 | package hash 17 | 18 | import ( 19 | "encoding/binary" 20 | 21 | "github.com/google/netstack/rand" 22 | "github.com/google/netstack/tcpip/header" 23 | ) 24 | 25 | var hashIV = RandN32(1)[0] 26 | 27 | // RandN32 generates a slice of n cryptographic random 32-bit numbers. 28 | func RandN32(n int) []uint32 { 29 | b := make([]byte, 4*n) 30 | if _, err := rand.Read(b); err != nil { 31 | panic("unable to get random numbers: " + err.Error()) 32 | } 33 | r := make([]uint32, n) 34 | for i := range r { 35 | r[i] = binary.LittleEndian.Uint32(b[4*i : (4*i + 4)]) 36 | } 37 | return r 38 | } 39 | 40 | // Hash3Words calculates the Jenkins hash of 3 32-bit words. This is adapted 41 | // from linux. 42 | func Hash3Words(a, b, c, initval uint32) uint32 { 43 | const iv = 0xdeadbeef + (3 << 2) 44 | initval += iv 45 | 46 | a += initval 47 | b += initval 48 | c += initval 49 | 50 | c ^= b 51 | c -= rol32(b, 14) 52 | a ^= c 53 | a -= rol32(c, 11) 54 | b ^= a 55 | b -= rol32(a, 25) 56 | c ^= b 57 | c -= rol32(b, 16) 58 | a ^= c 59 | a -= rol32(c, 4) 60 | b ^= a 61 | b -= rol32(a, 14) 62 | c ^= b 63 | c -= rol32(b, 24) 64 | 65 | return c 66 | } 67 | 68 | // IPv4FragmentHash computes the hash of the IPv4 fragment as suggested in RFC 791. 69 | func IPv4FragmentHash(h header.IPv4) uint32 { 70 | x := uint32(h.ID())<<16 | uint32(h.Protocol()) 71 | t := h.SourceAddress() 72 | y := uint32(t[0]) | uint32(t[1])<<8 | uint32(t[2])<<16 | uint32(t[3])<<24 73 | t = h.DestinationAddress() 74 | z := uint32(t[0]) | uint32(t[1])<<8 | uint32(t[2])<<16 | uint32(t[3])<<24 75 | return Hash3Words(x, y, z, hashIV) 76 | } 77 | 78 | // IPv6FragmentHash computes the hash of the ipv6 fragment. 79 | // Unlike IPv4, the protocol is not used to compute the hash. 80 | // RFC 2640 (sec 4.5) is not very sharp on this aspect. 81 | // As a reference, also Linux ignores the protocol to compute 82 | // the hash (inet6_hash_frag). 
83 | func IPv6FragmentHash(h header.IPv6, f header.IPv6Fragment) uint32 { 84 | t := h.SourceAddress() 85 | y := uint32(t[0]) | uint32(t[1])<<8 | uint32(t[2])<<16 | uint32(t[3])<<24 86 | t = h.DestinationAddress() 87 | z := uint32(t[0]) | uint32(t[1])<<8 | uint32(t[2])<<16 | uint32(t[3])<<24 88 | return Hash3Words(f.ID(), y, z, hashIV) 89 | } 90 | 91 | func rol32(v, shift uint32) uint32 { 92 | return (v << shift) | (v >> ((-shift) & 31)) 93 | } 94 | -------------------------------------------------------------------------------- /tcpip/seqnum/seqnum.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package seqnum defines the types and methods for TCP sequence numbers such 16 | // that they fit in 32-bit words and work properly when overflows occur. 17 | package seqnum 18 | 19 | // Value represents the value of a sequence number. 20 | type Value uint32 21 | 22 | // Size represents the size (length) of a sequence number window. 23 | type Size uint32 24 | 25 | // LessThan checks if v is before w, i.e., v < w. 26 | func (v Value) LessThan(w Value) bool { 27 | return int32(v-w) < 0 28 | } 29 | 30 | // LessThanEq returns true if v==w or v is before i.e., v < w. 
31 | func (v Value) LessThanEq(w Value) bool { 32 | if v == w { 33 | return true 34 | } 35 | return v.LessThan(w) 36 | } 37 | 38 | // InRange checks if v is in the range [a,b), i.e., a <= v < b. 39 | func (v Value) InRange(a, b Value) bool { 40 | return v-a < b-a 41 | } 42 | 43 | // InWindow checks if v is in the window that starts at 'first' and spans 'size' 44 | // sequence numbers. 45 | func (v Value) InWindow(first Value, size Size) bool { 46 | return v.InRange(first, first.Add(size)) 47 | } 48 | 49 | // Overlap checks if the window [a,a+b) overlaps with the window [x, x+y). 50 | func Overlap(a Value, b Size, x Value, y Size) bool { 51 | return a.LessThan(x.Add(y)) && x.LessThan(a.Add(b)) 52 | } 53 | 54 | // Add calculates the sequence number following the [v, v+s) window. 55 | func (v Value) Add(s Size) Value { 56 | return v + Value(s) 57 | } 58 | 59 | // Size calculates the size of the window defined by [v, w). 60 | func (v Value) Size(w Value) Size { 61 | return Size(w - v) 62 | } 63 | 64 | // UpdateForward updates v such that it becomes v + s. 65 | func (v *Value) UpdateForward(s Size) { 66 | *v += Value(s) 67 | } 68 | -------------------------------------------------------------------------------- /tcpip/stack/icmp_rate_limit.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package stack 16 | 17 | import ( 18 | "golang.org/x/time/rate" 19 | ) 20 | 21 | const ( 22 | // icmpLimit is the default maximum number of ICMP messages permitted by this 23 | // rate limiter. 24 | icmpLimit = 1000 25 | 26 | // icmpBurst is the default number of ICMP messages that can be sent in a single 27 | // burst. 28 | icmpBurst = 50 29 | ) 30 | 31 | // ICMPRateLimiter is a global rate limiter that controls the generation of 32 | // ICMP messages generated by the stack. 33 | type ICMPRateLimiter struct { 34 | *rate.Limiter 35 | } 36 | 37 | // NewICMPRateLimiter returns a global rate limiter for controlling the rate 38 | // at which ICMP messages are generated by the stack. 39 | func NewICMPRateLimiter() *ICMPRateLimiter { 40 | return &ICMPRateLimiter{Limiter: rate.NewLimiter(icmpLimit, icmpBurst)} 41 | } 42 | -------------------------------------------------------------------------------- /tcpip/time.s: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Empty assembly file so empty func definitions work. 16 | -------------------------------------------------------------------------------- /tcpip/time_unsafe.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build go1.9
// +build !go1.14

// Check go:linkname function signatures when updating Go version.

package tcpip

import (
	_ "time"   // Used with go:linkname.
	_ "unsafe" // Required for go:linkname.
)

// StdClock implements Clock with the time package.
//
// It carries no state; both methods read the clock through the linknamed
// now function below.
type StdClock struct{}

// Compile-time check that *StdClock satisfies Clock.
var _ Clock = (*StdClock)(nil)

// now is bound to the time package's unexported now function through
// go:linkname, which is why it has no body here (see time.s). The build tags
// at the top of the file restrict it to the Go versions for which this
// signature was checked.
//
//go:linkname now time.now
func now() (sec int64, nsec int32, mono int64)

// NowNanoseconds implements Clock.NowNanoseconds.
//
// It combines the sec and nsec results of now into a single nanosecond
// count; the mono result is ignored.
func (*StdClock) NowNanoseconds() int64 {
	sec, nsec, _ := now()
	return sec*1e9 + int64(nsec)
}

// NowMonotonic implements Clock.NowMonotonic.
//
// It returns the mono result of now unchanged.
func (*StdClock) NowMonotonic() int64 {
	_, _, mono := now()
	return mono
}

--------------------------------------------------------------------------------
/tcpip/transport/icmp/icmp_packet_list.go:
--------------------------------------------------------------------------------
package icmp

// ElementMapper provides an identity mapping by default.
//
// This can be replaced to provide a struct that maps elements to linker
// objects, if they are not the same. An ElementMapper is not typically
// required if: Linker is left as is, Element is left as is, or Linker and
// Element are the same type.
9 | type icmpPacketElementMapper struct{} 10 | 11 | // linkerFor maps an Element to a Linker. 12 | // 13 | // This default implementation should be inlined. 14 | // 15 | //go:nosplit 16 | func (icmpPacketElementMapper) linkerFor(elem *icmpPacket) *icmpPacket { return elem } 17 | 18 | // List is an intrusive list. Entries can be added to or removed from the list 19 | // in O(1) time and with no additional memory allocations. 20 | // 21 | // The zero value for List is an empty list ready to use. 22 | // 23 | // To iterate over a list (where l is a List): 24 | // for e := l.Front(); e != nil; e = e.Next() { 25 | // // do something with e. 26 | // } 27 | // 28 | // +stateify savable 29 | type icmpPacketList struct { 30 | head *icmpPacket 31 | tail *icmpPacket 32 | } 33 | 34 | // Reset resets list l to the empty state. 35 | func (l *icmpPacketList) Reset() { 36 | l.head = nil 37 | l.tail = nil 38 | } 39 | 40 | // Empty returns true iff the list is empty. 41 | func (l *icmpPacketList) Empty() bool { 42 | return l.head == nil 43 | } 44 | 45 | // Front returns the first element of list l or nil. 46 | func (l *icmpPacketList) Front() *icmpPacket { 47 | return l.head 48 | } 49 | 50 | // Back returns the last element of list l or nil. 51 | func (l *icmpPacketList) Back() *icmpPacket { 52 | return l.tail 53 | } 54 | 55 | // PushFront inserts the element e at the front of list l. 56 | func (l *icmpPacketList) PushFront(e *icmpPacket) { 57 | icmpPacketElementMapper{}.linkerFor(e).SetNext(l.head) 58 | icmpPacketElementMapper{}.linkerFor(e).SetPrev(nil) 59 | 60 | if l.head != nil { 61 | icmpPacketElementMapper{}.linkerFor(l.head).SetPrev(e) 62 | } else { 63 | l.tail = e 64 | } 65 | 66 | l.head = e 67 | } 68 | 69 | // PushBack inserts the element e at the back of list l. 
70 | func (l *icmpPacketList) PushBack(e *icmpPacket) { 71 | icmpPacketElementMapper{}.linkerFor(e).SetNext(nil) 72 | icmpPacketElementMapper{}.linkerFor(e).SetPrev(l.tail) 73 | 74 | if l.tail != nil { 75 | icmpPacketElementMapper{}.linkerFor(l.tail).SetNext(e) 76 | } else { 77 | l.head = e 78 | } 79 | 80 | l.tail = e 81 | } 82 | 83 | // PushBackList inserts list m at the end of list l, emptying m. 84 | func (l *icmpPacketList) PushBackList(m *icmpPacketList) { 85 | if l.head == nil { 86 | l.head = m.head 87 | l.tail = m.tail 88 | } else if m.head != nil { 89 | icmpPacketElementMapper{}.linkerFor(l.tail).SetNext(m.head) 90 | icmpPacketElementMapper{}.linkerFor(m.head).SetPrev(l.tail) 91 | 92 | l.tail = m.tail 93 | } 94 | 95 | m.head = nil 96 | m.tail = nil 97 | } 98 | 99 | // InsertAfter inserts e after b. 100 | func (l *icmpPacketList) InsertAfter(b, e *icmpPacket) { 101 | a := icmpPacketElementMapper{}.linkerFor(b).Next() 102 | icmpPacketElementMapper{}.linkerFor(e).SetNext(a) 103 | icmpPacketElementMapper{}.linkerFor(e).SetPrev(b) 104 | icmpPacketElementMapper{}.linkerFor(b).SetNext(e) 105 | 106 | if a != nil { 107 | icmpPacketElementMapper{}.linkerFor(a).SetPrev(e) 108 | } else { 109 | l.tail = e 110 | } 111 | } 112 | 113 | // InsertBefore inserts e before a. 114 | func (l *icmpPacketList) InsertBefore(a, e *icmpPacket) { 115 | b := icmpPacketElementMapper{}.linkerFor(a).Prev() 116 | icmpPacketElementMapper{}.linkerFor(e).SetNext(a) 117 | icmpPacketElementMapper{}.linkerFor(e).SetPrev(b) 118 | icmpPacketElementMapper{}.linkerFor(a).SetPrev(e) 119 | 120 | if b != nil { 121 | icmpPacketElementMapper{}.linkerFor(b).SetNext(e) 122 | } else { 123 | l.head = e 124 | } 125 | } 126 | 127 | // Remove removes e from l. 
128 | func (l *icmpPacketList) Remove(e *icmpPacket) { 129 | prev := icmpPacketElementMapper{}.linkerFor(e).Prev() 130 | next := icmpPacketElementMapper{}.linkerFor(e).Next() 131 | 132 | if prev != nil { 133 | icmpPacketElementMapper{}.linkerFor(prev).SetNext(next) 134 | } else { 135 | l.head = next 136 | } 137 | 138 | if next != nil { 139 | icmpPacketElementMapper{}.linkerFor(next).SetPrev(prev) 140 | } else { 141 | l.tail = prev 142 | } 143 | } 144 | 145 | // Entry is a default implementation of Linker. Users can add anonymous fields 146 | // of this type to their structs to make them automatically implement the 147 | // methods needed by List. 148 | // 149 | // +stateify savable 150 | type icmpPacketEntry struct { 151 | next *icmpPacket 152 | prev *icmpPacket 153 | } 154 | 155 | // Next returns the entry that follows e in the list. 156 | func (e *icmpPacketEntry) Next() *icmpPacket { 157 | return e.next 158 | } 159 | 160 | // Prev returns the entry that precedes e in the list. 161 | func (e *icmpPacketEntry) Prev() *icmpPacket { 162 | return e.prev 163 | } 164 | 165 | // SetNext assigns 'entry' as the entry that follows e in the list. 166 | func (e *icmpPacketEntry) SetNext(elem *icmpPacket) { 167 | e.next = elem 168 | } 169 | 170 | // SetPrev assigns 'entry' as the entry that precedes e in the list. 171 | func (e *icmpPacketEntry) SetPrev(elem *icmpPacket) { 172 | e.prev = elem 173 | } 174 | -------------------------------------------------------------------------------- /tcpip/transport/icmp/protocol.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package icmp contains the implementation of the ICMP and IPv6-ICMP transport 16 | // protocols for use in ping. To use it in the networking stack, this package 17 | // must be added to the project, and activated on the stack by passing 18 | // icmp.NewProtocol4() and/or icmp.NewProtocol6() as one of the transport 19 | // protocols when calling stack.New(). Then endpoints can be created by passing 20 | // icmp.ProtocolNumber or icmp.ProtocolNumber6 as the transport protocol number 21 | // when calling Stack.NewEndpoint(). 22 | package icmp 23 | 24 | import ( 25 | "fmt" 26 | 27 | "github.com/google/netstack/tcpip" 28 | "github.com/google/netstack/tcpip/buffer" 29 | "github.com/google/netstack/tcpip/header" 30 | "github.com/google/netstack/tcpip/stack" 31 | "github.com/google/netstack/tcpip/transport/raw" 32 | "github.com/google/netstack/waiter" 33 | ) 34 | 35 | const ( 36 | // ProtocolNumber4 is the ICMP protocol number. 37 | ProtocolNumber4 = header.ICMPv4ProtocolNumber 38 | 39 | // ProtocolNumber6 is the IPv6-ICMP protocol number. 40 | ProtocolNumber6 = header.ICMPv6ProtocolNumber 41 | ) 42 | 43 | // protocol implements stack.TransportProtocol. 44 | type protocol struct { 45 | number tcpip.TransportProtocolNumber 46 | } 47 | 48 | // Number returns the ICMP protocol number. 
49 | func (p *protocol) Number() tcpip.TransportProtocolNumber { 50 | return p.number 51 | } 52 | 53 | func (p *protocol) netProto() tcpip.NetworkProtocolNumber { 54 | switch p.number { 55 | case ProtocolNumber4: 56 | return header.IPv4ProtocolNumber 57 | case ProtocolNumber6: 58 | return header.IPv6ProtocolNumber 59 | } 60 | panic(fmt.Sprint("unknown protocol number: ", p.number)) 61 | } 62 | 63 | // NewEndpoint creates a new icmp endpoint. It implements 64 | // stack.TransportProtocol.NewEndpoint. 65 | func (p *protocol) NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { 66 | if netProto != p.netProto() { 67 | return nil, tcpip.ErrUnknownProtocol 68 | } 69 | return newEndpoint(stack, netProto, p.number, waiterQueue) 70 | } 71 | 72 | // NewRawEndpoint creates a new raw icmp endpoint. It implements 73 | // stack.TransportProtocol.NewRawEndpoint. 74 | func (p *protocol) NewRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { 75 | if netProto != p.netProto() { 76 | return nil, tcpip.ErrUnknownProtocol 77 | } 78 | return raw.NewEndpoint(stack, netProto, p.number, waiterQueue) 79 | } 80 | 81 | // MinimumPacketSize returns the minimum valid icmp packet size. 82 | func (p *protocol) MinimumPacketSize() int { 83 | switch p.number { 84 | case ProtocolNumber4: 85 | return header.ICMPv4MinimumSize 86 | case ProtocolNumber6: 87 | return header.ICMPv6MinimumSize 88 | } 89 | panic(fmt.Sprint("unknown protocol number: ", p.number)) 90 | } 91 | 92 | // ParsePorts in case of ICMP sets src to 0, dst to ICMP ID, and err to nil. 
93 | func (p *protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) { 94 | switch p.number { 95 | case ProtocolNumber4: 96 | hdr := header.ICMPv4(v) 97 | return 0, hdr.Ident(), nil 98 | case ProtocolNumber6: 99 | hdr := header.ICMPv6(v) 100 | return 0, hdr.Ident(), nil 101 | } 102 | panic(fmt.Sprint("unknown protocol number: ", p.number)) 103 | } 104 | 105 | // HandleUnknownDestinationPacket handles packets targeted at this protocol but 106 | // that don't match any existing endpoint. 107 | func (p *protocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEndpointID, buffer.View, buffer.VectorisedView) bool { 108 | return true 109 | } 110 | 111 | // SetOption implements TransportProtocol.SetOption. 112 | func (p *protocol) SetOption(option interface{}) *tcpip.Error { 113 | return tcpip.ErrUnknownProtocolOption 114 | } 115 | 116 | // Option implements TransportProtocol.Option. 117 | func (p *protocol) Option(option interface{}) *tcpip.Error { 118 | return tcpip.ErrUnknownProtocolOption 119 | } 120 | 121 | // NewProtocol4 returns an ICMPv4 transport protocol. 122 | func NewProtocol4() stack.TransportProtocol { 123 | return &protocol{ProtocolNumber4} 124 | } 125 | 126 | // NewProtocol6 returns an ICMPv6 transport protocol. 127 | func NewProtocol6() stack.TransportProtocol { 128 | return &protocol{ProtocolNumber6} 129 | } 130 | -------------------------------------------------------------------------------- /tcpip/transport/raw/packet_list.go: -------------------------------------------------------------------------------- 1 | package raw 2 | 3 | // ElementMapper provides an identity mapping by default. 4 | // 5 | // This can be replaced to provide a struct that maps elements to linker 6 | // objects, if they are not the same. An ElementMapper is not typically 7 | // required if: Linker is left as is, Element is left as is, or Linker and 8 | // Element are the same type. 
9 | type packetElementMapper struct{} 10 | 11 | // linkerFor maps an Element to a Linker. 12 | // 13 | // This default implementation should be inlined. 14 | // 15 | //go:nosplit 16 | func (packetElementMapper) linkerFor(elem *packet) *packet { return elem } 17 | 18 | // List is an intrusive list. Entries can be added to or removed from the list 19 | // in O(1) time and with no additional memory allocations. 20 | // 21 | // The zero value for List is an empty list ready to use. 22 | // 23 | // To iterate over a list (where l is a List): 24 | // for e := l.Front(); e != nil; e = e.Next() { 25 | // // do something with e. 26 | // } 27 | // 28 | // +stateify savable 29 | type packetList struct { 30 | head *packet 31 | tail *packet 32 | } 33 | 34 | // Reset resets list l to the empty state. 35 | func (l *packetList) Reset() { 36 | l.head = nil 37 | l.tail = nil 38 | } 39 | 40 | // Empty returns true iff the list is empty. 41 | func (l *packetList) Empty() bool { 42 | return l.head == nil 43 | } 44 | 45 | // Front returns the first element of list l or nil. 46 | func (l *packetList) Front() *packet { 47 | return l.head 48 | } 49 | 50 | // Back returns the last element of list l or nil. 51 | func (l *packetList) Back() *packet { 52 | return l.tail 53 | } 54 | 55 | // PushFront inserts the element e at the front of list l. 56 | func (l *packetList) PushFront(e *packet) { 57 | packetElementMapper{}.linkerFor(e).SetNext(l.head) 58 | packetElementMapper{}.linkerFor(e).SetPrev(nil) 59 | 60 | if l.head != nil { 61 | packetElementMapper{}.linkerFor(l.head).SetPrev(e) 62 | } else { 63 | l.tail = e 64 | } 65 | 66 | l.head = e 67 | } 68 | 69 | // PushBack inserts the element e at the back of list l. 
70 | func (l *packetList) PushBack(e *packet) { 71 | packetElementMapper{}.linkerFor(e).SetNext(nil) 72 | packetElementMapper{}.linkerFor(e).SetPrev(l.tail) 73 | 74 | if l.tail != nil { 75 | packetElementMapper{}.linkerFor(l.tail).SetNext(e) 76 | } else { 77 | l.head = e 78 | } 79 | 80 | l.tail = e 81 | } 82 | 83 | // PushBackList inserts list m at the end of list l, emptying m. 84 | func (l *packetList) PushBackList(m *packetList) { 85 | if l.head == nil { 86 | l.head = m.head 87 | l.tail = m.tail 88 | } else if m.head != nil { 89 | packetElementMapper{}.linkerFor(l.tail).SetNext(m.head) 90 | packetElementMapper{}.linkerFor(m.head).SetPrev(l.tail) 91 | 92 | l.tail = m.tail 93 | } 94 | 95 | m.head = nil 96 | m.tail = nil 97 | } 98 | 99 | // InsertAfter inserts e after b. 100 | func (l *packetList) InsertAfter(b, e *packet) { 101 | a := packetElementMapper{}.linkerFor(b).Next() 102 | packetElementMapper{}.linkerFor(e).SetNext(a) 103 | packetElementMapper{}.linkerFor(e).SetPrev(b) 104 | packetElementMapper{}.linkerFor(b).SetNext(e) 105 | 106 | if a != nil { 107 | packetElementMapper{}.linkerFor(a).SetPrev(e) 108 | } else { 109 | l.tail = e 110 | } 111 | } 112 | 113 | // InsertBefore inserts e before a. 114 | func (l *packetList) InsertBefore(a, e *packet) { 115 | b := packetElementMapper{}.linkerFor(a).Prev() 116 | packetElementMapper{}.linkerFor(e).SetNext(a) 117 | packetElementMapper{}.linkerFor(e).SetPrev(b) 118 | packetElementMapper{}.linkerFor(a).SetPrev(e) 119 | 120 | if b != nil { 121 | packetElementMapper{}.linkerFor(b).SetNext(e) 122 | } else { 123 | l.head = e 124 | } 125 | } 126 | 127 | // Remove removes e from l. 
128 | func (l *packetList) Remove(e *packet) { 129 | prev := packetElementMapper{}.linkerFor(e).Prev() 130 | next := packetElementMapper{}.linkerFor(e).Next() 131 | 132 | if prev != nil { 133 | packetElementMapper{}.linkerFor(prev).SetNext(next) 134 | } else { 135 | l.head = next 136 | } 137 | 138 | if next != nil { 139 | packetElementMapper{}.linkerFor(next).SetPrev(prev) 140 | } else { 141 | l.tail = prev 142 | } 143 | } 144 | 145 | // Entry is a default implementation of Linker. Users can add anonymous fields 146 | // of this type to their structs to make them automatically implement the 147 | // methods needed by List. 148 | // 149 | // +stateify savable 150 | type packetEntry struct { 151 | next *packet 152 | prev *packet 153 | } 154 | 155 | // Next returns the entry that follows e in the list. 156 | func (e *packetEntry) Next() *packet { 157 | return e.next 158 | } 159 | 160 | // Prev returns the entry that precedes e in the list. 161 | func (e *packetEntry) Prev() *packet { 162 | return e.prev 163 | } 164 | 165 | // SetNext assigns 'entry' as the entry that follows e in the list. 166 | func (e *packetEntry) SetNext(elem *packet) { 167 | e.next = elem 168 | } 169 | 170 | // SetPrev assigns 'entry' as the entry that precedes e in the list. 171 | func (e *packetEntry) SetPrev(elem *packet) { 172 | e.prev = elem 173 | } 174 | -------------------------------------------------------------------------------- /tcpip/transport/raw/protocol.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package raw 16 | 17 | import ( 18 | "github.com/google/netstack/tcpip" 19 | "github.com/google/netstack/tcpip/stack" 20 | "github.com/google/netstack/waiter" 21 | ) 22 | 23 | // EndpointFactory implements stack.UnassociatedEndpointFactory. It is an empty struct and carries no state. 24 | type EndpointFactory struct{} 25 | 26 | // NewUnassociatedRawEndpoint implements stack.UnassociatedEndpointFactory. It forwards its arguments to newEndpoint with the associated flag set to false. 27 | func (EndpointFactory) NewUnassociatedRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { 28 | return newEndpoint(stack, netProto, transProto, waiterQueue, false /* associated */) 29 | } 30 | -------------------------------------------------------------------------------- /tcpip/transport/tcp/reno.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | package tcp 16 | 17 | // renoState stores the variables related to the TCP New Reno congestion 18 | // control algorithm. 19 | // 20 | // +stateify savable 21 | type renoState struct { 22 | s *sender // s is the sender whose sndCwnd, sndSsthresh and sndCAAckCount fields the methods below update. 23 | } 24 | 25 | // newRenoCC initializes the state for the NewReno congestion control algorithm. 26 | func newRenoCC(s *sender) *renoState { 27 | return &renoState{s: s} 28 | } 29 | 30 | // updateSlowStart will update the congestion window as per the slow-start 31 | // algorithm used by NewReno. If the adjusted congestion window would reach or 32 | // cross sndSsthresh, it is capped there, sndCAAckCount is reset, and the number of packets left over that 33 | // must be consumed in congestion avoidance mode is returned. 34 | func (r *renoState) updateSlowStart(packetsAcked int) int { 35 | // Don't let the congestion window cross into the congestion 36 | // avoidance range. 37 | newcwnd := r.s.sndCwnd + packetsAcked 38 | if newcwnd >= r.s.sndSsthresh { 39 | newcwnd = r.s.sndSsthresh 40 | r.s.sndCAAckCount = 0 41 | } 42 | 43 | packetsAcked -= newcwnd - r.s.sndCwnd // Subtract the packets that were used to grow the window; the rest is returned. 44 | r.s.sndCwnd = newcwnd 45 | return packetsAcked 46 | } 47 | 48 | // updateCongestionAvoidance will update congestion window in congestion 49 | // avoidance mode as described in RFC5681 section 3.1. 50 | func (r *renoState) updateCongestionAvoidance(packetsAcked int) { 51 | // Consume the packets in congestion avoidance mode. 52 | r.s.sndCAAckCount += packetsAcked 53 | if r.s.sndCAAckCount >= r.s.sndCwnd { 54 | r.s.sndCwnd += r.s.sndCAAckCount / r.s.sndCwnd 55 | r.s.sndCAAckCount = r.s.sndCAAckCount % r.s.sndCwnd // NOTE: the remainder is taken against the sndCwnd value already increased on the previous line. 56 | } 57 | } 58 | 59 | // reduceSlowStartThreshold reduces the slow-start threshold per RFC 5681, 60 | // page 6, eq. 4. It is called when we detect congestion in the network. The new threshold is half of r.s.outstanding, but never less than 2.
61 | func (r *renoState) reduceSlowStartThreshold() { 62 | r.s.sndSsthresh = r.s.outstanding / 2 63 | if r.s.sndSsthresh < 2 { 64 | r.s.sndSsthresh = 2 65 | } 66 | 67 | } 68 | 69 | // Update updates the congestion state based on the number of packets that 70 | // were acknowledged. While sndCwnd is below sndSsthresh the packets are first applied to slow start; any remainder is applied to congestion avoidance. 71 | // Update implements congestionControl.Update. 72 | func (r *renoState) Update(packetsAcked int) { 73 | if r.s.sndCwnd < r.s.sndSsthresh { // Still in slow start. 74 | packetsAcked = r.updateSlowStart(packetsAcked) 75 | if packetsAcked == 0 { // Slow start consumed every acked packet. 76 | return 77 | } 78 | } 79 | r.updateCongestionAvoidance(packetsAcked) 80 | } 81 | 82 | // HandleNDupAcks implements congestionControl.HandleNDupAcks. 83 | func (r *renoState) HandleNDupAcks() { 84 | // A retransmit was triggered due to nDupAckThreshold 85 | // being hit. Reduce our slow start threshold. 86 | r.reduceSlowStartThreshold() 87 | } 88 | 89 | // HandleRTOExpired implements congestionControl.HandleRTOExpired. 90 | func (r *renoState) HandleRTOExpired() { 91 | // We lost a packet, so reduce ssthresh. 92 | r.reduceSlowStartThreshold() 93 | 94 | // Reduce the congestion window to 1, i.e., enter slow-start. Per 95 | // RFC 5681, page 7, we must use 1 regardless of the value of the 96 | // initial congestion window. 97 | r.s.sndCwnd = 1 98 | } 99 | 100 | // PostRecovery implements congestionControl.PostRecovery. 101 | func (r *renoState) PostRecovery() { 102 | // No-op: this implementation has no post-recovery work to do. 103 | } 104 | -------------------------------------------------------------------------------- /tcpip/transport/tcp/sack.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package tcp 16 | 17 | import ( 18 | "github.com/google/netstack/tcpip/header" 19 | "github.com/google/netstack/tcpip/seqnum" 20 | ) 21 | 22 | const ( 23 | // MaxSACKBlocks is the maximum number of SACK blocks stored 24 | // at receiver side. 25 | MaxSACKBlocks = 6 26 | ) 27 | 28 | // UpdateSACKBlocks updates the list of SACK blocks to include the segment 29 | // specified by segStart->segEnd. If the segment happens to be an out of order 30 | // delivery then the first block in sack.Blocks always includes the 31 | // segment identified by segStart->segEnd. 32 | func UpdateSACKBlocks(sack *SACKInfo, segStart seqnum.Value, segEnd seqnum.Value, rcvNxt seqnum.Value) { 33 | newSB := header.SACKBlock{Start: segStart, End: segEnd} 34 | 35 | // Ignore any invalid SACK blocks (End not after Start) or blocks that end before rcvNxt as 36 | // those bytes have already been acked. 37 | if newSB.End.LessThanEq(newSB.Start) || newSB.End.LessThan(rcvNxt) { 38 | return 39 | } 40 | 41 | if sack.NumBlocks == 0 { 42 | sack.Blocks[0] = newSB 43 | sack.NumBlocks = 1 44 | return 45 | } 46 | var n = 0 // n is the number of existing blocks kept so far; they are compacted to the front of sack.Blocks. 47 | for i := 0; i < sack.NumBlocks; i++ { 48 | start, end := sack.Blocks[i].Start, sack.Blocks[i].End 49 | if end.LessThanEq(rcvNxt) { 50 | // Discard any sack blocks that are before rcvNxt as 51 | // those have already been acked. 52 | continue 53 | } 54 | if newSB.Start.LessThanEq(end) && start.LessThanEq(newSB.End) { 55 | // Merge this SACK block into newSB and discard this SACK 56 | // block.
57 | if start.LessThan(newSB.Start) { 58 | newSB.Start = start 59 | } 60 | if newSB.End.LessThan(end) { 61 | newSB.End = end 62 | } 63 | } else { 64 | // Save this block. 65 | sack.Blocks[n] = sack.Blocks[i] 66 | n++ 67 | } 68 | } 69 | if rcvNxt.LessThan(newSB.Start) { 70 | // If this was an out of order segment then make sure that the 71 | // first SACK block is the one that includes the segment. 72 | // 73 | // See the first bullet point in 74 | // https://tools.ietf.org/html/rfc2018#section-4 75 | if n == MaxSACKBlocks { 76 | // If the number of SACK blocks is equal to 77 | // MaxSACKBlocks then discard the last SACK block. 78 | n-- 79 | } 80 | for i := n - 1; i >= 0; i-- { // Shift the kept blocks one slot to the right to free index 0. 81 | sack.Blocks[i+1] = sack.Blocks[i] 82 | } 83 | sack.Blocks[0] = newSB 84 | n++ 85 | } 86 | sack.NumBlocks = n 87 | } 88 | 89 | // TrimSACKBlockList updates the sack block list by dropping every block that 90 | // ends at or before rcvNxt and moving the start of any block that begins before rcvNxt up to rcvNxt. 91 | func TrimSACKBlockList(sack *SACKInfo, rcvNxt seqnum.Value) { 92 | n := 0 93 | for i := 0; i < sack.NumBlocks; i++ { 94 | if sack.Blocks[i].End.LessThanEq(rcvNxt) { 95 | continue 96 | } 97 | if sack.Blocks[i].Start.LessThan(rcvNxt) { 98 | // Shrink this SACK block. 99 | sack.Blocks[i].Start = rcvNxt 100 | } 101 | sack.Blocks[n] = sack.Blocks[i] 102 | n++ 103 | } 104 | sack.NumBlocks = n 105 | } 106 | -------------------------------------------------------------------------------- /tcpip/transport/tcp/segment_heap.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package tcp 16 | 17 | // segmentHeap is a slice of segments whose methods order it by sequence 18 | // number; *segmentHeap has the method set of container/heap's Interface. 19 | type segmentHeap []*segment 20 | 21 | // Len returns the length of h. 22 | func (h segmentHeap) Len() int { 23 | return len(h) 24 | } 25 | 26 | // Less determines whether the i-th element of h is less than the j-th element, 27 | // comparing their sequence numbers. 28 | func (h segmentHeap) Less(i, j int) bool { 29 | return h[i].sequenceNumber.LessThan(h[j].sequenceNumber) 30 | } 31 | 32 | // Swap swaps the i-th and j-th elements of h. 33 | func (h segmentHeap) Swap(i, j int) { 34 | h[i], h[j] = h[j], h[i] 35 | } 36 | 37 | // Push adds x as the last element of h. x must be a *segment; any other 38 | // dynamic type makes the type assertion panic. 39 | func (h *segmentHeap) Push(x interface{}) { 40 | *h = append(*h, x.(*segment)) 41 | } 42 | 43 | // Pop removes the last element of h and returns it. h must not be empty. 44 | func (h *segmentHeap) Pop() interface{} { 45 | old := *h 46 | n := len(old) 47 | x := old[n-1] 48 | // Clear the vacated slot so the backing array does not keep the 49 | // popped segment reachable after it has left the heap. 50 | old[n-1] = nil 51 | *h = old[:n-1] 52 | return x 53 | } 54 | -------------------------------------------------------------------------------- /tcpip/transport/tcp/segment_queue.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package tcp 16 | 17 | import ( 18 | "sync" 19 | ) 20 | 21 | // segmentQueue is a bounded, thread-safe queue of TCP segments. 22 | // 23 | // +stateify savable 24 | type segmentQueue struct { 25 | mu sync.Mutex // mu is held by every method while it reads or writes the fields below. 26 | list segmentList // list holds the queued segments in FIFO order (PushBack on enqueue, Front on dequeue). 27 | limit int // limit is the bound checked by enqueue: a segment is accepted only while used < limit. 28 | used int // used is the number of segments currently in list. 29 | } 30 | 31 | // empty determines if the queue is empty. 32 | func (q *segmentQueue) empty() bool { 33 | q.mu.Lock() 34 | r := q.used == 0 35 | q.mu.Unlock() 36 | 37 | return r 38 | } 39 | 40 | // setLimit updates the limit. No segments are immediately dropped in case the 41 | // queue becomes full due to the new limit. 42 | func (q *segmentQueue) setLimit(limit int) { 43 | q.mu.Lock() 44 | q.limit = limit 45 | q.mu.Unlock() 46 | } 47 | 48 | // enqueue adds the given segment to the queue. 49 | // 50 | // Returns true when the segment is successfully added to the queue, in which 51 | // case ownership of the reference is transferred to the queue. And returns 52 | // false if the queue is full, in which case ownership is retained by the 53 | // caller. 54 | func (q *segmentQueue) enqueue(s *segment) bool { 55 | q.mu.Lock() 56 | r := q.used < q.limit 57 | if r { 58 | q.list.PushBack(s) 59 | q.used++ 60 | } 61 | q.mu.Unlock() 62 | 63 | return r 64 | } 65 | 66 | // dequeue removes and returns the next segment from queue, if one exists. 67 | // Ownership is transferred to the caller, who is responsible for decrementing 68 | // the ref count when done. It returns nil when the queue is empty.
69 | func (q *segmentQueue) dequeue() *segment { 70 | q.mu.Lock() 71 | s := q.list.Front() 72 | if s != nil { 73 | q.list.Remove(s) 74 | q.used-- 75 | } 76 | q.mu.Unlock() 77 | 78 | return s 79 | } 80 | -------------------------------------------------------------------------------- /tcpip/transport/tcp/tcp_segment_list.go: -------------------------------------------------------------------------------- 1 | package tcp 2 | 3 | // segmentElementMapper provides an identity mapping by default. 4 | // 5 | // This can be replaced to provide a struct that maps elements to linker 6 | // objects, if they are not the same. An ElementMapper is not typically 7 | // required if: Linker is left as is, Element is left as is, or Linker and 8 | // Element are the same type. 9 | type segmentElementMapper struct{} 10 | 11 | // linkerFor maps an Element to a Linker; here it returns elem unchanged. 12 | // 13 | // This default implementation should be inlined. 14 | // 15 | //go:nosplit 16 | func (segmentElementMapper) linkerFor(elem *segment) *segment { return elem } 17 | 18 | // segmentList is an intrusive list of segments. Entries can be added to or removed from the list 19 | // in O(1) time and with no additional memory allocations. 20 | // 21 | // The zero value for segmentList is an empty list ready to use. 22 | // 23 | // To iterate over a list (where l is a segmentList): 24 | // for e := l.Front(); e != nil; e = e.Next() { 25 | // // do something with e. 26 | // } 27 | // 28 | // +stateify savable 29 | type segmentList struct { 30 | head *segment 31 | tail *segment 32 | } 33 | 34 | // Reset resets list l to the empty state. It only clears l's head and tail; the elements themselves are not modified. 35 | func (l *segmentList) Reset() { 36 | l.head = nil 37 | l.tail = nil 38 | } 39 | 40 | // Empty returns true iff the list is empty. 41 | func (l *segmentList) Empty() bool { 42 | return l.head == nil 43 | } 44 | 45 | // Front returns the first element of list l or nil. 46 | func (l *segmentList) Front() *segment { 47 | return l.head 48 | } 49 | 50 | // Back returns the last element of list l or nil.
51 | func (l *segmentList) Back() *segment { 52 | return l.tail 53 | } 54 | 55 | // PushFront inserts the element e at the front of list l. If l was empty, e also becomes the tail. 56 | func (l *segmentList) PushFront(e *segment) { 57 | segmentElementMapper{}.linkerFor(e).SetNext(l.head) 58 | segmentElementMapper{}.linkerFor(e).SetPrev(nil) 59 | 60 | if l.head != nil { 61 | segmentElementMapper{}.linkerFor(l.head).SetPrev(e) 62 | } else { 63 | l.tail = e 64 | } 65 | 66 | l.head = e 67 | } 68 | 69 | // PushBack inserts the element e at the back of list l. If l was empty, e also becomes the head. 70 | func (l *segmentList) PushBack(e *segment) { 71 | segmentElementMapper{}.linkerFor(e).SetNext(nil) 72 | segmentElementMapper{}.linkerFor(e).SetPrev(l.tail) 73 | 74 | if l.tail != nil { 75 | segmentElementMapper{}.linkerFor(l.tail).SetNext(e) 76 | } else { 77 | l.head = e 78 | } 79 | 80 | l.tail = e 81 | } 82 | 83 | // PushBackList moves every element of list m to the end of list l, leaving m empty. 84 | func (l *segmentList) PushBackList(m *segmentList) { 85 | if l.head == nil { // l is empty: simply adopt m's head and tail. 86 | l.head = m.head 87 | l.tail = m.tail 88 | } else if m.head != nil { // Both lists are non-empty: link m's head after l's tail. 89 | segmentElementMapper{}.linkerFor(l.tail).SetNext(m.head) 90 | segmentElementMapper{}.linkerFor(m.head).SetPrev(l.tail) 91 | 92 | l.tail = m.tail 93 | } 94 | 95 | m.head = nil 96 | m.tail = nil 97 | } 98 | 99 | // InsertAfter inserts e into l immediately after b. If b was the last element, e becomes the new tail. 100 | func (l *segmentList) InsertAfter(b, e *segment) { 101 | a := segmentElementMapper{}.linkerFor(b).Next() 102 | segmentElementMapper{}.linkerFor(e).SetNext(a) 103 | segmentElementMapper{}.linkerFor(e).SetPrev(b) 104 | segmentElementMapper{}.linkerFor(b).SetNext(e) 105 | 106 | if a != nil { 107 | segmentElementMapper{}.linkerFor(a).SetPrev(e) 108 | } else { 109 | l.tail = e 110 | } 111 | } 112 | 113 | // InsertBefore inserts e into l immediately before a. If a was the first element, e becomes the new head.
114 | func (l *segmentList) InsertBefore(a, e *segment) { 115 | b := segmentElementMapper{}.linkerFor(a).Prev() 116 | segmentElementMapper{}.linkerFor(e).SetNext(a) 117 | segmentElementMapper{}.linkerFor(e).SetPrev(b) 118 | segmentElementMapper{}.linkerFor(a).SetPrev(e) 119 | 120 | if b != nil { 121 | segmentElementMapper{}.linkerFor(b).SetNext(e) 122 | } else { 123 | l.head = e 124 | } 125 | } 126 | 127 | // Remove unlinks e from l, updating the list's head and tail when e was at either end. The next and prev pointers stored in e itself are left unchanged. 128 | func (l *segmentList) Remove(e *segment) { 129 | prev := segmentElementMapper{}.linkerFor(e).Prev() 130 | next := segmentElementMapper{}.linkerFor(e).Next() 131 | 132 | if prev != nil { 133 | segmentElementMapper{}.linkerFor(prev).SetNext(next) 134 | } else { 135 | l.head = next 136 | } 137 | 138 | if next != nil { 139 | segmentElementMapper{}.linkerFor(next).SetPrev(prev) 140 | } else { 141 | l.tail = prev 142 | } 143 | } 144 | 145 | // segmentEntry is a default implementation of the linker methods (Next, Prev, 146 | // SetNext, SetPrev). Users can add anonymous fields of this type to their structs to make them automatically implement the 147 | // methods needed by segmentList. 148 | // 149 | // +stateify savable 150 | type segmentEntry struct { 151 | next *segment 152 | prev *segment 153 | } 154 | 155 | // Next returns the entry that follows e in the list. 156 | func (e *segmentEntry) Next() *segment { 157 | return e.next 158 | } 159 | 160 | // Prev returns the entry that precedes e in the list. 161 | func (e *segmentEntry) Prev() *segment { 162 | return e.prev 163 | } 164 | 165 | // SetNext assigns elem as the entry that follows e in the list. 166 | func (e *segmentEntry) SetNext(elem *segment) { 167 | e.next = elem 168 | } 169 | 170 | // SetPrev assigns elem as the entry that precedes e in the list.
171 | func (e *segmentEntry) SetPrev(elem *segment) { 172 | e.prev = elem 173 | } 174 | -------------------------------------------------------------------------------- /tcpip/transport/tcp/timer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package tcp 16 | 17 | import ( 18 | "time" 19 | 20 | "github.com/google/netstack/sleep" 21 | ) 22 | 23 | type timerState int // timerState identifies which of the three states below a timer is in. 24 | 25 | const ( 26 | timerStateDisabled timerState = iota 27 | timerStateEnabled 28 | timerStateOrphaned 29 | ) 30 | 31 | // timer is a timer implementation that reduces the interactions with the 32 | // runtime timer infrastructure by letting timers run (and potentially 33 | // eventually expire) even if they are stopped. It makes it cheaper to 34 | // disable/reenable timers at the expense of spurious wakes. This is useful for 35 | // cases when the same timer is disabled/reenabled repeatedly with relatively 36 | // long timeouts farther into the future. 37 | // 38 | // TCP retransmit timers benefit from this because their timeouts are long 39 | // (currently at least 200ms), and get disabled when acks are received, and 40 | // reenabled when new pending segments are sent.
41 | // 42 | // It is advantageous to avoid interacting with the runtime because it acquires 43 | // a global mutex and performs O(log n) operations, where n is the global number 44 | // of timers, whenever a timer is enabled or disabled, and may make a syscall. 45 | // 46 | // This struct is thread-compatible. 47 | type timer struct { 48 | // state is the current state of the timer, it can be one of the 49 | // following values: 50 | // disabled - the timer is disabled. 51 | // orphaned - the timer is disabled, but the runtime timer is 52 | // enabled, which means that it will eventually cause a 53 | // spurious wake (unless it gets enabled again before 54 | // then). 55 | // enabled - the timer is enabled, but the runtime timer may be set 56 | // to an earlier expiration time due to a previous 57 | // orphaned state. 58 | state timerState 59 | 60 | // target is the expiration time of the current timer. It is only 61 | // meaningful in the enabled state. 62 | target time.Time 63 | 64 | // runtimeTarget is the expiration time of the runtime timer. It is 65 | // meaningful in the enabled and orphaned states. 66 | runtimeTarget time.Time 67 | 68 | // timer is the runtime timer used to wait on. 69 | timer *time.Timer 70 | } 71 | 72 | // init initializes the timer. Once it expires, the given waker will be 73 | // asserted. 74 | func (t *timer) init(w *sleep.Waker) { 75 | t.state = timerStateDisabled 76 | 77 | // Initialize a runtime timer that will assert the waker, then 78 | // immediately stop it. The one-hour duration is only a placeholder, since the timer is stopped right away. 79 | t.timer = time.AfterFunc(time.Hour, func() { 80 | w.Assert() 81 | }) 82 | t.timer.Stop() 83 | } 84 | 85 | // cleanup frees all resources associated with the timer by stopping the runtime timer.
86 | func (t *timer) cleanup() { 87 | t.timer.Stop() 88 | } 89 | 90 | // checkExpiration checks if the given timer has actually expired, it should be 91 | // called whenever a sleeper wakes up due to the waker being asserted, and is 92 | // used to check if it's a spurious wake (due to a previously orphaned timer) 93 | // or a legitimate one. It returns true only for a legitimate expiration. 94 | func (t *timer) checkExpiration() bool { 95 | // Transition to fully disabled state if we're just consuming an 96 | // orphaned timer. 97 | if t.state == timerStateOrphaned { 98 | t.state = timerStateDisabled 99 | return false 100 | } 101 | 102 | // The timer is enabled, but it may have expired early. Check if that's 103 | // the case, and if so, reset the runtime timer to the correct time. 104 | now := time.Now() 105 | if now.Before(t.target) { 106 | t.runtimeTarget = t.target 107 | t.timer.Reset(t.target.Sub(now)) 108 | return false 109 | } 110 | 111 | // The timer has actually expired, disable it for now and inform the 112 | // caller. 113 | t.state = timerStateDisabled 114 | return true 115 | } 116 | 117 | // disable disables the timer, leaving it in an orphaned state if it wasn't 118 | // already disabled. The runtime timer itself is not stopped. 119 | func (t *timer) disable() { 120 | if t.state != timerStateDisabled { 121 | t.state = timerStateOrphaned 122 | } 123 | } 124 | 125 | // enabled returns true if the timer is currently enabled, false otherwise. 126 | func (t *timer) enabled() bool { 127 | return t.state == timerStateEnabled 128 | } 129 | 130 | // enable enables the timer so that it expires d from now, programming the runtime timer if necessary. 131 | func (t *timer) enable(d time.Duration) { 132 | t.target = time.Now().Add(d) 133 | 134 | // Check if we need to set the runtime timer: either the timer is fully disabled, or the new target is earlier than the runtime timer's current target.
135 | if t.state == timerStateDisabled || t.target.Before(t.runtimeTarget) { 136 | t.runtimeTarget = t.target 137 | t.timer.Reset(d) 138 | } 139 | 140 | t.state = timerStateEnabled 141 | } 142 | -------------------------------------------------------------------------------- /tcpip/transport/udp/forwarder.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package udp 16 | 17 | import ( 18 | "github.com/google/netstack/tcpip" 19 | "github.com/google/netstack/tcpip/buffer" 20 | "github.com/google/netstack/tcpip/stack" 21 | "github.com/google/netstack/waiter" 22 | ) 23 | 24 | // Forwarder is a session request forwarder, which allows clients to decide 25 | // what to do with a session request, for example: ignore it, or process it. 26 | // 27 | // The canonical way of using it is to pass the Forwarder.HandlePacket function 28 | // to stack.SetTransportProtocolHandler. 29 | type Forwarder struct { 30 | handler func(*ForwarderRequest) // handler is invoked by HandlePacket for every packet. 31 | 32 | stack *stack.Stack // stack is copied into every ForwarderRequest that HandlePacket creates. 33 | } 34 | 35 | // NewForwarder allocates and initializes a new forwarder that passes each request to handler. 36 | func NewForwarder(s *stack.Stack, handler func(*ForwarderRequest)) *Forwarder { 37 | return &Forwarder{ 38 | stack: s, 39 | handler: handler, 40 | } 41 | } 42 | 43 | // HandlePacket handles all packets by wrapping each one in a ForwarderRequest and passing it to the handler; it always returns true.
44 | // 45 | // This function is expected to be passed as an argument to the 46 | // stack.SetTransportProtocolHandler function. The netHeader argument is not used. 47 | func (f *Forwarder) HandlePacket(r *stack.Route, id stack.TransportEndpointID, netHeader buffer.View, vv buffer.VectorisedView) bool { 48 | f.handler(&ForwarderRequest{ 49 | stack: f.stack, 50 | route: r, 51 | id: id, 52 | vv: vv, 53 | }) 54 | 55 | return true 56 | } 57 | 58 | // ForwarderRequest represents a session request received by the forwarder and 59 | // passed to the client. Clients may optionally create an endpoint to represent 60 | // it via CreateEndpoint. 61 | type ForwarderRequest struct { 62 | stack *stack.Stack // stack is the Forwarder's stack, used by CreateEndpoint to create and register the endpoint. 63 | route *stack.Route // route is the route passed to HandlePacket. 64 | id stack.TransportEndpointID // id is the endpoint ID passed to HandlePacket; ID returns it. 65 | vv buffer.VectorisedView // vv is the packet data passed to HandlePacket; CreateEndpoint delivers it to the new endpoint. 66 | } 67 | 68 | // ID returns the 4-tuple (src address, src port, dst address, dst port) that 69 | // represents the session request. 70 | func (r *ForwarderRequest) ID() stack.TransportEndpointID { 71 | return r.id 72 | } 73 | 74 | // CreateEndpoint creates a connected UDP endpoint for the session request. The endpoint is registered with the stack under the request's ID, and the packet that triggered the request is delivered to it before it is returned.
75 | func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { 76 | ep := newEndpoint(r.stack, r.route.NetProto, queue) 77 | if err := r.stack.RegisterTransportEndpoint(r.route.NICID(), []tcpip.NetworkProtocolNumber{r.route.NetProto}, ProtocolNumber, r.id, ep, ep.reusePort, ep.bindToDevice); err != nil { 78 | ep.Close() 79 | return nil, err 80 | } 81 | 82 | ep.id = r.id 83 | ep.route = r.route.Clone() 84 | ep.dstPort = r.id.RemotePort 85 | ep.regNICID = r.route.NICID() 86 | 87 | ep.state = StateConnected 88 | 89 | ep.rcvMu.Lock() 90 | ep.rcvReady = true 91 | ep.rcvMu.Unlock() 92 | 93 | ep.HandlePacket(r.route, r.id, r.vv) 94 | 95 | return ep, nil 96 | } 97 | -------------------------------------------------------------------------------- /tcpip/transport/udp/udp_packet_list.go: -------------------------------------------------------------------------------- 1 | package udp 2 | 3 | // udpPacketElementMapper provides an identity mapping by default. 4 | // 5 | // This can be replaced to provide a struct that maps elements to linker 6 | // objects, if they are not the same. An ElementMapper is not typically 7 | // required if: Linker is left as is, Element is left as is, or Linker and 8 | // Element are the same type. 9 | type udpPacketElementMapper struct{} 10 | 11 | // linkerFor maps an Element to a Linker; here it returns elem unchanged. 12 | // 13 | // This default implementation should be inlined. 14 | // 15 | //go:nosplit 16 | func (udpPacketElementMapper) linkerFor(elem *udpPacket) *udpPacket { return elem } 17 | 18 | // udpPacketList is an intrusive list of udpPackets. Entries can be added to or removed from the list 19 | // in O(1) time and with no additional memory allocations. 20 | // 21 | // The zero value for udpPacketList is an empty list ready to use. 22 | // 23 | // To iterate over a list (where l is a udpPacketList): 24 | // for e := l.Front(); e != nil; e = e.Next() { 25 | // // do something with e.
26 | // } 27 | // 28 | // +stateify savable 29 | type udpPacketList struct { 30 | head *udpPacket 31 | tail *udpPacket 32 | } 33 | 34 | // Reset resets list l to the empty state. It only clears l's head and tail; the elements themselves are not modified. 35 | func (l *udpPacketList) Reset() { 36 | l.head = nil 37 | l.tail = nil 38 | } 39 | 40 | // Empty returns true iff the list is empty. 41 | func (l *udpPacketList) Empty() bool { 42 | return l.head == nil 43 | } 44 | 45 | // Front returns the first element of list l or nil. 46 | func (l *udpPacketList) Front() *udpPacket { 47 | return l.head 48 | } 49 | 50 | // Back returns the last element of list l or nil. 51 | func (l *udpPacketList) Back() *udpPacket { 52 | return l.tail 53 | } 54 | 55 | // PushFront inserts the element e at the front of list l. If l was empty, e also becomes the tail. 56 | func (l *udpPacketList) PushFront(e *udpPacket) { 57 | udpPacketElementMapper{}.linkerFor(e).SetNext(l.head) 58 | udpPacketElementMapper{}.linkerFor(e).SetPrev(nil) 59 | 60 | if l.head != nil { 61 | udpPacketElementMapper{}.linkerFor(l.head).SetPrev(e) 62 | } else { 63 | l.tail = e 64 | } 65 | 66 | l.head = e 67 | } 68 | 69 | // PushBack inserts the element e at the back of list l. If l was empty, e also becomes the head. 70 | func (l *udpPacketList) PushBack(e *udpPacket) { 71 | udpPacketElementMapper{}.linkerFor(e).SetNext(nil) 72 | udpPacketElementMapper{}.linkerFor(e).SetPrev(l.tail) 73 | 74 | if l.tail != nil { 75 | udpPacketElementMapper{}.linkerFor(l.tail).SetNext(e) 76 | } else { 77 | l.head = e 78 | } 79 | 80 | l.tail = e 81 | } 82 | 83 | // PushBackList moves every element of list m to the end of list l, leaving m empty. 84 | func (l *udpPacketList) PushBackList(m *udpPacketList) { 85 | if l.head == nil { // l is empty: simply adopt m's head and tail. 86 | l.head = m.head 87 | l.tail = m.tail 88 | } else if m.head != nil { // Both lists are non-empty: link m's head after l's tail. 89 | udpPacketElementMapper{}.linkerFor(l.tail).SetNext(m.head) 90 | udpPacketElementMapper{}.linkerFor(m.head).SetPrev(l.tail) 91 | 92 | l.tail = m.tail 93 | } 94 | 95 | m.head = nil 96 | m.tail = nil 97 | } 98 | 99 | // InsertAfter inserts e into l immediately after b. If b was the last element, e becomes the new tail.
100 | func (l *udpPacketList) InsertAfter(b, e *udpPacket) { 101 | a := udpPacketElementMapper{}.linkerFor(b).Next() 102 | udpPacketElementMapper{}.linkerFor(e).SetNext(a) 103 | udpPacketElementMapper{}.linkerFor(e).SetPrev(b) 104 | udpPacketElementMapper{}.linkerFor(b).SetNext(e) 105 | 106 | if a != nil { 107 | udpPacketElementMapper{}.linkerFor(a).SetPrev(e) 108 | } else { 109 | l.tail = e 110 | } 111 | } 112 | 113 | // InsertBefore inserts e into l immediately before a. If a was the first element, e becomes the new head. 114 | func (l *udpPacketList) InsertBefore(a, e *udpPacket) { 115 | b := udpPacketElementMapper{}.linkerFor(a).Prev() 116 | udpPacketElementMapper{}.linkerFor(e).SetNext(a) 117 | udpPacketElementMapper{}.linkerFor(e).SetPrev(b) 118 | udpPacketElementMapper{}.linkerFor(a).SetPrev(e) 119 | 120 | if b != nil { 121 | udpPacketElementMapper{}.linkerFor(b).SetNext(e) 122 | } else { 123 | l.head = e 124 | } 125 | } 126 | 127 | // Remove unlinks e from l, updating the list's head and tail when e was at either end. The next and prev pointers stored in e itself are left unchanged. 128 | func (l *udpPacketList) Remove(e *udpPacket) { 129 | prev := udpPacketElementMapper{}.linkerFor(e).Prev() 130 | next := udpPacketElementMapper{}.linkerFor(e).Next() 131 | 132 | if prev != nil { 133 | udpPacketElementMapper{}.linkerFor(prev).SetNext(next) 134 | } else { 135 | l.head = next 136 | } 137 | 138 | if next != nil { 139 | udpPacketElementMapper{}.linkerFor(next).SetPrev(prev) 140 | } else { 141 | l.tail = prev 142 | } 143 | } 144 | 145 | // udpPacketEntry is a default implementation of the linker methods (Next, Prev, 146 | // SetNext, SetPrev). Users can add anonymous fields of this type to their structs to make them automatically implement the 147 | // methods needed by udpPacketList. 148 | // 149 | // +stateify savable 150 | type udpPacketEntry struct { 151 | next *udpPacket 152 | prev *udpPacket 153 | } 154 | 155 | // Next returns the entry that follows e in the list. 156 | func (e *udpPacketEntry) Next() *udpPacket { 157 | return e.next 158 | } 159 | 160 | // Prev returns the entry that precedes e in the list.
func (e *udpPacketEntry) Prev() *udpPacket {
	return e.prev
}

// SetNext assigns elem as the entry that follows e in the list.
func (e *udpPacketEntry) SetNext(elem *udpPacket) {
	e.next = elem
}

// SetPrev assigns elem as the entry that precedes e in the list.
func (e *udpPacketEntry) SetPrev(elem *udpPacket) {
	e.prev = elem
}

--------------------------------------------------------------------------------
/tmutex/tmutex.go:
--------------------------------------------------------------------------------
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package tmutex provides the implementation of a mutex that implements an
// efficient TryLock function in addition to Lock and Unlock.
package tmutex

import (
	"sync/atomic"
)

// Mutex is a mutual exclusion primitive that implements TryLock in addition
// to Lock and Unlock.
//
// The state lives in v, which is only accessed atomically after Init:
//   - 1: the mutex is free (set by Init and by Unlock).
//   - 0: the mutex is held and no goroutine has flagged contention (reached
//     through the fast path of Lock or through TryLock).
//   - negative: the mutex is held, or was just taken through the slow path
//     of Lock; Unlock will try to wake a waiter through ch.
//
// ch carries wake-up tokens from Unlock to goroutines blocked in Lock.
type Mutex struct {
	v  int32
	ch chan struct{}
}

// Init initializes the mutex. It must be called before any other method:
// it puts v in the free state and creates ch, which is not created anywhere
// else. ch has capacity 1 so that one wake-up token can be left pending
// without the sender blocking.
func (m *Mutex) Init() {
	m.v = 1
	m.ch = make(chan struct{}, 1)
}

// Lock acquires the mutex. If it is currently held by another goroutine, Lock
// will wait until it has a chance to acquire it.
func (m *Mutex) Lock() {
	// Uncontended case: the decrement takes v from 1 (free) to 0 (held).
	// When v was already 0 or negative the decrement leaves it negative,
	// which flags the mutex as contended for the current owner.
	if atomic.AddInt32(&m.v, -1) == 0 {
		return
	}

	for {
		// Try to acquire the mutex again, at the same time making sure
		// that m.v is negative, which indicates to the owner of the
		// lock that it is contended, which will force it to try to wake
		// someone up when it releases the mutex.
		//
		// The load skips the swap while v is still negative. The swap
		// acquires the mutex only when it observes 1, i.e. after an
		// Unlock; observing 0 just converts "held" into "held and
		// contended".
		if v := atomic.LoadInt32(&m.v); v >= 0 && atomic.SwapInt32(&m.v, -1) == 1 {
			return
		}

		// Wait for the mutex to be released before trying again.
		<-m.ch
	}
}

// TryLock attempts to acquire the mutex without blocking. If the mutex is
// currently held by another goroutine, it fails to acquire it and returns
// false.
func (m *Mutex) TryLock() bool {
	// Fail fast when the mutex is visibly held (v is 0 or negative).
	v := atomic.LoadInt32(&m.v)
	if v <= 0 {
		return false
	}
	// Acquire only if v is still 1; the compare-and-swap fails, and
	// TryLock returns false, if another goroutine changed v in between.
	return atomic.CompareAndSwapInt32(&m.v, 1, 0)
}

// Unlock releases the mutex.
func (m *Mutex) Unlock() {
	// Swapping in 1 marks the mutex free. A previous value of 0 means it
	// was taken through the Lock fast path or TryLock and no goroutine
	// has flagged contention since.
	if atomic.SwapInt32(&m.v, 1) == 0 {
		// There were no pending waiters.
		return
	}

	// Wake some waiter up. The send never blocks: if a token is already
	// pending in ch, the default branch drops this one.
	select {
	case m.ch <- struct{}{}:
	default:
	}
}

--------------------------------------------------------------------------------
/waiter/waiter_list.go:
--------------------------------------------------------------------------------
package waiter

// ElementMapper provides an identity mapping by default.
//
// This can be replaced to provide a struct that maps elements to linker
// objects, if they are not the same. An ElementMapper is not typically
// required if: Linker is left as is, Element is left as is, or Linker and
// Element are the same type.
type waiterElementMapper struct{}

// linkerFor maps an Element to a Linker. Here both are *Entry, so the
// element is returned unchanged.
//
// This default implementation should be inlined.
//
//go:nosplit
func (waiterElementMapper) linkerFor(elem *Entry) *Entry { return elem }

// List is an intrusive list.
Entries can be added to or removed from the list 19 | // in O(1) time and with no additional memory allocations. 20 | // 21 | // The zero value for List is an empty list ready to use. 22 | // 23 | // To iterate over a list (where l is a List): 24 | // for e := l.Front(); e != nil; e = e.Next() { 25 | // // do something with e. 26 | // } 27 | // 28 | // +stateify savable 29 | type waiterList struct { 30 | head *Entry 31 | tail *Entry 32 | } 33 | 34 | // Reset resets list l to the empty state. 35 | func (l *waiterList) Reset() { 36 | l.head = nil 37 | l.tail = nil 38 | } 39 | 40 | // Empty returns true iff the list is empty. 41 | func (l *waiterList) Empty() bool { 42 | return l.head == nil 43 | } 44 | 45 | // Front returns the first element of list l or nil. 46 | func (l *waiterList) Front() *Entry { 47 | return l.head 48 | } 49 | 50 | // Back returns the last element of list l or nil. 51 | func (l *waiterList) Back() *Entry { 52 | return l.tail 53 | } 54 | 55 | // PushFront inserts the element e at the front of list l. 56 | func (l *waiterList) PushFront(e *Entry) { 57 | waiterElementMapper{}.linkerFor(e).SetNext(l.head) 58 | waiterElementMapper{}.linkerFor(e).SetPrev(nil) 59 | 60 | if l.head != nil { 61 | waiterElementMapper{}.linkerFor(l.head).SetPrev(e) 62 | } else { 63 | l.tail = e 64 | } 65 | 66 | l.head = e 67 | } 68 | 69 | // PushBack inserts the element e at the back of list l. 70 | func (l *waiterList) PushBack(e *Entry) { 71 | waiterElementMapper{}.linkerFor(e).SetNext(nil) 72 | waiterElementMapper{}.linkerFor(e).SetPrev(l.tail) 73 | 74 | if l.tail != nil { 75 | waiterElementMapper{}.linkerFor(l.tail).SetNext(e) 76 | } else { 77 | l.head = e 78 | } 79 | 80 | l.tail = e 81 | } 82 | 83 | // PushBackList inserts list m at the end of list l, emptying m. 
84 | func (l *waiterList) PushBackList(m *waiterList) { 85 | if l.head == nil { 86 | l.head = m.head 87 | l.tail = m.tail 88 | } else if m.head != nil { 89 | waiterElementMapper{}.linkerFor(l.tail).SetNext(m.head) 90 | waiterElementMapper{}.linkerFor(m.head).SetPrev(l.tail) 91 | 92 | l.tail = m.tail 93 | } 94 | 95 | m.head = nil 96 | m.tail = nil 97 | } 98 | 99 | // InsertAfter inserts e after b. 100 | func (l *waiterList) InsertAfter(b, e *Entry) { 101 | a := waiterElementMapper{}.linkerFor(b).Next() 102 | waiterElementMapper{}.linkerFor(e).SetNext(a) 103 | waiterElementMapper{}.linkerFor(e).SetPrev(b) 104 | waiterElementMapper{}.linkerFor(b).SetNext(e) 105 | 106 | if a != nil { 107 | waiterElementMapper{}.linkerFor(a).SetPrev(e) 108 | } else { 109 | l.tail = e 110 | } 111 | } 112 | 113 | // InsertBefore inserts e before a. 114 | func (l *waiterList) InsertBefore(a, e *Entry) { 115 | b := waiterElementMapper{}.linkerFor(a).Prev() 116 | waiterElementMapper{}.linkerFor(e).SetNext(a) 117 | waiterElementMapper{}.linkerFor(e).SetPrev(b) 118 | waiterElementMapper{}.linkerFor(a).SetPrev(e) 119 | 120 | if b != nil { 121 | waiterElementMapper{}.linkerFor(b).SetNext(e) 122 | } else { 123 | l.head = e 124 | } 125 | } 126 | 127 | // Remove removes e from l. 128 | func (l *waiterList) Remove(e *Entry) { 129 | prev := waiterElementMapper{}.linkerFor(e).Prev() 130 | next := waiterElementMapper{}.linkerFor(e).Next() 131 | 132 | if prev != nil { 133 | waiterElementMapper{}.linkerFor(prev).SetNext(next) 134 | } else { 135 | l.head = next 136 | } 137 | 138 | if next != nil { 139 | waiterElementMapper{}.linkerFor(next).SetPrev(prev) 140 | } else { 141 | l.tail = prev 142 | } 143 | } 144 | 145 | // Entry is a default implementation of Linker. Users can add anonymous fields 146 | // of this type to their structs to make them automatically implement the 147 | // methods needed by List. 
148 | // 149 | // +stateify savable 150 | type waiterEntry struct { 151 | next *Entry 152 | prev *Entry 153 | } 154 | 155 | // Next returns the entry that follows e in the list. 156 | func (e *waiterEntry) Next() *Entry { 157 | return e.next 158 | } 159 | 160 | // Prev returns the entry that precedes e in the list. 161 | func (e *waiterEntry) Prev() *Entry { 162 | return e.prev 163 | } 164 | 165 | // SetNext assigns 'entry' as the entry that follows e in the list. 166 | func (e *waiterEntry) SetNext(elem *Entry) { 167 | e.next = elem 168 | } 169 | 170 | // SetPrev assigns 'entry' as the entry that precedes e in the list. 171 | func (e *waiterEntry) SetPrev(elem *Entry) { 172 | e.prev = elem 173 | } 174 | -------------------------------------------------------------------------------- /waiter/waiter_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The gVisor Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package waiter 16 | 17 | import ( 18 | "sync/atomic" 19 | "testing" 20 | ) 21 | 22 | type callbackStub struct { 23 | f func(e *Entry) 24 | } 25 | 26 | // Callback implements EntryCallback.Callback. 27 | func (c *callbackStub) Callback(e *Entry) { 28 | c.f(e) 29 | } 30 | 31 | func TestEmptyQueue(t *testing.T) { 32 | var q Queue 33 | 34 | // Notify the zero-value of a queue. 
35 | q.Notify(EventIn) 36 | 37 | // Register then unregister a waiter, then notify the queue. 38 | cnt := 0 39 | e := Entry{Callback: &callbackStub{func(*Entry) { cnt++ }}} 40 | q.EventRegister(&e, EventIn) 41 | q.EventUnregister(&e) 42 | q.Notify(EventIn) 43 | if cnt != 0 { 44 | t.Errorf("Callback was called when it shouldn't have been") 45 | } 46 | } 47 | 48 | func TestMask(t *testing.T) { 49 | // Register a waiter. 50 | var q Queue 51 | var cnt int 52 | e := Entry{Callback: &callbackStub{func(*Entry) { cnt++ }}} 53 | q.EventRegister(&e, EventIn|EventErr) 54 | 55 | // Notify with an overlapping mask. 56 | cnt = 0 57 | q.Notify(EventIn | EventOut) 58 | if cnt != 1 { 59 | t.Errorf("Callback wasn't called when it should have been") 60 | } 61 | 62 | // Notify with a subset mask. 63 | cnt = 0 64 | q.Notify(EventIn) 65 | if cnt != 1 { 66 | t.Errorf("Callback wasn't called when it should have been") 67 | } 68 | 69 | // Notify with a superset mask. 70 | cnt = 0 71 | q.Notify(EventIn | EventErr | EventOut) 72 | if cnt != 1 { 73 | t.Errorf("Callback wasn't called when it should have been") 74 | } 75 | 76 | // Notify with the exact same mask. 77 | cnt = 0 78 | q.Notify(EventIn | EventErr) 79 | if cnt != 1 { 80 | t.Errorf("Callback wasn't called when it should have been") 81 | } 82 | 83 | // Notify with a disjoint mask. 84 | cnt = 0 85 | q.Notify(EventOut | EventHUp) 86 | if cnt != 0 { 87 | t.Errorf("Callback was called when it shouldn't have been") 88 | } 89 | } 90 | 91 | func TestConcurrentRegistration(t *testing.T) { 92 | var q Queue 93 | var cnt int 94 | const concurrency = 1000 95 | 96 | ch1 := make(chan struct{}) 97 | ch2 := make(chan struct{}) 98 | ch3 := make(chan struct{}) 99 | 100 | // Create goroutines that will all register/unregister concurrently. 
101 | for i := 0; i < concurrency; i++ { 102 | go func() { 103 | var e Entry 104 | e.Callback = &callbackStub{func(entry *Entry) { 105 | cnt++ 106 | if entry != &e { 107 | t.Errorf("entry = %p, want %p", entry, &e) 108 | } 109 | }} 110 | 111 | // Wait for notification, then register. 112 | <-ch1 113 | q.EventRegister(&e, EventIn|EventErr) 114 | 115 | // Tell main goroutine that we're done registering. 116 | ch2 <- struct{}{} 117 | 118 | // Wait for notification, then unregister. 119 | <-ch3 120 | q.EventUnregister(&e) 121 | 122 | // Tell main goroutine that we're done unregistering. 123 | ch2 <- struct{}{} 124 | }() 125 | } 126 | 127 | // Let the goroutines register. 128 | close(ch1) 129 | for i := 0; i < concurrency; i++ { 130 | <-ch2 131 | } 132 | 133 | // Issue a notification. 134 | q.Notify(EventIn) 135 | if cnt != concurrency { 136 | t.Errorf("cnt = %d, want %d", cnt, concurrency) 137 | } 138 | 139 | // Let the goroutine unregister. 140 | close(ch3) 141 | for i := 0; i < concurrency; i++ { 142 | <-ch2 143 | } 144 | 145 | // Issue a notification. 146 | q.Notify(EventIn) 147 | if cnt != concurrency { 148 | t.Errorf("cnt = %d, want %d", cnt, concurrency) 149 | } 150 | } 151 | 152 | func TestConcurrentNotification(t *testing.T) { 153 | var q Queue 154 | var cnt int32 155 | const concurrency = 1000 156 | const waiterCount = 1000 157 | 158 | // Register waiters. 159 | for i := 0; i < waiterCount; i++ { 160 | var e Entry 161 | e.Callback = &callbackStub{func(entry *Entry) { 162 | atomic.AddInt32(&cnt, 1) 163 | if entry != &e { 164 | t.Errorf("entry = %p, want %p", entry, &e) 165 | } 166 | }} 167 | 168 | q.EventRegister(&e, EventIn|EventErr) 169 | } 170 | 171 | // Launch notifiers. 172 | ch1 := make(chan struct{}) 173 | ch2 := make(chan struct{}) 174 | for i := 0; i < concurrency; i++ { 175 | go func() { 176 | <-ch1 177 | q.Notify(EventIn) 178 | ch2 <- struct{}{} 179 | }() 180 | } 181 | 182 | // Let notifiers go. 
183 | close(ch1) 184 | for i := 0; i < concurrency; i++ { 185 | <-ch2 186 | } 187 | 188 | // Check the count. 189 | if cnt != concurrency*waiterCount { 190 | t.Errorf("cnt = %d, want %d", cnt, concurrency*waiterCount) 191 | } 192 | } 193 | --------------------------------------------------------------------------------