├── .gitignore ├── .goreleaser.yml ├── README.md ├── go.mod ├── go.sum └── main.go /.gitignore: -------------------------------------------------------------------------------- 1 | can-i-use-afpacket-fanout 2 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | project_name: can-i-use-afpacket-fanout 2 | release: 3 | github: 4 | owner: JustinAzoff 5 | name: can-i-use-afpacket-fanout 6 | name_template: '{{.Tag}}' 7 | builds: 8 | - goos: 9 | - linux 10 | goarch: 11 | - amd64 12 | goarm: 13 | - "6" 14 | targets: 15 | - linux_amd64 16 | main: . 17 | ldflags: -s -w -X main.version={{.Version}} -X main.commit={{.Commit}} -X main.date={{.Date}} 18 | binary: can-i-use-afpacket-fanout 19 | lang: go 20 | archive: 21 | name_template: '{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ if .Arm 22 | }}v{{ .Arm }}{{ end }}' 23 | format: tar.gz 24 | files: 25 | - licence* 26 | - LICENCE* 27 | - license* 28 | - LICENSE* 29 | - readme* 30 | - README* 31 | - changelog* 32 | - CHANGELOG* 33 | nfpm: 34 | description: Validate if afpacket PACKET_FANOUT_HASH is working properly 35 | license: MIT 36 | name_template: '{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ if .Arm 37 | }}v{{ .Arm }}{{ end }}' 38 | bindir: /usr/bin 39 | homepage: https://github.com/JustinAzoff/can-i-use-afpacket-fanout 40 | maintainer: Justin Azoff 41 | formats: 42 | - deb 43 | - rpm 44 | snapshot: 45 | name_template: SNAPSHOT-{{ .Commit }} 46 | checksum: 47 | name_template: '{{ .ProjectName }}_{{ .Version }}_checksums.txt' 48 | dist: dist 49 | sign: 50 | cmd: gpg 51 | args: 52 | - --output 53 | - $signature 54 | - --detach-sig 55 | - $artifact 56 | signature: ${artifact}.sig 57 | artifacts: none 58 | env_files: 59 | github_token: ~/.config/goreleaser/github_token 60 | github_urls: 61 | download: https://github.com 62 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Can I use afpacket fanout? 2 | 3 | ## Background 4 | 5 | The Linux kernel has a feature for efficiently capturing packets called afpacket. 6 | 7 | A related feature, `fanout groups`, exists so that you can capture from N processes or threads at the 8 | same time, and each thread will see 1/Nth of the traffic. For stateful 9 | applications that perform stream reassembly, instead of a simple round robin distribution the packets need to 10 | be hashed according to their 5-tuple. To further complicate the issue, the hash function 11 | needs to be symmetrical so that a packet from HostA to HostB is hashed to the 12 | same process as the packet from HostB to HostA. This feature is known as PACKET\_FANOUT\_HASH. 13 | 14 | Unfortunately, some versions of the Linux kernel are broken and do not properly 15 | implement the symmetric hash. [This issue has been 16 | fixed](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=eb70db8756717b90c01ccc765fdefc4dd969fc74), 17 | but the buggy code made its way into various distribution kernels. 18 | 19 | It's not easy to know just by looking at a kernel version whether or not it 20 | will work properly. 21 | 22 | Furthermore, in some cases -- like, for example, with MPLS traffic -- afpacket fanout might not do the balancing 23 | as you expect. 24 | 25 | `can-i-use-afpacket-fanout` is a tool that runs multiple threads in a fanout group and checks to 26 | see if flows are routed to the appropriate workers. If it sees a flow 27 | on two different workers, or the reverse flow on a different worker, it 28 | will log a Failure. 
29 | 30 | ## Install 31 | 32 | $ export GOPATH=~/go # If you don't already have this set to something 33 | $ go install github.com/JustinAzoff/can-i-use-afpacket-fanout@latest 34 | 35 | ## RUN 36 | 37 | $ sudo ~/go/bin/can-i-use-afpacket-fanout -interface wlan0 -maxflows 500 38 | 2016/11/08 03:55:20 Starting worker id 0 on interface wlan0 39 | 2016/11/08 03:55:20 Starting worker id 1 on interface wlan0 40 | 2016/11/08 03:55:20 Starting worker id 2 on interface wlan0 41 | 2016/11/08 03:55:20 Starting worker id 3 on interface wlan0 42 | 2016/11/08 03:55:20 Starting worker id 4 on interface wlan0 43 | 2016/11/08 03:55:20 Starting worker id 5 on interface wlan0 44 | 2016/11/08 03:55:20 Starting worker id 6 on interface wlan0 45 | 2016/11/08 03:55:20 Starting worker id 7 on interface wlan0 46 | 2016/11/08 03:55:20 Collecting results until 500 flows have been seen.. 47 | 2016/11/08 03:55:22 Stats: packets=100 flows=31 success=69 reverse_success=83 failures=0 reverse_failures=0 48 | 2016/11/08 03:55:23 Stats: packets=200 flows=42 success=158 reverse_success=176 failures=0 reverse_failures=0 49 | 2016/11/08 03:55:24 Stats: packets=300 flows=72 success=228 reverse_success=258 failures=0 reverse_failures=0 50 | 2016/11/08 03:55:25 Stats: packets=400 flows=87 success=313 reverse_success=348 failures=0 reverse_failures=0 51 | 2016/11/08 03:55:27 Stats: packets=500 flows=123 success=377 reverse_success=425 failures=0 reverse_failures=0 52 | 2016/11/08 03:55:29 Stats: packets=600 flows=151 success=449 reverse_success=506 failures=0 reverse_failures=0 53 | 2016/11/08 03:55:31 Stats: packets=700 flows=179 success=521 reverse_success=588 failures=0 reverse_failures=0 54 | 2016/11/08 03:55:33 Stats: packets=800 flows=202 success=598 reverse_success=670 failures=0 reverse_failures=0 55 | 2016/11/08 03:55:34 Stats: packets=900 flows=227 success=673 reverse_success=755 failures=0 reverse_failures=0 56 | 2016/11/08 03:55:36 Stats: packets=1000 flows=247 success=753 reverse_success=838 
failures=0 reverse_failures=0 57 | 2016/11/08 03:55:37 Stats: packets=1100 flows=269 success=831 reverse_success=926 failures=0 reverse_failures=0 58 | 2016/11/08 03:55:38 Stats: packets=1200 flows=291 success=909 reverse_success=1007 failures=0 reverse_failures=0 59 | 2016/11/08 03:55:40 Stats: packets=1300 flows=305 success=995 reverse_success=1094 failures=0 reverse_failures=0 60 | 2016/11/08 03:55:41 Stats: packets=1400 flows=325 success=1075 reverse_success=1176 failures=0 reverse_failures=0 61 | 2016/11/08 03:55:43 Stats: packets=1500 flows=346 success=1154 reverse_success=1263 failures=0 reverse_failures=0 62 | 2016/11/08 03:55:44 Stats: packets=1600 flows=372 success=1228 reverse_success=1345 failures=0 reverse_failures=0 63 | 2016/11/08 03:55:45 Stats: packets=1700 flows=386 success=1314 reverse_success=1431 failures=0 reverse_failures=0 64 | 2016/11/08 03:55:46 Stats: packets=1800 flows=399 success=1401 reverse_success=1519 failures=0 reverse_failures=0 65 | 2016/11/08 03:55:46 Stats: packets=1900 flows=409 success=1491 reverse_success=1608 failures=0 reverse_failures=0 66 | 2016/11/08 03:55:48 Stats: packets=2000 flows=431 success=1569 reverse_success=1693 failures=0 reverse_failures=0 67 | 2016/11/08 03:55:49 Stats: packets=2100 flows=448 success=1652 reverse_success=1778 failures=0 reverse_failures=0 68 | 2016/11/08 03:55:50 Stats: packets=2200 flows=470 success=1730 reverse_success=1864 failures=0 reverse_failures=0 69 | 2016/11/08 03:55:51 Stats: packets=2300 flows=493 success=1807 reverse_success=1945 failures=0 reverse_failures=0 70 | 2016/11/08 03:55:52 Final Stats: packets=2345 flows=501 success=1844 reverse_success=1983 failures=0 reverse_failures=0 71 | 2016/11/08 03:55:52 Worker flow count distribution: 72 | 2016/11/08 03:55:52 - worker=0 flows=54 73 | 2016/11/08 03:55:52 - worker=1 flows=79 74 | 2016/11/08 03:55:52 - worker=2 flows=44 75 | 2016/11/08 03:55:52 - worker=3 flows=58 76 | 2016/11/08 03:55:52 - worker=4 flows=71 77 | 2016/11/08 
03:55:52 - worker=5 flows=73 78 | 2016/11/08 03:55:52 - worker=6 flows=64 79 | 2016/11/08 03:55:52 - worker=7 flows=58 80 | 81 | 82 | YES! 83 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/JustinAzoff/can-i-use-afpacket-fanout 2 | 3 | go 1.16 4 | 5 | require github.com/google/gopacket v1.1.19 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF8= 2 | github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo= 3 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 4 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 5 | golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= 6 | golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= 7 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 8 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI= 9 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 10 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 11 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 12 | golang.org/x/sys v0.0.0-20190412213103-97732733099d h1:+R4KGOnez64A81RvjARKc4UT5/tI9ujCIVX+P5KiHuI= 13 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 14 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 15 | golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= 16 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 17 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "log" 7 | "sync" 8 | 9 | "github.com/google/gopacket" 10 | "github.com/google/gopacket/afpacket" 11 | "github.com/google/gopacket/layers" 12 | ) 13 | 14 | var ( 15 | workerCount int 16 | iface string 17 | fanoutGroup int 18 | maxFlows int 19 | statusInterval int 20 | //skipInitial is used to skip packets that are delivered before the kernel fully sets up the load balancing 21 | //between all the workers 22 | skipInitial int 23 | includeNetworkLayer bool 24 | dumpNetworkLayerInformation bool 25 | wg sync.WaitGroup 26 | ) 27 | 28 | func init() { 29 | flag.IntVar(&workerCount, "workercount", 8, "Number of workers") 30 | flag.IntVar(&fanoutGroup, "fanoutGroup", 42, "fanout group id") 31 | flag.IntVar(&maxFlows, "maxflows", 100, "How many flows to track before exiting") 32 | flag.StringVar(&iface, "interface", "eth0", "Interface") 33 | flag.IntVar(&statusInterval, "statusinterval", 500, "How many packets before each status update") 34 | flag.IntVar(&skipInitial, "skipinitial", 100, "How many packets to skip before collecting data") 35 | flag.BoolVar(&includeNetworkLayer, "includenetworklayer", false, "Set this flag to include the link and network layer protocols in the hash calculation") 36 | flag.BoolVar(&dumpNetworkLayerInformation, "dumpnetworklayerinformation", false, "Set this flag to include the network layer information in the per-node output. 
Implies includenetworklayer") 37 | flag.Parse() 38 | } 39 | 40 | type FiveTuple struct { 41 | proto string 42 | src, sport, dst, dport string 43 | layerNames string 44 | } 45 | 46 | type WorkerFlow struct { 47 | workerID int 48 | flow FiveTuple 49 | } 50 | 51 | type Stats struct { 52 | packets int 53 | success int 54 | reverseSuccess int 55 | failures int 56 | reverseFailures int 57 | } 58 | 59 | func getFiveTuple(p gopacket.Packet) (FiveTuple, error) { 60 | var flow FiveTuple 61 | 62 | nl := p.NetworkLayer() 63 | if nl == nil { 64 | return flow, fmt.Errorf("Nope") 65 | } 66 | src, dst := nl.NetworkFlow().Endpoints() 67 | flow.src = src.String() 68 | flow.dst = dst.String() 69 | tl := p.TransportLayer() 70 | if tl != nil { 71 | flow.proto = tl.LayerType().String() 72 | sport, dport := tl.TransportFlow().Endpoints() 73 | flow.sport = sport.String() 74 | flow.dport = dport.String() 75 | } 76 | 77 | if includeNetworkLayer { 78 | for _, layer := range p.Layers() { 79 | // stop at the transport layer 80 | if tl != nil && layer == tl { 81 | break 82 | } 83 | if len(flow.layerNames) == 0 { 84 | flow.layerNames = layer.LayerType().String() 85 | } else { 86 | flow.layerNames += ", " + layer.LayerType().String() 87 | } 88 | } 89 | } 90 | 91 | return flow, nil 92 | } 93 | 94 | func worker(id int, flowchan chan WorkerFlow) { 95 | handle, err := afpacket.NewTPacket(afpacket.OptInterface(iface)) 96 | if err != nil { 97 | log.Fatal(err) 98 | } 99 | err = handle.SetFanout(afpacket.FanoutHashWithDefrag, uint16(fanoutGroup)) 100 | if err != nil { 101 | log.Fatal(err) 102 | } 103 | defer handle.Close() 104 | 105 | wg.Done() 106 | wg.Wait() 107 | 108 | source := gopacket.NewPacketSource(handle, layers.LinkTypeEthernet) 109 | 110 | n := 0 111 | for packet := range source.Packets() { 112 | ft, err := getFiveTuple(packet) 113 | if err != nil { 114 | continue 115 | } 116 | if n > skipInitial { 117 | flowchan <- WorkerFlow{id, ft} 118 | } else { 119 | n++ 120 | if n == skipInitial { 121 | 
log.Printf("Worker %d has seen at least %d packets, collecting data", id, skipInitial) 122 | } 123 | } 124 | } 125 | } 126 | 127 | func main() { 128 | 129 | if dumpNetworkLayerInformation { 130 | includeNetworkLayer = true 131 | } 132 | 133 | flows := make(chan WorkerFlow, workerCount) 134 | 135 | flowMap := make(map[FiveTuple]int) 136 | failedFlowMap := make(map[FiveTuple]bool) 137 | successFlowMap := make(map[FiveTuple]bool) 138 | workerFlowCounts := make(map[int]int) 139 | workerProtocolCounts := make(map[int]map[string]int) 140 | 141 | wg.Add(workerCount) 142 | for w := 0; w < workerCount; w++ { 143 | log.Printf("Starting worker id %d on interface %s", w, iface) 144 | go worker(w, flows) 145 | } 146 | wg.Wait() 147 | log.Printf("%d workers started. Collecting results until %d flows have been seen..", workerCount, maxFlows) 148 | 149 | s := Stats{} 150 | for workerflow := range flows { 151 | s.packets++ 152 | 153 | // Check if this flow was seen before, and if so, on the same worker 154 | flow := workerflow.flow 155 | worker, existed := flowMap[flow] 156 | if !existed { 157 | flowMap[flow] = workerflow.workerID 158 | workerFlowCounts[workerflow.workerID]++ 159 | 160 | if dumpNetworkLayerInformation { 161 | // let's also do a protocol count 162 | protocols, ok := workerProtocolCounts[workerflow.workerID] 163 | if !ok { 164 | protocols = make(map[string]int) 165 | workerProtocolCounts[workerflow.workerID] = protocols 166 | } 167 | protocols[flow.layerNames]++ 168 | } 169 | 170 | } else if worker != workerflow.workerID { 171 | log.Printf("FAIL: saw flow %s on worker %d expected %d", flow, workerflow.workerID, worker) 172 | failedFlowMap[flow] = true 173 | delete(successFlowMap, flow) 174 | s.failures++ 175 | } else { 176 | if _, exists := failedFlowMap[flow]; !exists { 177 | successFlowMap[flow] = true 178 | } 179 | s.success++ 180 | } 181 | 182 | 183 | //now check if the reverse flow was seen, and if so, on the same worker 184 | reverseFlow := 
FiveTuple{flow.proto, flow.dst, flow.dport, flow.src, flow.sport, flow.layerNames} 185 | 186 | worker, existed = flowMap[reverseFlow] 187 | if !existed { 188 | //Nothing to do in this case, can't draw any conclusions 189 | } else if worker != workerflow.workerID { 190 | log.Printf("FAIL: saw reverse flow of %s on worker %d expected %d", flow, workerflow.workerID, worker) 191 | failedFlowMap[reverseFlow] = true 192 | delete(successFlowMap, reverseFlow) 193 | s.reverseFailures++ 194 | } else { 195 | if _, exists := failedFlowMap[reverseFlow]; !exists { 196 | successFlowMap[reverseFlow] = true 197 | } 198 | s.reverseSuccess++ 199 | } 200 | if len(flowMap) > maxFlows { 201 | break 202 | } 203 | 204 | if s.packets%statusInterval == 0 { 205 | log.Printf("Stats: packets=%d flows=%d success_flows=%d failed_flows=%d pkt_success=%d pkt_reverse_success=%d pkt_failures=%d pkt_reverse_failures=%d", 206 | s.packets, len(flowMap), len(successFlowMap), len(failedFlowMap), s.success, s.reverseSuccess, s.failures, s.reverseFailures) 207 | } 208 | } 209 | log.Printf("Final Stats: packets=%d flows=%d success_flows=%d failed_flows=%d pkt_success=%d pkt_reverse_success=%d pkt_failures=%d pkt_reverse_failures=%d", 210 | s.packets, len(flowMap), len(successFlowMap), len(failedFlowMap), s.success, s.reverseSuccess, s.failures, s.reverseFailures) 211 | log.Printf("Worker flow count distribution:") 212 | for w := 0; w < workerCount; w++ { 213 | log.Printf(" - worker=%d flows=%d", w, workerFlowCounts[w]) 214 | if dumpNetworkLayerInformation && workerFlowCounts[w] > 0 { 215 | for proto, count := range workerProtocolCounts[w] { 216 | log.Printf(" - protocol=%s flows=%d", proto, count) 217 | } 218 | } 219 | } 220 | } 221 | --------------------------------------------------------------------------------