├── .editorconfig ├── .github └── workflows │ └── ci.yml ├── .gitmodules ├── AUTHORS ├── NEWS.md ├── README.md ├── meson.build ├── meson_options.txt └── src ├── libnacd.sym ├── meson.build ├── n-acd-bpf-fallback.c ├── n-acd-bpf.c ├── n-acd-private.h ├── n-acd-probe.c ├── n-acd.c ├── n-acd.h ├── test-api.c ├── test-bpf.c ├── test-loopback.c ├── test-twice.c ├── test-unplug.c ├── test-unused.c ├── test-veth.c ├── test.h └── util ├── test-timer.c ├── timer.c └── timer.h /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | insert_final_newline = true 6 | trim_trailing_whitespace = true 7 | charset = utf-8 8 | 9 | [*.{c,h}] 10 | indent_style = space 11 | indent_size = 8 12 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: '0 0 * * *' 8 | 9 | jobs: 10 | ci: 11 | name: CI with Default Configuration 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | # 16 | # Prepare CI 17 | # 18 | # We cannot use the github-action of the `ci-c-util` project, because we 19 | # need privileges in the container. Therefore, fetch the CI sources and 20 | # build the container manually. 21 | # 22 | - name: Fetch CI 23 | uses: actions/checkout@v2 24 | with: 25 | repository: c-util/automation 26 | ref: v1 27 | path: automation 28 | - name: Build CI 29 | working-directory: automation/src/ci-c-util 30 | run: docker build --tag ci-c-util:v1 . 31 | 32 | # 33 | # Run CI 34 | # 35 | # Take the CI image we built and run the CI with the default project 36 | # configuration. We do not use valgrind, since it falls-over with bpf(2) 37 | # syscalls. 38 | # 39 | - name: Fetch Sources 40 | uses: actions/checkout@v2 41 | with: 42 | path: source 43 | - name: Run through C-Util CI 44 | run: | 45 | docker run \ 46 | --privileged \ 47 | -v "$(pwd)/source:/github/workspace" \ 48 | "ci-c-util:v1" \ 49 | "--m32=1" \ 50 | "--source=/github/workspace" 51 | 52 | ci-no-ebpf: 53 | name: CI without eBPF 54 | runs-on: ubuntu-latest 55 | 56 | steps: 57 | # See above in 'ci' job. 58 | - name: Fetch CI 59 | uses: actions/checkout@v2 60 | with: 61 | repository: c-util/automation 62 | ref: v1 63 | path: automation 64 | - name: Build CI 65 | working-directory: automation/src/ci-c-util 66 | run: docker build --tag ci-c-util:v1 . 67 | 68 | # 69 | # Run CI 70 | # 71 | # This again runs the CI, but this time disables eBPF. We do support the 72 | # legacy BPF fallback, so lets make sure we test for it. 73 | # 74 | - name: Fetch Sources 75 | uses: actions/checkout@v2 76 | with: 77 | path: source 78 | - name: Run through C-Util CI 79 | run: | 80 | docker run \ 81 | --privileged \ 82 | -v "$(pwd)/source:/github/workspace" \ 83 | "ci-c-util:v1" \ 84 | "--m32=1" \ 85 | "--mesonargs=-Debpf=false" \ 86 | "--source=/github/workspace" 87 | 88 | ci-valgrind: 89 | name: CI through Valgrind 90 | runs-on: ubuntu-latest 91 | 92 | steps: 93 | # See above in 'ci' job. 94 | - name: Fetch CI 95 | uses: actions/checkout@v2 96 | with: 97 | repository: c-util/automation 98 | ref: v1 99 | path: automation 100 | - name: Build CI 101 | working-directory: automation/src/ci-c-util 102 | run: docker build --tag ci-c-util:v1 . 103 | 104 | # 105 | # Run CI 106 | # 107 | # This again runs the CI, but this time through valgrind. Since some 108 | # syscalls are not implemented on x86-64 32bit compat (e.g., bpf(2)), we 109 | # disable the m32 mode. 110 | # 111 | - name: Fetch Sources 112 | uses: actions/checkout@v2 113 | with: 114 | path: source 115 | - name: Run through C-Util CI 116 | run: | 117 | docker run \ 118 | --privileged \ 119 | -v "$(pwd)/source:/github/workspace" \ 120 | "ci-c-util:v1" \ 121 | "--source=/github/workspace" \ 122 | "--valgrind=1" 123 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "subprojects/c-list"] 2 | path = subprojects/c-list 3 | url = https://github.com/c-util/c-list.git 4 | [submodule "subprojects/c-siphash"] 5 | path = subprojects/c-siphash 6 | url = https://github.com/c-util/c-siphash.git 7 | [submodule "subprojects/c-rbtree"] 8 | path = subprojects/c-rbtree 9 | url = https://github.com/c-util/c-rbtree.git 10 | [submodule "subprojects/c-stdaux"] 11 | path = subprojects/c-stdaux 12 | url = https://github.com/c-util/c-stdaux.git 13 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | LICENSE: 2 | This project is dual-licensed under both the Apache License, Version 3 | 2.0, and the GNU Lesser General Public License, Version 2.1+. 4 | 5 | AUTHORS-ASL: 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | 18 | AUTHORS-LGPL: 19 | This program is free software; you can redistribute it and/or modify it 20 | under the terms of the GNU Lesser General Public License as published 21 | by the Free Software Foundation; either version 2.1 of the License, or 22 | (at your option) any later version. 23 | 24 | This program is distributed in the hope that it will be useful, but 25 | WITHOUT ANY WARRANTY; without even the implied warranty of 26 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 27 | Lesser General Public License for more details. 28 | 29 | You should have received a copy of the GNU Lesser General Public License 30 | along with this program; If not, see . 31 | 32 | COPYRIGHT: (ordered alphabetically) 33 | Copyright (C) 2015-2019 Red Hat, Inc. 34 | 35 | AUTHORS: (ordered alphabetically) 36 | Beniamino Galvani 37 | David Rheinsberg 38 | Thomas Haller 39 | Tom Gundersen 40 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # n-acd - IPv4 Address Conflict Detection 2 | 3 | ## CHANGES WITH 2: 4 | 5 | * All public destructors now include a variant that returns `void`. 6 | This was requested for easier integration with `glib` and friends. 7 | Similar to the `cleanup` variants, these variants are denoted by a 8 | single-character function-name suffix. E.g., `n_acd_freev()` 9 | 10 | * A fallback to `CLOCK_MONOTONIC` is now provided in case 11 | `CLOCK_BOOTTIME` is not supported by the kernel. Note that this is in 12 | no way signalled through the API, so if timers should follow the 13 | `BOOTTIME` rather than monotonic clock, a kernel with this clock is 14 | required. 15 | 16 | * The `c-sundry` dependency is no longer needed. 17 | 18 | * The `transport` configuration property is now mandatory for 19 | `n_acd_new()`. It defaulted to `ETHERNET` before, by mistake. 20 | 21 | * In-source documentation for the public API is now provided. 22 | 23 | Contributions from: Beniamino Galvani, David Herrmann, David 24 | Rheinsberg, Thomas Haller, Tom Gundersen 25 | 26 | - Tübingen, 2019-03-20 27 | 28 | ## CHANGES WITH 1: 29 | 30 | * Initial release of n-acd. This project implements the IPv4 Address 31 | Conflict Detection standard as defined in RFC-5227. The state machine 32 | is implemented in a shared library and provides a stable ISO-C11 API. 33 | The implementation is linux-only and relies heavily on the API 34 | behavior of recent linux kernel releases. 35 | 36 | * Compared to the pre-releases, this release supports many parallel 37 | probes on a single n-acd context. This reduces the number of 38 | allocated network resources to O(1), based on the number of running 39 | parallel probes. 40 | 41 | * The n-acd project is now dual-licensed: ASL-2.0 and LGPL-2.1+ 42 | 43 | Contributions from: Beniamino Galvani, David Herrmann, Thomas Haller, 44 | Tom Gundersen 45 | 46 | - Tübingen, 2018-08-08 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | n-acd 2 | ===== 3 | 4 | IPv4 Address Conflict Detection 5 | 6 | The n-acd project implements the IPv4 Address Conflict Detection standard as 7 | defined in RFC-5227. The state machine is implemented in a shared library and 8 | provides a stable ISO-C11 API. The implementation is linux-only and relies 9 | heavily on the API behavior of recent linux kernel releases. 10 | 11 | ### Project 12 | 13 | * **Website**: 14 | * **Bug Tracker**: 15 | * **Mailing-List**: 16 | 17 | ### Requirements 18 | 19 | The requirements for this project are: 20 | 21 | * `Linux kernel >= 3.19` 22 | * `libc` (e.g., `glibc >= 2.16`) 23 | 24 | At build-time, the following software is required: 25 | 26 | * `meson >= 0.41` 27 | * `pkg-config >= 0.29` 28 | 29 | ### Build 30 | 31 | The meson build-system is used for this project. Contact upstream 32 | documentation for detailed help. In most situations the following 33 | commands are sufficient to build and install from source: 34 | 35 | ```sh 36 | mkdir build 37 | cd build 38 | meson setup .. 39 | ninja 40 | meson test 41 | ninja install 42 | ``` 43 | 44 | The following configuration options are available: 45 | 46 | * `ebpf`: This boolean controls whether `ebpf` features are used to improve 47 | the package filtering performance. If disabled, classic bpf will be 48 | used. This feature requires a rather recent kernel (>=3.19). 49 | Default is: true 50 | 51 | ### Repository: 52 | 53 | - **web**: 54 | - **https**: `https://github.com/nettools/n-acd.git` 55 | - **ssh**: `git@github.com:nettools/n-acd.git` 56 | 57 | ### License: 58 | 59 | - **Apache-2.0** OR **LGPL-2.1-or-later** 60 | - See AUTHORS file for details. 61 | -------------------------------------------------------------------------------- /meson.build: -------------------------------------------------------------------------------- 1 | project( 2 | 'n-acd', 3 | 'c', 4 | version: '2', 5 | license: 'Apache', 6 | default_options: [ 7 | 'c_std=c11', 8 | ], 9 | ) 10 | project_description = 'IPv4 Address Conflict Detection' 11 | 12 | add_project_arguments('-D_GNU_SOURCE', language: 'c') 13 | mod_pkgconfig = import('pkgconfig') 14 | 15 | sub_clist = subproject('c-list') 16 | sub_crbtree = subproject('c-rbtree') 17 | sub_csiphash = subproject('c-siphash') 18 | sub_cstdaux = subproject('c-stdaux') 19 | 20 | dep_clist = sub_clist.get_variable('libclist_dep') 21 | dep_crbtree = sub_crbtree.get_variable('libcrbtree_dep') 22 | dep_csiphash = sub_csiphash.get_variable('libcsiphash_dep') 23 | dep_cstdaux = sub_cstdaux.get_variable('libcstdaux_dep') 24 | 25 | use_ebpf = get_option('ebpf') 26 | 27 | subdir('src') 28 | -------------------------------------------------------------------------------- /meson_options.txt: -------------------------------------------------------------------------------- 1 | option('ebpf', type: 'boolean', value: true, description: 'Enable eBPF packet filtering') 2 | -------------------------------------------------------------------------------- /src/libnacd.sym: -------------------------------------------------------------------------------- 1 | LIBNACD_2 { 2 | global: 3 | n_acd_config_new; 4 | n_acd_config_free; 5 | n_acd_config_set_ifindex; 6 | n_acd_config_set_transport; 7 | n_acd_config_set_mac; 8 | 9 | n_acd_probe_config_new; 10 | n_acd_probe_config_free; 11 | n_acd_probe_config_set_ip; 12 | n_acd_probe_config_set_timeout; 13 | 14 | n_acd_new; 15 | n_acd_ref; 16 | n_acd_unref; 17 | n_acd_get_fd; 18 | n_acd_dispatch; 19 | n_acd_pop_event; 20 | n_acd_probe; 21 | 22 | n_acd_probe_free; 23 | n_acd_probe_set_userdata; 24 | n_acd_probe_get_userdata; 25 | n_acd_probe_announce; 26 | local: 27 | *; 28 | }; 29 | -------------------------------------------------------------------------------- /src/meson.build: -------------------------------------------------------------------------------- 1 | # 2 | # target: libnacd.so 3 | # 4 | 5 | libnacd_symfile = join_paths(meson.current_source_dir(), 'libnacd.sym') 6 | 7 | libnacd_deps = [ 8 | dep_clist, 9 | dep_crbtree, 10 | dep_csiphash, 11 | dep_cstdaux, 12 | ] 13 | 14 | libnacd_sources = [ 15 | 'n-acd.c', 16 | 'n-acd-probe.c', 17 | 'util/timer.c', 18 | ] 19 | 20 | if use_ebpf 21 | libnacd_sources += [ 22 | 'n-acd-bpf.c', 23 | ] 24 | else 25 | libnacd_sources += [ 26 | 'n-acd-bpf-fallback.c', 27 | ] 28 | endif 29 | 30 | libnacd_private = static_library( 31 | 'nacd-private', 32 | libnacd_sources, 33 | c_args: [ 34 | '-fvisibility=hidden', 35 | '-fno-common' 36 | ], 37 | dependencies: libnacd_deps, 38 | pic: true, 39 | ) 40 | 41 | libnacd_shared = shared_library( 42 | 'nacd', 43 | objects: libnacd_private.extract_all_objects(), 44 | dependencies: libnacd_deps, 45 | install: not meson.is_subproject(), 46 | soversion: 0, 47 | link_depends: libnacd_symfile, 48 | link_args: [ 49 | '-Wl,--no-undefined', 50 | '-Wl,--version-script=@0@'.format(libnacd_symfile) 51 | ], 52 | ) 53 | 54 | libnacd_dep = declare_dependency( 55 | include_directories: include_directories('.'), 56 | link_with: libnacd_private, 57 | dependencies: libnacd_deps, 58 | version: meson.project_version(), 59 | ) 60 | 61 | if not meson.is_subproject() 62 | install_headers('n-acd.h') 63 | 64 | mod_pkgconfig.generate( 65 | libraries: libnacd_shared, 66 | version: meson.project_version(), 67 | name: 'libnacd', 68 | filebase: 'libnacd', 69 | description: project_description, 70 | ) 71 | endif 72 | 73 | # 74 | # target: test-* 75 | # 76 | 77 | test_api = executable('test-api', ['test-api.c'], link_with: libnacd_shared) 78 | test('API Symbol Visibility', test_api) 79 | 80 | if use_ebpf 81 | test_bpf = executable('test-bpf', ['test-bpf.c'], dependencies: libnacd_dep) 82 | test('eBPF socket filtering', test_bpf) 83 | endif 84 | 85 | test_loopback = executable('test-loopback', ['test-loopback.c'], dependencies: libnacd_dep) 86 | test('Echo Suppression via Loopback', test_loopback) 87 | 88 | test_timer = executable('test-timer', ['util/test-timer.c'], dependencies: libnacd_dep) 89 | test('Timer helper', test_timer) 90 | 91 | #test_unplug = executable('test-unplug', ['test-unplug.c'], dependencies: libnacd_dep) 92 | #test('Async Interface Hotplug', test_unplug) 93 | 94 | test_veth = executable('test-veth', ['test-veth.c'], dependencies: libnacd_dep) 95 | test('Parallel ACD instances', test_veth) 96 | -------------------------------------------------------------------------------- /src/n-acd-bpf-fallback.c: -------------------------------------------------------------------------------- 1 | /* 2 | * A noop implementation of eBPF filter for IPv4 Address Conflict Detection 3 | * 4 | * These are a collection of dummy functions that have no effect, but allows 5 | * n-acd to compile without eBPF support. 6 | * 7 | * See n-acd-bpf.c for documentation. 8 | */ 9 | 10 | #include 11 | #include 12 | #include "n-acd-private.h" 13 | 14 | int n_acd_bpf_map_create(int *mapfdp, size_t max_entries) { 15 | *mapfdp = -1; 16 | return 0; 17 | } 18 | 19 | int n_acd_bpf_map_add(int mapfd, struct in_addr *addrp) { 20 | return 0; 21 | } 22 | 23 | int n_acd_bpf_map_remove(int mapfd, struct in_addr *addrp) { 24 | return 0; 25 | } 26 | 27 | int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *macp) { 28 | *progfdp = -1; 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /src/n-acd-bpf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * eBPF filter for IPv4 Address Conflict Detection 3 | * 4 | * An eBPF map and an eBPF program are provided. The map contains all the 5 | * addresses address conflict detection is performed on, and the program 6 | * filters out all packets except exactly the packets relevant to the ACD 7 | * protocol on the addresses currently in the map. 8 | * 9 | * Note that userspace still has to filter the incoming packets, as filter 10 | * are applied when packets are queued on the socket, not when userspace 11 | * receives them. It is therefore possible to receive packets about addresses 12 | * that have already been removed. 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include "n-acd-private.h" 27 | 28 | #define BPF_LD_ABS(SIZE, IMM) \ 29 | ((struct bpf_insn) { \ 30 | .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ 31 | .dst_reg = 0, \ 32 | .src_reg = 0, \ 33 | .off = 0, \ 34 | .imm = IMM, \ 35 | }) 36 | 37 | #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ 38 | ((struct bpf_insn) { \ 39 | .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ 40 | .dst_reg = DST, \ 41 | .src_reg = SRC, \ 42 | .off = OFF, \ 43 | .imm = 0, \ 44 | }) 45 | 46 | #define BPF_LD_MAP_FD(DST, MAP_FD) \ 47 | ((struct bpf_insn) { \ 48 | .code = BPF_LD | BPF_DW | BPF_IMM, \ 49 | .dst_reg = DST, \ 50 | .src_reg = BPF_PSEUDO_MAP_FD, \ 51 | .off = 0, \ 52 | .imm = (__u32) (MAP_FD), \ 53 | }), \ 54 | ((struct bpf_insn) { \ 55 | .code = 0, /* zero is reserved opcode */ \ 56 | .dst_reg = 0, \ 57 | .src_reg = 0, \ 58 | .off = 0, \ 59 | .imm = ((__u64) (MAP_FD)) >> 32, \ 60 | }) 61 | 62 | #define BPF_ALU_REG(OP, DST, SRC) \ 63 | ((struct bpf_insn) { \ 64 | .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ 65 | .dst_reg = DST, \ 66 | .src_reg = SRC, \ 67 | .off = 0, \ 68 | .imm = 0, \ 69 | }) 70 | 71 | #define BPF_ALU_IMM(OP, DST, IMM) \ 72 | ((struct bpf_insn) { \ 73 | .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ 74 | .dst_reg = DST, \ 75 | .src_reg = 0, \ 76 | .off = 0, \ 77 | .imm = IMM, \ 78 | }) 79 | 80 | #define BPF_MOV_REG(DST, SRC) \ 81 | ((struct bpf_insn) { \ 82 | .code = BPF_ALU64 | BPF_MOV | BPF_X, \ 83 | .dst_reg = DST, \ 84 | .src_reg = SRC, \ 85 | .off = 0, \ 86 | .imm = 0, \ 87 | }) 88 | 89 | #define BPF_MOV_IMM(DST, IMM) \ 90 | ((struct bpf_insn) { \ 91 | .code = BPF_ALU64 | BPF_MOV | BPF_K, \ 92 | .dst_reg = DST, \ 93 | .src_reg = 0, \ 94 | .off = 0, \ 95 | .imm = IMM, \ 96 | }) 97 | 98 | #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ 99 | ((struct bpf_insn) { \ 100 | .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ 101 | .dst_reg = DST, \ 102 | .src_reg = SRC, \ 103 | .off = OFF, \ 104 | .imm = 0, \ 105 | }) 106 | 107 | #define BPF_JMP_REG(OP, DST, SRC, OFF) \ 108 | ((struct bpf_insn) { \ 109 | .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ 110 | .dst_reg = DST, \ 111 | .src_reg = SRC, \ 112 | .off = OFF, \ 113 | .imm = 0, \ 114 | }) 115 | 116 | #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ 117 | ((struct bpf_insn) { \ 118 | .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ 119 | .dst_reg = DST, \ 120 | .src_reg = 0, \ 121 | .off = OFF, \ 122 | .imm = IMM, \ 123 | }) 124 | 125 | #define BPF_EMIT_CALL(FUNC) \ 126 | ((struct bpf_insn) { \ 127 | .code = BPF_JMP | BPF_CALL, \ 128 | .dst_reg = 0, \ 129 | .src_reg = 0, \ 130 | .off = 0, \ 131 | .imm = FUNC, \ 132 | }) 133 | 134 | #define BPF_EXIT_INSN() \ 135 | ((struct bpf_insn) { \ 136 | .code = BPF_JMP | BPF_EXIT, \ 137 | .dst_reg = 0, \ 138 | .src_reg = 0, \ 139 | .off = 0, \ 140 | .imm = 0, \ 141 | }) 142 | 143 | static int n_acd_syscall_bpf(int cmd, union bpf_attr *attr, unsigned int size) { 144 | return (int)syscall(__NR_bpf, cmd, attr, size); 145 | } 146 | 147 | int n_acd_bpf_map_create(int *mapfdp, size_t max_entries) { 148 | union bpf_attr attr; 149 | int mapfd; 150 | 151 | memset(&attr, 0, sizeof(attr)); 152 | attr = (union bpf_attr){ 153 | .map_type = BPF_MAP_TYPE_HASH, 154 | .key_size = sizeof(uint32_t), 155 | .value_size = sizeof(uint8_t), /* values are never used, but must be set */ 156 | .max_entries = max_entries, 157 | }; 158 | 159 | mapfd = n_acd_syscall_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); 160 | if (mapfd < 0) 161 | return -errno; 162 | 163 | *mapfdp = mapfd; 164 | return 0; 165 | } 166 | 167 | int n_acd_bpf_map_add(int mapfd, struct in_addr *addrp) { 168 | union bpf_attr attr; 169 | uint32_t addr = be32toh(addrp->s_addr); 170 | uint8_t _dummy = 0; 171 | int r; 172 | 173 | memset(&attr, 0, sizeof(attr)); 174 | attr = (union bpf_attr){ 175 | .map_fd = mapfd, 176 | .key = (uint64_t)(unsigned long)&addr, 177 | .value = (uint64_t)(unsigned long)&_dummy, 178 | .flags = BPF_NOEXIST, 179 | }; 180 | 181 | r = n_acd_syscall_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); 182 | if (r < 0) 183 | return -errno; 184 | 185 | return 0; 186 | } 187 | 188 | int n_acd_bpf_map_remove(int mapfd, struct in_addr *addrp) { 189 | uint32_t addr = be32toh(addrp->s_addr); 190 | union bpf_attr attr; 191 | int r; 192 | 193 | memset(&attr, 0, sizeof(attr)); 194 | attr = (union bpf_attr){ 195 | .map_fd = mapfd, 196 | .key = (uint64_t)(unsigned long)&addr, 197 | }; 198 | 199 | r = n_acd_syscall_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); 200 | if (r < 0) 201 | return -errno; 202 | 203 | return 0; 204 | } 205 | 206 | int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *macp) { 207 | const union { 208 | uint8_t u8[6]; 209 | uint16_t u16[3]; 210 | uint32_t u32[1]; 211 | } mac = { 212 | .u8 = { 213 | macp->ether_addr_octet[0], 214 | macp->ether_addr_octet[1], 215 | macp->ether_addr_octet[2], 216 | macp->ether_addr_octet[3], 217 | macp->ether_addr_octet[4], 218 | macp->ether_addr_octet[5], 219 | }, 220 | }; 221 | struct bpf_insn prog[] = { 222 | /* for using BPF_LD_ABS r6 must point to the skb, currently in r1 */ 223 | BPF_MOV_REG(6, 1), /* r6 = r1 */ 224 | 225 | /* drop the packet if it is too short */ 226 | BPF_LDX_MEM(BPF_W, 0, 6, offsetof(struct __sk_buff, len)), /* r0 = skb->len */ 227 | BPF_JMP_IMM(BPF_JGE, 0, sizeof(struct ether_arp), 2), /* if (r0 >= sizeof(ether_arp)) skip 2 */ 228 | BPF_MOV_IMM(0, 0), /* r0 = 0 */ 229 | BPF_EXIT_INSN(), /* return */ 230 | 231 | /* drop the packet if the header is not as expected */ 232 | BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_hrd)), /* r0 = header type */ 233 | BPF_JMP_IMM(BPF_JEQ, 0, ARPHRD_ETHER, 2), /* if (r0 == ethernet) skip 2 */ 234 | BPF_MOV_IMM(0, 0), /* r0 = 0 */ 235 | BPF_EXIT_INSN(), /* return */ 236 | 237 | BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_pro)), /* r0 = protocol */ 238 | BPF_JMP_IMM(BPF_JEQ, 0, ETHERTYPE_IP, 2), /* if (r0 == IP) skip 2 */ 239 | BPF_MOV_IMM(0, 0), /* r0 = 0 */ 240 | BPF_EXIT_INSN(), /* return */ 241 | 242 | BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_hln)), /* r0 = hw addr length */ 243 | BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct ether_addr), 2), /* if (r0 == sizeof(ether_addr)) skip 2 */ 244 | BPF_MOV_IMM(0, 0), /* r0 = 0 */ 245 | BPF_EXIT_INSN(), /* return */ 246 | 247 | BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_pln)), /* r0 = protocol addr length */ 248 | BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct in_addr), 2), /* if (r0 == sizeof(in_addr)) skip 2 */ 249 | BPF_MOV_IMM(0, 0), /* r0 = 0 */ 250 | BPF_EXIT_INSN(), /* return */ 251 | 252 | /* drop packets from our own mac address */ 253 | BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_sha)), /* r0 = first four bytes of packet mac address */ 254 | BPF_JMP_IMM(BPF_JNE, 0, be32toh(mac.u32[0]), 4), /* if (r0 != first four bytes of our mac address) skip 4 */ 255 | BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_sha) + 4), /* r0 = last two bytes of packet mac address */ 256 | BPF_JMP_IMM(BPF_JNE, 0, be16toh(mac.u16[2]), 2), /* if (r0 != last two bytes of our mac address) skip 2 */ 257 | BPF_MOV_IMM(0, 0), /* r0 = 0 */ 258 | BPF_EXIT_INSN(), /* return */ 259 | 260 | /* 261 | * We listen for two kinds of packets: 262 | * Conflicts) 263 | * These are requests or replies with the sender address not set to INADDR_ANY. The 264 | * conflicted address is the sender address, remember this in r7. 265 | * Probes) 266 | * These are requests with the sender address set to INADDR_ANY. The probed address 267 | * is the target address, remember this in r7. 268 | * Any other packets are dropped. 269 | */ 270 | BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_spa)), /* r0 = sender ip address */ 271 | BPF_JMP_IMM(BPF_JEQ, 0, 0, 7), /* if (r0 == 0) skip 7 */ 272 | BPF_MOV_REG(7, 0), /* r7 = r0 */ 273 | BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)), /* r0 = operation */ 274 | BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 3), /* if (r0 == request) skip 3 */ 275 | BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REPLY, 2), /* if (r0 == reply) skip 2 */ 276 | BPF_MOV_IMM(0, 0), /* r0 = 0 */ 277 | BPF_EXIT_INSN(), /* return */ 278 | BPF_JMP_IMM(BPF_JA, 0, 0, 6), /* skip 6 */ 279 | BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_tpa)), /* r0 = target ip address */ 280 | BPF_MOV_REG(7, 0), /* r7 = r0 */ 281 | BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)), /* r0 = operation */ 282 | BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 2), /* if (r0 == request) skip 2 */ 283 | BPF_MOV_IMM(0, 0), /* r0 = 0 */ 284 | BPF_EXIT_INSN(), /* return */ 285 | 286 | /* check if the probe or conflict is for an address we are monitoring */ 287 | BPF_STX_MEM(BPF_W, 10, 7, -4), /* *(uint32_t*)fp - 4 = r7 */ 288 | BPF_MOV_REG(2, 10), /* r2 = fp */ 289 | BPF_ALU_IMM(BPF_ADD, 2, -4), /* r2 -= 4 */ 290 | BPF_LD_MAP_FD(1, mapfd), /* r1 = mapfd */ 291 | BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), /* r0 = map_lookup_elem(r1, r2) */ 292 | BPF_JMP_IMM(BPF_JNE, 0, 0, 2), /* if (r0 != NULL) skip 2 */ 293 | BPF_MOV_IMM(0, 0), /* r0 = 0 */ 294 | BPF_EXIT_INSN(), /* return */ 295 | 296 | /* return exactly the packet length*/ 297 | BPF_MOV_IMM(0, sizeof(struct ether_arp)), /* r0 = sizeof(struct ether_arp) */ 298 | BPF_EXIT_INSN(), /* return */ 299 | }; 300 | union bpf_attr attr; 301 | int progfd; 302 | 303 | memset(&attr, 0, sizeof(attr)); 304 | attr = (union bpf_attr){ 305 | .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, 306 | .insns = (uint64_t)(unsigned long)prog, 307 | .insn_cnt = sizeof(prog) / sizeof(*prog), 308 | .license = (uint64_t)(unsigned long)"ASL", 309 | }; 310 | 311 | progfd = n_acd_syscall_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); 312 | if (progfd < 0) 313 | return -errno; 314 | 315 | *progfdp = progfd; 316 | return 0; 317 | } 318 | -------------------------------------------------------------------------------- /src/n-acd-private.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "util/timer.h" 13 | #include "n-acd.h" 14 | 15 | typedef struct NAcdEventNode NAcdEventNode; 16 | 17 | /* This augments the error-codes with internal ones that are never exposed. */ 18 | enum { 19 | _N_ACD_INTERNAL = _N_ACD_E_N, 20 | 21 | N_ACD_E_DROPPED, 22 | }; 23 | 24 | enum { 25 | N_ACD_PROBE_STATE_PROBING, 26 | N_ACD_PROBE_STATE_CONFIGURING, 27 | N_ACD_PROBE_STATE_ANNOUNCING, 28 | N_ACD_PROBE_STATE_FAILED, 29 | }; 30 | 31 | struct NAcdConfig { 32 | int ifindex; 33 | unsigned int transport; 34 | uint8_t mac[ETH_ALEN]; 35 | size_t n_mac; 36 | }; 37 | 38 | #define N_ACD_CONFIG_NULL(_x) { \ 39 | .transport = _N_ACD_TRANSPORT_N, \ 40 | } 41 | 42 | struct NAcdProbeConfig { 43 | struct in_addr ip; 44 | uint64_t timeout_msecs; 45 | }; 46 | 47 | #define N_ACD_PROBE_CONFIG_NULL(_x) { \ 48 | .timeout_msecs = N_ACD_TIMEOUT_RFC5227, \ 49 | } 50 | 51 | struct NAcdEventNode { 52 | CList acd_link; 53 | CList probe_link; 54 | NAcdEvent event; 55 | uint8_t sender[ETH_ALEN]; 56 | bool is_public : 1; 57 | }; 58 | 59 | #define N_ACD_EVENT_NODE_NULL(_x) { \ 60 | .acd_link = C_LIST_INIT((_x).acd_link), \ 61 | .probe_link = C_LIST_INIT((_x).probe_link), \ 62 | } 63 | 64 | struct NAcd { 65 | unsigned long n_refs; 66 | unsigned int seed; 67 | int fd_epoll; 68 | int fd_socket; 69 | CRBTree ip_tree; 70 | CList event_list; 71 | Timer timer; 72 | 73 | /* BPF map */ 74 | int fd_bpf_map; 75 | size_t n_bpf_map; 76 | size_t max_bpf_map; 77 | 78 | /* configuration */ 79 | int ifindex; 80 | uint8_t mac[ETH_ALEN]; 81 | 82 | /* flags */ 83 | bool preempted : 1; 84 | }; 85 | 86 | #define N_ACD_NULL(_x) { \ 87 | .n_refs = 1, \ 88 | .fd_epoll = -1, \ 89 | .fd_socket = -1, \ 90 | .ip_tree = C_RBTREE_INIT, \ 91 | .event_list = C_LIST_INIT((_x).event_list), \ 92 | .timer = TIMER_NULL((_x).timer), \ 93 | .fd_bpf_map = -1, \ 94 | } 95 | 96 | struct NAcdProbe { 97 | NAcd *acd; 98 | CRBNode ip_node; 99 | CList event_list; 100 | Timeout timeout; 101 | 102 | /* configuration */ 103 | struct in_addr ip; 104 | uint64_t timeout_multiplier; 105 | void *userdata; 106 | 107 | /* state */ 108 | unsigned int state; 109 | unsigned int n_iteration; 110 | unsigned int defend; 111 | uint64_t last_defend; 112 | }; 113 | 114 | #define N_ACD_PROBE_NULL(_x) { \ 115 | .ip_node = C_RBNODE_INIT((_x).ip_node), \ 116 | .event_list = C_LIST_INIT((_x).event_list), \ 117 | .timeout = TIMEOUT_INIT((_x).timeout), \ 118 | .state = N_ACD_PROBE_STATE_PROBING, \ 119 | .defend = N_ACD_DEFEND_NEVER, \ 120 | } 121 | 122 | /* events */ 123 | 124 | int n_acd_event_node_new(NAcdEventNode **nodep); 125 | NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node); 126 | 127 | /* contexts */ 128 | 129 | void n_acd_remember(NAcd *acd, uint64_t now, bool success); 130 | int n_acd_raise(NAcd *acd, NAcdEventNode **nodep, unsigned int event); 131 | int n_acd_send(NAcd *acd, const struct in_addr *tpa, const struct in_addr *spa); 132 | int n_acd_ensure_bpf_map_space(NAcd *acd); 133 | 134 | /* probes */ 135 | 136 | int n_acd_probe_new(NAcdProbe **probep, NAcd *acd, NAcdProbeConfig *config); 137 | int n_acd_probe_raise(NAcdProbe *probe, NAcdEventNode **nodep, unsigned int event); 138 | int n_acd_probe_handle_timeout(NAcdProbe *probe); 139 | int n_acd_probe_handle_packet(NAcdProbe *probe, struct ether_arp *packet, bool hard_conflict); 140 | 141 | /* eBPF */ 142 | 143 | int n_acd_bpf_map_create(int *mapfdp, size_t max_elements); 144 | int n_acd_bpf_map_add(int mapfd, struct in_addr *addr); 145 | int n_acd_bpf_map_remove(int mapfd, struct in_addr *addr); 146 | 147 | int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *mac); 148 | 149 | /* inline helpers */ 150 | 151 | static inline void n_acd_event_node_freep(NAcdEventNode **node) { 152 | if (*node) 153 | n_acd_event_node_free(*node); 154 | } 155 | -------------------------------------------------------------------------------- /src/n-acd-probe.c: -------------------------------------------------------------------------------- 1 | /* 2 | * IPv4 Address Conflict Detection 3 | * 4 | * This file implements the probe object. A probe is basically the 5 | * state-machine of a single ACD run. It takes an address to probe for, checks 6 | * for conflicts and then defends it once configured. 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "n-acd.h" 24 | #include "n-acd-private.h" 25 | 26 | /* 27 | * These parameters and timing intervals are specified in RFC-5227. The 28 | * original values are: 29 | * 30 | * PROBE_NUM 3 31 | * PROBE_WAIT 1s 32 | * PROBE_MIN 1s 33 | * PROBE_MAX 3s 34 | * ANNOUNCE_NUM 3 35 | * ANNOUNCE_WAIT 2s 36 | * ANNOUNCE_INTERVAL 2s 37 | * MAX_CONFLICTS 10 38 | * RATE_LIMIT_INTERVAL 60s 39 | * DEFEND_INTERVAL 10s 40 | * 41 | * If we assume a best-case and worst-case scenario for non-conflicted runs, we 42 | * end up with a runtime between 4s and 9s to finish the probe. Then it still 43 | * takes a fixed 4s to finish the announcements. 44 | * 45 | * RFC 5227 section 1.1: 46 | * [...] (Note that the values listed here are fixed constants; they are 47 | * not intended to be modifiable by implementers, operators, or end users. 48 | * These constants are given symbolic names here to facilitate the writing 49 | * of future standards that may want to reference this document with 50 | * different values for these named constants; however, at the present time 51 | * no such future standards exist.) [...] 52 | * 53 | * Unfortunately, no-one ever stepped up to write a "future standard" to revise 54 | * the timings. A 9s timeout for successful link setups is not acceptable today. 55 | * Hence, we will just go forward and ignore the proposed values. On both 56 | * wired and wireless local links round-trip latencies of below 3ms are common. 57 | * We require the caller to set a timeout multiplier, where 1 corresponds to a 58 | * total probe time between 0.5 ms and 1.0 ms. On modern networks a multiplier 59 | * of about 100 should be a reasonable default. To comply with the RFC select a 60 | * multiplier of 9000. 61 | */ 62 | #define N_ACD_RFC_PROBE_NUM (3) 63 | #define N_ACD_RFC_PROBE_WAIT_NSEC (UINT64_C(111111)) /* 1/9 ms */ 64 | #define N_ACD_RFC_PROBE_MIN_NSEC (UINT64_C(111111)) /* 1/9 ms */ 65 | #define N_ACD_RFC_PROBE_MAX_NSEC (UINT64_C(333333)) /* 3/9 ms */ 66 | #define N_ACD_RFC_ANNOUNCE_NUM (3) 67 | #define N_ACD_RFC_ANNOUNCE_WAIT_NSEC (UINT64_C(222222)) /* 2/9 ms */ 68 | #define N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC (UINT64_C(222222)) /* 2/9 ms */ 69 | #define N_ACD_RFC_MAX_CONFLICTS (10) 70 | #define N_ACD_RFC_RATE_LIMIT_INTERVAL_NSEC (UINT64_C(60000000000)) /* 60s */ 71 | #define N_ACD_RFC_DEFEND_INTERVAL_NSEC (UINT64_C(10000000000)) /* 10s */ 72 | 73 | /** 74 | * n_acd_probe_config_new() - create probe configuration 75 | * @configp: output argument for new probe configuration 76 | * 77 | * This creates a new probe configuration. It will be returned in @configp to 78 | * the caller, which upon return fully owns the object. 79 | * 80 | * A probe configuration collects parameters for probes. It never validates the 81 | * input, but this is left to the consumer of the configuration to do. 82 | * 83 | * Return: 0 on success, negative error code on failure. 84 | */ 85 | _c_public_ int n_acd_probe_config_new(NAcdProbeConfig **configp) { 86 | _c_cleanup_(n_acd_probe_config_freep) NAcdProbeConfig *config = NULL; 87 | 88 | config = malloc(sizeof(*config)); 89 | if (!config) 90 | return -ENOMEM; 91 | 92 | *config = (NAcdProbeConfig)N_ACD_PROBE_CONFIG_NULL(*config); 93 | 94 | *configp = config; 95 | config = NULL; 96 | return 0; 97 | } 98 | 99 | /** 100 | * n_acd_probe_config_free() - destroy probe configuration 101 | * @config: configuration to operate on, or NULL 102 | * 103 | * This destroys the probe configuration and all associated objects. If @config 104 | * is NULL, this is a no-op. 105 | * 106 | * Return: NULL is returned. 107 | */ 108 | _c_public_ NAcdProbeConfig *n_acd_probe_config_free(NAcdProbeConfig *config) { 109 | if (!config) 110 | return NULL; 111 | 112 | free(config); 113 | 114 | return NULL; 115 | } 116 | 117 | /** 118 | * n_acd_probe_config_set_ip() - set ip property 119 | * @config: configuration to operate on 120 | * @ip: ip to set 121 | * 122 | * This sets the IP property to the value `ip`. The address is copied into the 123 | * configuration object. No validation is performed. 124 | * 125 | * The IP property selects the IP address that a probe checks for. It is the 126 | * caller's responsibility to guarantee the address is valid and can be used. 127 | */ 128 | _c_public_ void n_acd_probe_config_set_ip(NAcdProbeConfig *config, struct in_addr ip) { 129 | config->ip = ip; 130 | } 131 | 132 | /** 133 | * n_acd_probe_config_set_timeout() - set timeout property 134 | * @config: configuration to operate on 135 | * @msecs: timeout to set, in milliseconds 136 | * 137 | * This sets the timeout to use for a conflict detection probe. The 138 | * specification default is provided as `N_ACD_TIMEOUT_RFC5227` and corresponds 139 | * to 9 seconds. 140 | * 141 | * If set to 0, conflict detection is skipped and the address is immediately 142 | * advertised and defended. 143 | * 144 | * Depending on the transport used, the API user should select a suitable 145 | * timeout. Since `ACD` only operates on the link layer, timeouts in the 146 | * hundreds of milliseconds range should be more than enough for any modern 147 | * network. Note that increasing this value directly affects the time it takes 148 | * to connect to a network, since an address should not be used unless conflict 149 | * detection finishes. 150 | * 151 | * Using the specification default is **discouraged**. It is way too slow and 152 | * not appropriate for modern networks. 153 | * 154 | * Default value is `N_ACD_TIMEOUT_RFC5227`. 155 | */ 156 | _c_public_ void n_acd_probe_config_set_timeout(NAcdProbeConfig *config, uint64_t msecs) { 157 | config->timeout_msecs = msecs; 158 | } 159 | 160 | static void n_acd_probe_schedule(NAcdProbe *probe, uint64_t n_timeout, unsigned int n_jitter) { 161 | uint64_t n_time; 162 | 163 | timer_now(&probe->acd->timer, &n_time); 164 | n_time += n_timeout; 165 | 166 | /* 167 | * ACD specifies jitter values to reduce packet storms on the local 168 | * link. This call accepts the maximum relative jitter value in 169 | * nanoseconds as @n_jitter. We then use rand_r(3p) to get a 170 | * pseudo-random jitter on top of the real timeout given as @n_timeout. 171 | */ 172 | if (n_jitter) { 173 | uint64_t random; 174 | 175 | random = ((uint64_t)rand_r(&probe->acd->seed) << 32) | (uint64_t)rand_r(&probe->acd->seed); 176 | n_time += random % n_jitter; 177 | } 178 | 179 | timeout_schedule(&probe->timeout, &probe->acd->timer, n_time); 180 | } 181 | 182 | static void n_acd_probe_unschedule(NAcdProbe *probe) { 183 | timeout_unschedule(&probe->timeout); 184 | } 185 | 186 | static bool n_acd_probe_is_unique(NAcdProbe *probe) { 187 | NAcdProbe *sibling; 188 | 189 | if (!c_rbnode_is_linked(&probe->ip_node)) 190 | return false; 191 | 192 | sibling = c_rbnode_entry(c_rbnode_next(&probe->ip_node), NAcdProbe, ip_node); 193 | if (sibling && sibling->ip.s_addr == probe->ip.s_addr) 194 | return false; 195 | 196 | sibling = c_rbnode_entry(c_rbnode_prev(&probe->ip_node), NAcdProbe, ip_node); 197 | if (sibling && sibling->ip.s_addr == probe->ip.s_addr) 198 | return false; 199 | 200 | return true; 201 | } 202 | 203 | static int n_acd_probe_link(NAcdProbe *probe) { 204 | int r; 205 | 206 | /* 207 | * Make sure the kernel bpf map has space for at least one more 208 | * entry. 209 | */ 210 | r = n_acd_ensure_bpf_map_space(probe->acd); 211 | if (r) 212 | return r; 213 | 214 | /* 215 | * Link entry into context, indexed by its IP. Note that we allow 216 | * duplicates just fine. It is up to you to decide whether to avoid 217 | * duplicates, if you don't want them. Duplicates on the same context 218 | * do not conflict with each other, though. 219 | */ 220 | { 221 | CRBNode **slot, *parent; 222 | NAcdProbe *other; 223 | 224 | slot = &probe->acd->ip_tree.root; 225 | parent = NULL; 226 | while (*slot) { 227 | other = c_rbnode_entry(*slot, NAcdProbe, ip_node); 228 | parent = *slot; 229 | if (probe->ip.s_addr < other->ip.s_addr) 230 | slot = &(*slot)->left; 231 | else 232 | slot = &(*slot)->right; 233 | } 234 | 235 | c_rbtree_add(&probe->acd->ip_tree, parent, slot, &probe->ip_node); 236 | } 237 | 238 | /* 239 | * Add the ip address to the map, if it is not already there. 240 | */ 241 | if (n_acd_probe_is_unique(probe)) { 242 | r = n_acd_bpf_map_add(probe->acd->fd_bpf_map, &probe->ip); 243 | if (r) { 244 | /* 245 | * Make sure the IP address is linked in userspace iff 246 | * it is linked in the kernel. 247 | */ 248 | c_rbnode_unlink(&probe->ip_node); 249 | return r; 250 | } 251 | ++probe->acd->n_bpf_map; 252 | } 253 | 254 | return 0; 255 | } 256 | 257 | static void n_acd_probe_unlink(NAcdProbe *probe) { 258 | int r; 259 | 260 | /* 261 | * If this is the only probe for a given IP, remove the IP from the 262 | * kernel BPF map. 263 | */ 264 | if (n_acd_probe_is_unique(probe)) { 265 | r = n_acd_bpf_map_remove(probe->acd->fd_bpf_map, &probe->ip); 266 | c_assert(r >= 0); 267 | --probe->acd->n_bpf_map; 268 | } 269 | c_rbnode_unlink(&probe->ip_node); 270 | } 271 | 272 | int n_acd_probe_new(NAcdProbe **probep, NAcd *acd, NAcdProbeConfig *config) { 273 | _c_cleanup_(n_acd_probe_freep) NAcdProbe *probe = NULL; 274 | int r; 275 | 276 | if (!config->ip.s_addr) 277 | return N_ACD_E_INVALID_ARGUMENT; 278 | 279 | probe = malloc(sizeof(*probe)); 280 | if (!probe) 281 | return -ENOMEM; 282 | 283 | *probe = (NAcdProbe)N_ACD_PROBE_NULL(*probe); 284 | probe->acd = n_acd_ref(acd); 285 | probe->ip = config->ip; 286 | 287 | /* 288 | * We use the provided timeout-length as multiplier for all our 289 | * timeouts. The provided timeout defines the maximum length of an 290 | * entire probe-interval until the first announcement. Given the 291 | * spec-provided parameters, this ends up as: 292 | * 293 | * PROBE_WAIT + PROBE_MAX + PROBE_MAX + ANNOUNCE_WAIT 294 | * = 1s + 3s + 3s + 2s 295 | * = 9s 296 | * 297 | * Hence, the default value for this timeout is 9000ms, which just 298 | * ends up matching the spec-provided values. 299 | * 300 | * What we now semantically do is divide this timeout by 1ns/1000000. 301 | * This first turns it into nanoseconds, then strips the unit by 302 | * turning it into a multiplier. However, rather than performing the 303 | * division here, we multiplier all our timeouts by 1000000 statically 304 | * at compile time. Therefore, we can use the user-provided timeout as 305 | * unmodified multiplier. No conversion necessary. 306 | */ 307 | probe->timeout_multiplier = config->timeout_msecs; 308 | 309 | r = n_acd_probe_link(probe); 310 | if (r) 311 | return r; 312 | 313 | /* 314 | * Now that everything is set up, we have to send the first probe. This 315 | * is done after ~PROBE_WAIT seconds, hence we schedule our timer. 316 | * In case no timeout-multiplier is set, we pretend we already sent all 317 | * probes successfully and schedule the timer so we proceed with the 318 | * announcements. We must schedule a fake timer there, since we are not 319 | * allowed to advance the state machine outside of n_acd_dispatch(). 320 | */ 321 | if (probe->timeout_multiplier) { 322 | probe->n_iteration = 0; 323 | n_acd_probe_schedule(probe, 324 | 0, 325 | probe->timeout_multiplier * N_ACD_RFC_PROBE_WAIT_NSEC); 326 | } else { 327 | probe->n_iteration = N_ACD_RFC_PROBE_NUM; 328 | n_acd_probe_schedule(probe, 0, 0); 329 | } 330 | 331 | *probep = probe; 332 | probe = NULL; 333 | return 0; 334 | } 335 | 336 | /** 337 | * n_acd_probe_free() - destroy a probe 338 | * @probe: probe to operate on, or NULL 339 | * 340 | * This destroys the probe specified by @probe. All operations are immediately 341 | * ceded and all associated objects are released. 342 | * 343 | * If @probe is NULL, this is a no-op. 344 | * 345 | * This function will flush all events associated with @probe from the event 346 | * queue. That is, no events will be returned for this @probe anymore. 347 | * 348 | * Return: NULL is returned. 349 | */ 350 | _c_public_ NAcdProbe *n_acd_probe_free(NAcdProbe *probe) { 351 | NAcdEventNode *node, *t_node; 352 | 353 | if (!probe) 354 | return NULL; 355 | 356 | c_list_for_each_entry_safe(node, t_node, &probe->event_list, probe_link) 357 | n_acd_event_node_free(node); 358 | 359 | n_acd_probe_unschedule(probe); 360 | n_acd_probe_unlink(probe); 361 | probe->acd = n_acd_unref(probe->acd); 362 | free(probe); 363 | 364 | return NULL; 365 | } 366 | 367 | int n_acd_probe_raise(NAcdProbe *probe, NAcdEventNode **nodep, unsigned int event) { 368 | _c_cleanup_(n_acd_event_node_freep) NAcdEventNode *node = NULL; 369 | int r; 370 | 371 | r = n_acd_raise(probe->acd, &node, event); 372 | if (r) 373 | return r; 374 | 375 | switch (event) { 376 | case N_ACD_EVENT_READY: 377 | node->event.ready.probe = probe; 378 | break; 379 | case N_ACD_EVENT_USED: 380 | node->event.used.probe = probe; 381 | break; 382 | case N_ACD_EVENT_DEFENDED: 383 | node->event.defended.probe = probe; 384 | break; 385 | case N_ACD_EVENT_CONFLICT: 386 | node->event.conflict.probe = probe; 387 | break; 388 | default: 389 | c_assert(0); 390 | return -ENOTRECOVERABLE; 391 | } 392 | 393 | c_list_link_tail(&probe->event_list, &node->probe_link); 394 | 395 | if (nodep) 396 | *nodep = node; 397 | node = NULL; 398 | return 0; 399 | } 400 | 401 | int n_acd_probe_handle_timeout(NAcdProbe *probe) { 402 | int r; 403 | 404 | switch (probe->state) { 405 | case N_ACD_PROBE_STATE_PROBING: 406 | /* 407 | * We are still PROBING. We send 3 probes with a random timeout 408 | * scheduled between each. If, after a fixed timeout, we did 409 | * not receive any conflict we consider the probing successful. 410 | */ 411 | if (probe->n_iteration < N_ACD_RFC_PROBE_NUM) { 412 | /* 413 | * We have not sent all 3 probes, yet. A timer fired, 414 | * so we are ready to send the next probe. If this is 415 | * the third probe, schedule a timer for ANNOUNCE_WAIT 416 | * to give other peers a chance to answer. If this is 417 | * not the third probe, wait between PROBE_MIN and 418 | * PROBE_MAX for the next probe. 419 | */ 420 | 421 | r = n_acd_send(probe->acd, &probe->ip, NULL); 422 | if (r) { 423 | if (r != N_ACD_E_DROPPED) 424 | return r; 425 | 426 | /* 427 | * Packet was dropped, and we know about it. It 428 | * never reached the network. Reasons are 429 | * manifold, and n_acd_send() raises events if 430 | * necessary. 431 | * From a probe-perspective, we simply pretend 432 | * we never sent the probe and schedule a 433 | * timeout for the next probe, effectively 434 | * doubling a single probe-interval. 435 | */ 436 | } else { 437 | /* Successfully sent, so advance counter. */ 438 | ++probe->n_iteration; 439 | } 440 | 441 | if (probe->n_iteration < N_ACD_RFC_PROBE_NUM) 442 | n_acd_probe_schedule(probe, 443 | probe->timeout_multiplier * N_ACD_RFC_PROBE_MIN_NSEC, 444 | probe->timeout_multiplier * (N_ACD_RFC_PROBE_MAX_NSEC - N_ACD_RFC_PROBE_MIN_NSEC)); 445 | else 446 | n_acd_probe_schedule(probe, 447 | probe->timeout_multiplier * N_ACD_RFC_ANNOUNCE_WAIT_NSEC, 448 | 0); 449 | } else { 450 | /* 451 | * All 3 probes succeeded and we waited enough to 452 | * consider this address usable by now. Do not announce 453 | * the address, yet. We must first give the caller a 454 | * chance to configure the address (so they can answer 455 | * ARP requests), before announcing it. 456 | */ 457 | r = n_acd_probe_raise(probe, NULL, N_ACD_EVENT_READY); 458 | if (r) 459 | return r; 460 | 461 | probe->state = N_ACD_PROBE_STATE_CONFIGURING; 462 | } 463 | 464 | break; 465 | 466 | case N_ACD_PROBE_STATE_ANNOUNCING: 467 | /* 468 | * We are ANNOUNCING, meaning the caller configured the address 469 | * on the interface and is actively using it. We send 3 470 | * announcements out, in a short interval, and then just 471 | * perform passive conflict detection. 472 | * Note that once all 3 announcements are sent, we no longer 473 | * schedule a timer, so this part should not trigger, anymore. 474 | */ 475 | 476 | r = n_acd_send(probe->acd, &probe->ip, &probe->ip); 477 | if (r) { 478 | if (r != N_ACD_E_DROPPED) 479 | return r; 480 | 481 | /* 482 | * See above in STATE_PROBING for details. We know the 483 | * packet was never sent, so we simply try again after 484 | * extending the timer. 485 | */ 486 | } else { 487 | /* Successfully sent, so advance counter. */ 488 | ++probe->n_iteration; 489 | } 490 | 491 | if (probe->n_iteration < N_ACD_RFC_ANNOUNCE_NUM) { 492 | /* 493 | * Announcements are always scheduled according to the 494 | * time-intervals specified in the spec. We always use 495 | * the RFC5227-mandated multiplier. 496 | * If you reconsider this, note that timeout_multiplier 497 | * might be 0 here. 498 | */ 499 | n_acd_probe_schedule(probe, 500 | N_ACD_TIMEOUT_RFC5227 * N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC, 501 | 0); 502 | } 503 | 504 | break; 505 | 506 | case N_ACD_PROBE_STATE_CONFIGURING: 507 | case N_ACD_PROBE_STATE_FAILED: 508 | default: 509 | /* 510 | * There are no timeouts in these states. If we trigger one, 511 | * something is fishy. 512 | */ 513 | c_assert(0); 514 | return -ENOTRECOVERABLE; 515 | } 516 | 517 | return 0; 518 | } 519 | 520 | int n_acd_probe_handle_packet(NAcdProbe *probe, struct ether_arp *packet, bool hard_conflict) { 521 | NAcdEventNode *node; 522 | uint64_t now; 523 | int r; 524 | 525 | timer_now(&probe->acd->timer, &now); 526 | 527 | switch (probe->state) { 528 | case N_ACD_PROBE_STATE_PROBING: 529 | /* 530 | * Regardless whether this is a hard or soft conflict, we must 531 | * treat this as a probe failure. That is, notify the caller of 532 | * the conflict and wait for further instructions. We do not 533 | * react to this, until the caller tells us what to do, but we 534 | * do stop sending further probes. 535 | */ 536 | r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_USED); 537 | if (r) 538 | return r; 539 | 540 | node->event.used.sender = node->sender; 541 | node->event.used.n_sender = ETH_ALEN; 542 | memcpy(node->sender, packet->arp_sha, ETH_ALEN); 543 | 544 | n_acd_probe_unschedule(probe); 545 | n_acd_probe_unlink(probe); 546 | probe->state = N_ACD_PROBE_STATE_FAILED; 547 | 548 | break; 549 | 550 | case N_ACD_PROBE_STATE_CONFIGURING: 551 | /* 552 | * We are waiting for the caller to configure the interface and 553 | * start ANNOUNCING. In this state, we cannot defend the 554 | * address as that would indicate that it is ready to be used, 555 | * and we cannot signal CONFLICT or USED as the caller may 556 | * already have started to use the address (and may have 557 | * configured the engine to always defend it, which means they 558 | * should be able to rely on never losing it after READY). 559 | * Simply drop the event, and rely on the anticipated ANNOUNCE 560 | * to trigger it again. 561 | */ 562 | 563 | break; 564 | 565 | case N_ACD_PROBE_STATE_ANNOUNCING: { 566 | /* 567 | * We were already instructed to announce the address, which 568 | * means the address is configured and in use. Hence, the 569 | * caller is responsible to serve regular ARP queries. Meaning, 570 | * we can ignore any soft conflicts (other peers doing ACD). 571 | * 572 | * But if we see a hard-conflict, we either defend the address 573 | * according to the caller's instructions, or we report the 574 | * conflict and bail out. 575 | */ 576 | bool conflict = false, rate_limited = false; 577 | 578 | if (!hard_conflict) 579 | break; 580 | 581 | rate_limited = now < probe->last_defend + N_ACD_RFC_DEFEND_INTERVAL_NSEC; 582 | 583 | switch (probe->defend) { 584 | case N_ACD_DEFEND_NEVER: 585 | conflict = true; 586 | break; 587 | case N_ACD_DEFEND_ONCE: 588 | if (rate_limited) { 589 | conflict = true; 590 | break; 591 | } 592 | 593 | /* fallthrough */ 594 | case N_ACD_DEFEND_ALWAYS: 595 | if (!rate_limited) { 596 | r = n_acd_send(probe->acd, &probe->ip, &probe->ip); 597 | if (r) { 598 | if (r != N_ACD_E_DROPPED) 599 | return r; 600 | 601 | if (probe->defend == N_ACD_DEFEND_ONCE) { 602 | conflict = true; 603 | break; 604 | } 605 | } 606 | 607 | if (r != N_ACD_E_DROPPED) 608 | probe->last_defend = now; 609 | } 610 | 611 | r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_DEFENDED); 612 | if (r) 613 | return r; 614 | 615 | node->event.defended.sender = node->sender; 616 | node->event.defended.n_sender = ETH_ALEN; 617 | memcpy(node->sender, packet->arp_sha, ETH_ALEN); 618 | 619 | break; 620 | } 621 | 622 | if (conflict) { 623 | r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_CONFLICT); 624 | if (r) 625 | return r; 626 | 627 | node->event.conflict.sender = node->sender; 628 | node->event.conflict.n_sender = ETH_ALEN; 629 | memcpy(node->sender, packet->arp_sha, ETH_ALEN); 630 | 631 | n_acd_probe_unschedule(probe); 632 | n_acd_probe_unlink(probe); 633 | probe->state = N_ACD_PROBE_STATE_FAILED; 634 | } 635 | 636 | break; 637 | } 638 | 639 | case N_ACD_PROBE_STATE_FAILED: 640 | default: 641 | /* 642 | * We are not listening for packets in these states. If we receive one, 643 | * something is fishy. 644 | */ 645 | c_assert(0); 646 | return -ENOTRECOVERABLE; 647 | } 648 | 649 | return 0; 650 | } 651 | 652 | /** 653 | * n_acd_probe_set_userdata - set userdata 654 | * @probe: probe to operate on 655 | * @userdata: userdata pointer 656 | * 657 | * This can be used to set a caller-controlled user-data pointer on @probe. The 658 | * value of the pointer is never inspected or used by `n-acd` and is fully 659 | * under control of the caller. 660 | * 661 | * The default value is NULL. 662 | */ 663 | _c_public_ void n_acd_probe_set_userdata(NAcdProbe *probe, void *userdata) { 664 | probe->userdata = userdata; 665 | } 666 | 667 | /** 668 | * n_acd_probe_get_userdata - get userdata 669 | * @probe: probe to operate on 670 | * 671 | * This queries the userdata pointer that was previously set through 672 | * n_acd_probe_set_userdata(). 673 | * 674 | * The default value is NULL. 675 | * 676 | * Return: The stored userdata pointer is returned. 677 | */ 678 | _c_public_ void n_acd_probe_get_userdata(NAcdProbe *probe, void **userdatap) { 679 | *userdatap = probe->userdata; 680 | } 681 | 682 | /** 683 | * n_acd_probe_announce() - announce the configured IP address 684 | * @probe: probe to operate on 685 | * @defend: defense policy 686 | * 687 | * Announce the IP address on the local link, and start defending it according 688 | * to the given policy, which mut be one of N_ACD_DEFEND_ONCE, 689 | * N_ACD_DEFEND_NEVER, or N_ACD_DEFEND_ALWAYS. 690 | * 691 | * This must be called in response to an N_ACD_EVENT_READY event, and only 692 | * after the given address has been configured on the given network interface. 693 | * 694 | * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT in case the defense policy 695 | * is invalid, negative error code on failure. 696 | */ 697 | _c_public_ int n_acd_probe_announce(NAcdProbe *probe, unsigned int defend) { 698 | if (defend >= _N_ACD_DEFEND_N) 699 | return N_ACD_E_INVALID_ARGUMENT; 700 | 701 | probe->state = N_ACD_PROBE_STATE_ANNOUNCING; 702 | probe->defend = defend; 703 | probe->n_iteration = 0; 704 | 705 | /* 706 | * We must schedule a fake-timeout, since we are not allowed to 707 | * advance the state-machine outside of n_acd_dispatch(). 708 | */ 709 | n_acd_probe_schedule(probe, 0, 0); 710 | 711 | return 0; 712 | } 713 | -------------------------------------------------------------------------------- /src/n-acd.c: -------------------------------------------------------------------------------- 1 | /* 2 | * IPv4 Address Conflict Detection 3 | * 4 | * This file contains the main context initialization and management functions, 5 | * as well as a bunch of utilities used through the n-acd modules. 6 | */ 7 | 8 | /** 9 | * DOC: IPv4 Address Conflict Detection 10 | * 11 | * The `n-acd` project implements the IPv4 Address Conflict Detection protocol 12 | * as defined in RFC-5227. The protocol originates in the IPv4 Link Local 13 | * Address selection but was later on generalized and resulted in `ACD`. The 14 | * idea is to use `ARP` to query a link for an address to see whether it 15 | * already exists on the network, as well as defending an address that is in 16 | * use on a network interface. Furthermore, `ACD` provides passive diagnostics 17 | * for administrators, as it will detect address conflicts automatically, which 18 | * then can be logged or shown to a user. 19 | * 20 | * The main context object of `n-acd` is the `NAcd` structure. It is a passive 21 | * ref-counted context object which drives `ACD` probes running on it. A 22 | * context is specific to a linux network device and transport. If multiple 23 | * network devices are used, then separate `NAcd` contexts must be deployed. 24 | * 25 | * The `NAcdProbe` object drives a single `ACD` state-machine. A probe is 26 | * created on an `NAcd` context by providing an address to probe for. The probe 27 | * will then raise notifications whether the address conflict detection found 28 | * something, or whether the address is ready to be used. Optionally, the probe 29 | * will then enter into passive mode and defend the address as long as it is 30 | * kept active. 31 | * 32 | * Note that the `n-acd` project only implements the networking protocol. It 33 | * never queries or modifies network interfaces. It completely relies on the 34 | * API user to react to notifications and update network interfaces 35 | * respectively. `n-acd` uses an event-mechanism on every context object. All 36 | * events raise by any probe or operation on a given context will queue all 37 | * events on that context object. The event-queue can then be drained by the 38 | * API user. All events are properly asynchronous and designed in a way that no 39 | * synchronous reaction to any event is required. That is, the events are 40 | * carefully designed to allow forwarding via IPC (or even networks) to a 41 | * controller that handles them and specifies how to react. Furthermore, none 42 | * of the function calls of `n-acd` require synchronous error handling. 43 | * Instead, functions only ever return values on fatal errors. Everything else 44 | * is queued as events, thus guaranteeing that synchronous handling of return 45 | * values is not required. Exceptions are functions that do not affect internal 46 | * state or do not have an associated context object. 47 | */ 48 | 49 | #include 50 | #include 51 | #include 52 | #include 53 | #include 54 | #include 55 | #include 56 | #include 57 | #include 58 | #include 59 | #include 60 | #include 61 | #include 62 | #include 63 | #include 64 | #include 65 | #include 66 | #include 67 | #include 68 | #include "n-acd.h" 69 | #include "n-acd-private.h" 70 | 71 | enum { 72 | N_ACD_EPOLL_TIMER, 73 | N_ACD_EPOLL_SOCKET, 74 | }; 75 | 76 | static int n_acd_get_random(unsigned int *random) { 77 | uint8_t hash_seed[] = { 78 | 0x3a, 0x0c, 0xa6, 0xdd, 0x44, 0xef, 0x5f, 0x7a, 79 | 0x5e, 0xd7, 0x25, 0x37, 0xbf, 0x4e, 0x80, 0xa1, 80 | }; 81 | CSipHash hash = C_SIPHASH_NULL; 82 | struct timespec ts; 83 | const uint8_t *p; 84 | int r; 85 | 86 | /* 87 | * We need random jitter for all timeouts when handling ARP probes. Use 88 | * AT_RANDOM to get a seed for rand_r(3p), if available (should always 89 | * be available on linux). See the time-out scheduler for details. 90 | * Additionally, we include the current time in the seed. This avoids 91 | * using the same jitter in case you run multiple ACD engines in the 92 | * same process. Lastly, the seed is hashed with SipHash24 to avoid 93 | * exposing the value of AT_RANDOM on the network. 94 | */ 95 | c_siphash_init(&hash, hash_seed); 96 | 97 | p = (const uint8_t *)getauxval(AT_RANDOM); 98 | if (p) 99 | c_siphash_append(&hash, p, 16); 100 | 101 | r = clock_gettime(CLOCK_MONOTONIC, &ts); 102 | if (r < 0) 103 | return -c_errno(); 104 | 105 | c_siphash_append(&hash, (const uint8_t *)&ts.tv_sec, sizeof(ts.tv_sec)); 106 | c_siphash_append(&hash, (const uint8_t *)&ts.tv_nsec, sizeof(ts.tv_nsec)); 107 | 108 | *random = c_siphash_finalize(&hash); 109 | return 0; 110 | } 111 | 112 | static int n_acd_socket_new(int *fdp, int fd_bpf_prog, NAcdConfig *config) { 113 | const struct sockaddr_ll address = { 114 | .sll_family = AF_PACKET, 115 | .sll_protocol = htobe16(ETH_P_ARP), 116 | .sll_ifindex = config->ifindex, 117 | .sll_halen = ETH_ALEN, 118 | .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, 119 | }; 120 | int r, s = -1; 121 | 122 | s = socket(PF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0); 123 | if (s < 0) { 124 | r = -c_errno(); 125 | goto error; 126 | } 127 | 128 | if (fd_bpf_prog >= 0) { 129 | r = setsockopt(s, SOL_SOCKET, SO_ATTACH_BPF, &fd_bpf_prog, sizeof(fd_bpf_prog)); 130 | if (r < 0) { 131 | r = -c_errno(); 132 | goto error; 133 | } 134 | } 135 | 136 | r = bind(s, (struct sockaddr *)&address, sizeof(address)); 137 | if (r < 0) { 138 | r = -c_errno(); 139 | goto error; 140 | } 141 | 142 | *fdp = s; 143 | s = -1; 144 | return 0; 145 | 146 | error: 147 | if (s >= 0) 148 | close(s); 149 | return r; 150 | } 151 | 152 | /** 153 | * n_acd_config_new() - create configuration object 154 | * @configp: output argument for new configuration 155 | * 156 | * This creates a new configuration object and provides it to the caller. The 157 | * object is fully owned by the caller upon function return. 158 | * 159 | * A configuration object is a passive structure that is used to collect 160 | * information that is then passed to a constructor or other function. A 161 | * configuration never validates the data, but it is up to the consumer of a 162 | * configuration to do that. 163 | * 164 | * Return: 0 on success, negative error code on failure. 165 | */ 166 | _c_public_ int n_acd_config_new(NAcdConfig **configp) { 167 | _c_cleanup_(n_acd_config_freep) NAcdConfig *config = NULL; 168 | 169 | config = malloc(sizeof(*config)); 170 | if (!config) 171 | return -ENOMEM; 172 | 173 | *config = (NAcdConfig)N_ACD_CONFIG_NULL(*config); 174 | 175 | *configp = config; 176 | config = NULL; 177 | return 0; 178 | } 179 | 180 | /** 181 | * n_acd_config_free() - destroy configuration object 182 | * @config: configuration to operate on, or NULL 183 | * 184 | * This destroys the configuration object @config. If @config is NULL, this is 185 | * a no-op. 186 | * 187 | * Return: NULL is returned. 188 | */ 189 | _c_public_ NAcdConfig *n_acd_config_free(NAcdConfig *config) { 190 | if (!config) 191 | return NULL; 192 | 193 | free(config); 194 | 195 | return NULL; 196 | } 197 | 198 | /** 199 | * n_acd_config_set_ifindex() - set ifindex property 200 | * @config: configuration to operate on 201 | * @ifindex: ifindex to set 202 | * 203 | * This sets the @ifindex property of the configuration object. Any previous 204 | * value is overwritten. 205 | * 206 | * A valid ifindex is a 32bit integer greater than 0. Any other value is 207 | * treated as unspecified. 208 | * 209 | * The ifindex corresponds to the interface index provided by the linux kernel. 210 | * It specifies the network device to be used. 211 | */ 212 | _c_public_ void n_acd_config_set_ifindex(NAcdConfig *config, int ifindex) { 213 | config->ifindex = ifindex; 214 | } 215 | 216 | /** 217 | * n_acd_config_set_transport() - set transport property 218 | * @config: configuration to operate on 219 | * @transport: transport to set 220 | * 221 | * This specifies the transport to use. A transport must be one of the 222 | * `N_ACD_TRANSPORT_*` identifiers. It selects which transport protocol `n-acd` 223 | * will run on. 224 | */ 225 | _c_public_ void n_acd_config_set_transport(NAcdConfig *config, unsigned int transport) { 226 | config->transport = transport; 227 | } 228 | 229 | /** 230 | * n_acd_config_set_mac() - set mac property 231 | * @config: configuration to operate on 232 | * @mac: mac to set 233 | * 234 | * This specifies the hardware address (also referred to as `MAC Address`) to 235 | * use. Any hardware address can be specified. It is the caller's 236 | * responsibility to make sure the address can actually be used. 237 | * 238 | * The address in @mac is copied into @config. It does not have to be retained 239 | * by the caller. 240 | */ 241 | _c_public_ void n_acd_config_set_mac(NAcdConfig *config, const uint8_t *mac, size_t n_mac) { 242 | /* 243 | * We truncate the address at the maximum we support. We still remember 244 | * the original length, so any consumer of this configuration can then 245 | * complain about an unsupported address length. This allows us to 246 | * avoid a memory allocation here and having to return `int`. 247 | */ 248 | config->n_mac = n_mac; 249 | memcpy(config->mac, mac, n_mac > ETH_ALEN ? ETH_ALEN : n_mac); 250 | } 251 | 252 | int n_acd_event_node_new(NAcdEventNode **nodep) { 253 | NAcdEventNode *node; 254 | 255 | node = malloc(sizeof(*node)); 256 | if (!node) 257 | return -ENOMEM; 258 | 259 | *node = (NAcdEventNode)N_ACD_EVENT_NODE_NULL(*node); 260 | 261 | *nodep = node; 262 | return 0; 263 | } 264 | 265 | NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node) { 266 | if (!node) 267 | return NULL; 268 | 269 | c_list_unlink(&node->probe_link); 270 | c_list_unlink(&node->acd_link); 271 | free(node); 272 | 273 | return NULL; 274 | } 275 | 276 | int n_acd_ensure_bpf_map_space(NAcd *acd) { 277 | NAcdProbe *probe; 278 | _c_cleanup_(c_closep) int fd_map = -1, fd_prog = -1; 279 | size_t max_map; 280 | int r; 281 | 282 | if (acd->n_bpf_map < acd->max_bpf_map) 283 | return 0; 284 | 285 | max_map = 2 * acd->max_bpf_map; 286 | 287 | r = n_acd_bpf_map_create(&fd_map, max_map); 288 | if (r) 289 | return r; 290 | 291 | c_rbtree_for_each_entry(probe, &acd->ip_tree, ip_node) { 292 | r = n_acd_bpf_map_add(fd_map, &probe->ip); 293 | if (r) 294 | return r; 295 | } 296 | 297 | r = n_acd_bpf_compile(&fd_prog, fd_map, (struct ether_addr*) acd->mac); 298 | if (r) 299 | return r; 300 | 301 | if (fd_prog >= 0) { 302 | r = setsockopt(acd->fd_socket, SOL_SOCKET, SO_ATTACH_BPF, &fd_prog, sizeof(fd_prog)); 303 | if (r) 304 | return -c_errno(); 305 | } 306 | 307 | if (acd->fd_bpf_map >= 0) 308 | close(acd->fd_bpf_map); 309 | acd->fd_bpf_map = fd_map; 310 | fd_map = -1; 311 | acd->max_bpf_map = max_map; 312 | return 0; 313 | } 314 | 315 | /** 316 | * n_acd_new() - create a new ACD context 317 | * @acdp: output argument for new context object 318 | * @config: configuration parameters 319 | * 320 | * Create a new ACD context and return it in @acdp. The configuration @config 321 | * must be initialized by the caller and must specify a valid network 322 | * interface, transport mechanism, as well as hardware address compatible with 323 | * the selected transport. The configuration is copied into the context. The 324 | * @config object thus does not have to be retained by the caller. 325 | * 326 | * Return: 0 on success, negative error code on failure. 327 | */ 328 | _c_public_ int n_acd_new(NAcd **acdp, NAcdConfig *config) { 329 | _c_cleanup_(n_acd_unrefp) NAcd *acd = NULL; 330 | _c_cleanup_(c_closep) int fd_bpf_prog = -1; 331 | struct epoll_event eevent; 332 | int r; 333 | 334 | if (config->ifindex <= 0 || 335 | config->transport != N_ACD_TRANSPORT_ETHERNET || 336 | config->n_mac != ETH_ALEN || 337 | !memcmp(config->mac, (uint8_t[ETH_ALEN]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, ETH_ALEN)) 338 | return N_ACD_E_INVALID_ARGUMENT; 339 | 340 | acd = malloc(sizeof(*acd)); 341 | if (!acd) 342 | return -ENOMEM; 343 | 344 | *acd = (NAcd)N_ACD_NULL(*acd); 345 | acd->ifindex = config->ifindex; 346 | memcpy(acd->mac, config->mac, ETH_ALEN); 347 | 348 | r = n_acd_get_random(&acd->seed); 349 | if (r) 350 | return r; 351 | 352 | acd->fd_epoll = epoll_create1(EPOLL_CLOEXEC); 353 | if (acd->fd_epoll < 0) 354 | return -c_errno(); 355 | 356 | r = timer_init(&acd->timer); 357 | if (r < 0) 358 | return r; 359 | 360 | acd->max_bpf_map = 8; 361 | 362 | r = n_acd_bpf_map_create(&acd->fd_bpf_map, acd->max_bpf_map); 363 | if (r) 364 | return r; 365 | 366 | r = n_acd_bpf_compile(&fd_bpf_prog, acd->fd_bpf_map, (struct ether_addr*) acd->mac); 367 | if (r) 368 | return r; 369 | 370 | r = n_acd_socket_new(&acd->fd_socket, fd_bpf_prog, config); 371 | if (r) 372 | return r; 373 | 374 | eevent = (struct epoll_event){ 375 | .events = EPOLLIN, 376 | .data.u32 = N_ACD_EPOLL_TIMER, 377 | }; 378 | r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->timer.fd, &eevent); 379 | if (r < 0) 380 | return -c_errno(); 381 | 382 | eevent = (struct epoll_event){ 383 | .events = EPOLLIN, 384 | .data.u32 = N_ACD_EPOLL_SOCKET, 385 | }; 386 | r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->fd_socket, &eevent); 387 | if (r < 0) 388 | return -c_errno(); 389 | 390 | *acdp = acd; 391 | acd = NULL; 392 | return 0; 393 | } 394 | 395 | static void n_acd_free_internal(NAcd *acd) { 396 | NAcdEventNode *node, *t_node; 397 | 398 | if (!acd) 399 | return; 400 | 401 | c_list_for_each_entry_safe(node, t_node, &acd->event_list, acd_link) 402 | n_acd_event_node_free(node); 403 | 404 | c_assert(c_rbtree_is_empty(&acd->ip_tree)); 405 | 406 | if (acd->fd_socket >= 0) { 407 | c_assert(acd->fd_epoll >= 0); 408 | epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->fd_socket, NULL); 409 | close(acd->fd_socket); 410 | acd->fd_socket = -1; 411 | } 412 | 413 | if (acd->fd_bpf_map >= 0) { 414 | close(acd->fd_bpf_map); 415 | acd->fd_bpf_map = -1; 416 | } 417 | 418 | if (acd->timer.fd >= 0) { 419 | c_assert(acd->fd_epoll >= 0); 420 | epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->timer.fd, NULL); 421 | timer_deinit(&acd->timer); 422 | } 423 | 424 | if (acd->fd_epoll >= 0) { 425 | close(acd->fd_epoll); 426 | acd->fd_epoll = -1; 427 | } 428 | 429 | free(acd); 430 | } 431 | 432 | /** 433 | * n_acd_ref() - acquire reference 434 | * @acd: context to operate on, or NULL 435 | * 436 | * This acquires a single reference to the context specified as @acd. If @acd 437 | * is NULL, this is a no-op. 438 | * 439 | * Return: @acd is returned. 440 | */ 441 | _c_public_ NAcd *n_acd_ref(NAcd *acd) { 442 | if (acd) 443 | ++acd->n_refs; 444 | return acd; 445 | } 446 | 447 | /** 448 | * n_acd_unref() - release reference 449 | * @acd: context to operate on, or NULL 450 | * 451 | * This releases a single reference to the context @acd. If this is the last 452 | * reference, the context is torn down and deallocated. 453 | * 454 | * Return: NULL is returned. 455 | */ 456 | _c_public_ NAcd *n_acd_unref(NAcd *acd) { 457 | if (acd && !--acd->n_refs) 458 | n_acd_free_internal(acd); 459 | return NULL; 460 | } 461 | 462 | int n_acd_raise(NAcd *acd, NAcdEventNode **nodep, unsigned int event) { 463 | NAcdEventNode *node; 464 | int r; 465 | 466 | r = n_acd_event_node_new(&node); 467 | if (r) 468 | return r; 469 | 470 | node->event.event = event; 471 | c_list_link_tail(&acd->event_list, &node->acd_link); 472 | 473 | if (nodep) 474 | *nodep = node; 475 | return 0; 476 | } 477 | 478 | int n_acd_send(NAcd *acd, const struct in_addr *tpa, const struct in_addr *spa) { 479 | struct sockaddr_ll address = { 480 | .sll_family = AF_PACKET, 481 | .sll_protocol = htobe16(ETH_P_ARP), 482 | .sll_ifindex = acd->ifindex, 483 | .sll_halen = ETH_ALEN, 484 | .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, 485 | }; 486 | struct ether_arp arp = { 487 | .ea_hdr = { 488 | .ar_hrd = htobe16(ARPHRD_ETHER), 489 | .ar_pro = htobe16(ETHERTYPE_IP), 490 | .ar_hln = sizeof(acd->mac), 491 | .ar_pln = sizeof(uint32_t), 492 | .ar_op = htobe16(ARPOP_REQUEST), 493 | }, 494 | }; 495 | ssize_t l; 496 | int r; 497 | 498 | memcpy(arp.arp_sha, acd->mac, sizeof(acd->mac)); 499 | memcpy(arp.arp_tpa, &tpa->s_addr, sizeof(uint32_t)); 500 | 501 | if (spa) 502 | memcpy(arp.arp_spa, &spa->s_addr, sizeof(spa->s_addr)); 503 | 504 | l = sendto(acd->fd_socket, 505 | &arp, 506 | sizeof(arp), 507 | MSG_NOSIGNAL, 508 | (struct sockaddr *)&address, 509 | sizeof(address)); 510 | if (l < 0) { 511 | if (errno == EAGAIN || errno == ENOBUFS) { 512 | /* 513 | * We never maintain outgoing queues. We rely on the 514 | * network device to do that for us. In case the queues 515 | * are full, or the kernel refuses to queue the packet 516 | * for other reasons, we must tell our caller that the 517 | * packet was dropped. 518 | */ 519 | return N_ACD_E_DROPPED; 520 | } else if (errno == ENETDOWN || errno == ENXIO) { 521 | /* 522 | * These errors happen if the network device went down 523 | * or was actually removed. We always propagate this as 524 | * event, so the user can react accordingly (similarly 525 | * to the recvmmsg(2) handler). In case the user does 526 | * not immediately react, we also tell our caller that 527 | * the packet was dropped, so we don't erroneously 528 | * treat this as success. 529 | */ 530 | 531 | r = n_acd_raise(acd, NULL, N_ACD_EVENT_DOWN); 532 | if (r) 533 | return r; 534 | 535 | return N_ACD_E_DROPPED; 536 | } 537 | 538 | /* 539 | * Random network error. We treat this as fatal and propagate 540 | * the error, so it is noticed and can be investigated. 541 | */ 542 | return -c_errno(); 543 | } else if (l != (ssize_t)sizeof(arp)) { 544 | /* 545 | * Ugh, the kernel modified the packet. This is unexpected. We 546 | * consider the packet lost. 547 | */ 548 | return N_ACD_E_DROPPED; 549 | } 550 | 551 | return 0; 552 | } 553 | 554 | /** 555 | * n_acd_get_fd() - get pollable file descriptor 556 | * @acd: context object to operate on 557 | * @fdp: output argument for file descriptor 558 | * 559 | * This returns the backing file-descriptor of the context object @acd. The 560 | * file-descriptor is owned by @acd and valid as long as @acd is. The 561 | * file-descriptor never changes, so it can be cached by the caller as long as 562 | * they hold a reference to @acd. 563 | * 564 | * The file-descriptor is internal to the @acd context and should not be 565 | * modified by the caller. It is only exposed to allow the caller to poll on 566 | * it. Whenever the file-descriptor polls readable, n_acd_dispatch() should be 567 | * called. 568 | * 569 | * Currently, the file-descriptor is an epoll-fd. 570 | */ 571 | _c_public_ void n_acd_get_fd(NAcd *acd, int *fdp) { 572 | *fdp = acd->fd_epoll; 573 | } 574 | 575 | static int n_acd_handle_timeout(NAcd *acd) { 576 | NAcdProbe *probe; 577 | uint64_t now; 578 | int r; 579 | 580 | /* 581 | * Read the current time once, and handle all timeouts that triggered 582 | * before the current time. Rereading the current time in each loop 583 | * might risk creating a live-lock, and the fact that we read the 584 | * time after reading the timer guarantees that the timeout which 585 | * woke us up is handled. 586 | * 587 | * When there are no more timeouts to handle at the given time, we 588 | * rearm the timer to potentially wake us up again in the future. 589 | */ 590 | timer_now(&acd->timer, &now); 591 | 592 | for (;;) { 593 | Timeout *timeout; 594 | 595 | r = timer_pop_timeout(&acd->timer, now, &timeout); 596 | if (r < 0) { 597 | return r; 598 | } else if (!timeout) { 599 | /* 600 | * There are no more timeouts pending before @now. Rearm 601 | * the timer to fire again at the next timeout. 602 | */ 603 | timer_rearm(&acd->timer); 604 | break; 605 | } 606 | 607 | probe = (void *)timeout - offsetof(NAcdProbe, timeout); 608 | r = n_acd_probe_handle_timeout(probe); 609 | if (r) 610 | return r; 611 | } 612 | 613 | return 0; 614 | } 615 | 616 | static int n_acd_handle_packet(NAcd *acd, struct ether_arp *packet) { 617 | bool hard_conflict; 618 | NAcdProbe *probe; 619 | uint32_t addr; 620 | CRBNode *node; 621 | int r; 622 | 623 | /* 624 | * We are interested in 2 kinds of ARP messages: 625 | * 626 | * 1) Someone who is *NOT* us sends *ANY* ARP message with our IP 627 | * address as sender. This is never good, because it implies an 628 | * address conflict. 629 | * We call this a hard-conflict. 630 | * 631 | * 2) Someone who is *NOT* us sends an ARP REQUEST without any sender 632 | * IP, but our IP as target. This implies someone else performs an 633 | * ARP Probe with our address. This also implies a conflict, but 634 | * one that can be resolved by responding to the probe. 635 | * We call this a soft-conflict. 636 | * 637 | * We are never interested in any other ARP message. The kernel already 638 | * deals with everything else, hence, we can silently ignore those. 639 | * 640 | * Now, we simply check whether a sender-address is set. This allows us 641 | * to distinguish both cases. We then check further conditions, so we 642 | * can bail out early if neither is the case. 643 | * 644 | * Lastly, we perform a lookup in our probe-set to check whether the 645 | * address actually matches, so we can let these probes dispatch the 646 | * message. Note that we allow duplicate probes, so we need to dispatch 647 | * each matching probe, not just one. 648 | */ 649 | 650 | if (memcmp(packet->arp_spa, (uint8_t[4]){ }, sizeof(packet->arp_spa))) { 651 | memcpy(&addr, packet->arp_spa, sizeof(addr)); 652 | hard_conflict = true; 653 | } else if (packet->ea_hdr.ar_op == htobe16(ARPOP_REQUEST)) { 654 | memcpy(&addr, packet->arp_tpa, sizeof(addr)); 655 | hard_conflict = false; 656 | } else { 657 | /* 658 | * The BPF filter will not let through any other packet. 659 | */ 660 | return -EIO; 661 | } 662 | 663 | /* Find top-most node that matches @addr. */ 664 | node = acd->ip_tree.root; 665 | while (node) { 666 | probe = c_rbnode_entry(node, NAcdProbe, ip_node); 667 | if (addr < probe->ip.s_addr) 668 | node = node->left; 669 | else if (addr > probe->ip.s_addr) 670 | node = node->right; 671 | else 672 | break; 673 | } 674 | 675 | /* 676 | * If the address is unknown, we drop the package. This might happen if 677 | * the kernel queued the packet and passed the BPF filter, but we 678 | * modified the set before dequeuing the message. 679 | */ 680 | if (!node) 681 | return 0; 682 | 683 | /* Forward to left-most child that still matches @addr. */ 684 | while (node->left && addr == c_rbnode_entry(node->left, 685 | NAcdProbe, 686 | ip_node)->ip.s_addr) 687 | node = node->left; 688 | 689 | /* Iterate all matching entries in-order. */ 690 | do { 691 | probe = c_rbnode_entry(node, NAcdProbe, ip_node); 692 | 693 | r = n_acd_probe_handle_packet(probe, packet, hard_conflict); 694 | if (r) 695 | return r; 696 | 697 | node = c_rbnode_next(node); 698 | } while (node && addr == c_rbnode_entry(node, 699 | NAcdProbe, 700 | ip_node)->ip.s_addr); 701 | 702 | return 0; 703 | } 704 | 705 | static int n_acd_dispatch_timer(NAcd *acd, struct epoll_event *event) { 706 | int r; 707 | 708 | if (event->events & (EPOLLHUP | EPOLLERR)) { 709 | /* 710 | * There is no way to handle either gracefully. If we ignored 711 | * them, we would busy-loop, so lets rather forward the error 712 | * to the caller. 713 | */ 714 | return -EIO; 715 | } 716 | 717 | if (event->events & EPOLLIN) { 718 | r = timer_read(&acd->timer); 719 | if (r <= 0) 720 | return r; 721 | 722 | c_assert(r == TIMER_E_TRIGGERED); 723 | 724 | /* 725 | * A timer triggered, handle all pending timeouts at a given 726 | * point in time. There can only be a finite number of pending 727 | * timeouts, any new ones will be in the future, so not handled 728 | * now, but guaranteed to wake us up again when they do trigger. 729 | */ 730 | r = n_acd_handle_timeout(acd); 731 | if (r) 732 | return r; 733 | } 734 | 735 | return 0; 736 | } 737 | 738 | static bool n_acd_packet_is_valid(NAcd *acd, void *packet, size_t n_packet) { 739 | struct ether_arp *arp; 740 | 741 | /* 742 | * The eBPF filter will ensure that this function always returns true, however, 743 | * this allows the eBPF filter to be an optional optimization which is necessary 744 | * on older kernels. 745 | * 746 | * See comments in n-acd-bpf.c for details. 747 | */ 748 | 749 | if (n_packet != sizeof(*arp)) 750 | return false; 751 | 752 | arp = packet; 753 | 754 | if (arp->arp_hrd != htobe16(ARPHRD_ETHER)) 755 | return false; 756 | 757 | if (arp->arp_pro != htobe16(ETHERTYPE_IP)) 758 | return false; 759 | 760 | if (arp->arp_hln != sizeof(struct ether_addr)) 761 | return false; 762 | 763 | if (arp->arp_pln != sizeof(struct in_addr)) 764 | return false; 765 | 766 | if (!memcmp(arp->arp_sha, acd->mac, sizeof(struct ether_addr))) 767 | return false; 768 | 769 | if (memcmp(arp->arp_spa, &((struct in_addr) { INADDR_ANY }), sizeof(struct in_addr))) { 770 | if (arp->arp_op != htobe16(ARPOP_REQUEST) && arp->arp_op != htobe16(ARPOP_REPLY)) 771 | return false; 772 | } else if (arp->arp_op != htobe16(ARPOP_REQUEST)) { 773 | return false; 774 | } 775 | 776 | return true; 777 | } 778 | 779 | static int n_acd_dispatch_socket(NAcd *acd, struct epoll_event *event) { 780 | const size_t n_batch = 8; 781 | struct mmsghdr msgs[n_batch]; 782 | struct iovec iovecs[n_batch]; 783 | struct ether_arp data[n_batch]; 784 | size_t i; 785 | int r, n; 786 | 787 | for (i = 0; i < n_batch; ++i) { 788 | iovecs[i].iov_base = data + i; 789 | iovecs[i].iov_len = sizeof(data[i]); 790 | msgs[i].msg_hdr = (struct msghdr){ 791 | .msg_iov = iovecs + i, 792 | .msg_iovlen = 1, 793 | }; 794 | } 795 | 796 | /* 797 | * We always directly call into recvmmsg(2), regardless which EPOLL* 798 | * event is signalled. On sockets, the recv(2)-family of syscalls does 799 | * a suitable job of handling all possible scenarios and telling us 800 | * about it. Hence, lets take the easy route and always ask the kernel 801 | * about the current state. 802 | */ 803 | n = recvmmsg(acd->fd_socket, msgs, n_batch, 0, NULL); 804 | if (n < 0) { 805 | if (errno == ENETDOWN) { 806 | /* 807 | * We get ENETDOWN if the network-device goes down or 808 | * is removed. This error is temporary and only queued 809 | * once. Subsequent reads will simply return EAGAIN 810 | * until the device is up again and has data queued. 811 | * Usually, the caller should tear down all probes when 812 | * an interface goes down, but we leave it up to the 813 | * caller to decide what to do. We propagate the code 814 | * and continue. 815 | */ 816 | return n_acd_raise(acd, NULL, N_ACD_EVENT_DOWN); 817 | } else if (errno == EAGAIN) { 818 | /* 819 | * There is no more data queued and we did not get 820 | * preempted. Everything is good to go. 821 | * As a safety-net against busy-looping, we do check 822 | * for HUP/ERR. Neither should be set, since they imply 823 | * error-dequeue behavior on all socket calls. Lets 824 | * fail hard if we trigger it, so we can investigate. 825 | */ 826 | if (event->events & (EPOLLHUP | EPOLLERR)) 827 | return -EIO; 828 | 829 | return 0; 830 | } else { 831 | /* 832 | * Something went wrong. Propagate the error-code, so 833 | * this can be investigated. 834 | */ 835 | return -c_errno(); 836 | } 837 | } else if (n >= (ssize_t)n_batch) { 838 | /* 839 | * If all buffers were filled with data, we cannot be sure that 840 | * there is nothing left to read. But to avoid starvation, we 841 | * cannot loop on this condition. Instead, we mark the context 842 | * as preempted so the caller can call us again. 843 | * Note that in level-triggered event-loops this condition can 844 | * be neglected, but in edge-triggered event-loops it is 845 | * crucial to forward this information. 846 | * 847 | * On the other hand, there are several conditions where the 848 | * kernel might return less batches than requested, but was 849 | * still preempted. However, all of those cases require the 850 | * preemption to have triggered a wakeup *after* we entered 851 | * recvmmsg(). Hence, even if we did not recognize the 852 | * preemption, an edge must have triggered and as such we will 853 | * handle the event on the next turn. 854 | */ 855 | acd->preempted = true; 856 | } 857 | 858 | for (i = 0; (ssize_t)i < n; ++i) { 859 | if (!n_acd_packet_is_valid(acd, data + i, msgs[i].msg_len)) 860 | continue; 861 | /* 862 | * Handle the packet. Bail out if something went wrong. Note 863 | * that this must be fatal errors, since we discard all other 864 | * packets that follow. 865 | */ 866 | r = n_acd_handle_packet(acd, data + i); 867 | if (r) 868 | return r; 869 | } 870 | 871 | return 0; 872 | } 873 | 874 | /** 875 | * n_acd_dispatch() - dispatch context 876 | * @acd: context object to operate on 877 | * 878 | * This dispatches the internal state-machine of all probes and operations 879 | * running on the context @acd. 880 | * 881 | * Any outside effect or event triggered by this dispatcher will be queued on 882 | * the event-queue of @acd. Whenever the dispatcher returns, the caller is 883 | * required to drain the event-queue via n_acd_pop_event() until it is empty. 884 | * 885 | * This function dispatches as many events as possible up to a static limit to 886 | * prevent stalling execution. If the static limit is reached, this function 887 | * will return with N_ACD_E_PREEMPTED, otherwise 0 is returned. In most cases 888 | * preemption can be ignored, because level-triggered event notification 889 | * handles it automatically. However, in case of edge-triggered event 890 | * mechanisms, the caller must make sure to call the dispatcher again. 891 | * 892 | * Return: 0 on success, N_ACD_E_PREEMPTED on preemption, negative error code 893 | * on failure. 894 | */ 895 | _c_public_ int n_acd_dispatch(NAcd *acd) { 896 | struct epoll_event events[2]; 897 | int n, i, r = 0; 898 | 899 | n = epoll_wait(acd->fd_epoll, events, sizeof(events) / sizeof(*events), 0); 900 | if (n < 0) { 901 | /* Linux never returns EINTR if `timeout == 0'. */ 902 | return -c_errno(); 903 | } 904 | 905 | acd->preempted = false; 906 | 907 | for (i = 0; i < n; ++i) { 908 | switch (events[i].data.u32) { 909 | case N_ACD_EPOLL_TIMER: 910 | r = n_acd_dispatch_timer(acd, events + i); 911 | break; 912 | case N_ACD_EPOLL_SOCKET: 913 | r = n_acd_dispatch_socket(acd, events + i); 914 | break; 915 | default: 916 | c_assert(0); 917 | r = 0; 918 | break; 919 | } 920 | 921 | if (r) 922 | return r; 923 | } 924 | 925 | return acd->preempted ? N_ACD_E_PREEMPTED : 0; 926 | } 927 | 928 | /** 929 | * n_acd_pop_event() - get the next pending event 930 | * @acd: context object to operate on 931 | * @eventp: output argument for the event 932 | * 933 | * Returns a pointer to the next pending event. The event is still owend by 934 | * the context, and is only valid until the next call to n_acd_pop_event() 935 | * or until the owning object is freed (either the ACD context or the indicated 936 | * probe object). 937 | * 938 | * An event either originates on the ACD context, or one of the configured 939 | * probes. If the event-type has a 'probe' pointer, it originated on the 940 | * indicated probe (which is *never* NULL), otherwise it originated on the 941 | * context. 942 | * 943 | * Users must call this function repeatedly until either an error is returned, 944 | * or the event-pointer is NULL. Wakeups on the epoll-fd are only guaranteed 945 | * for each batch of events. Hence, it is the callers responsibility to drain 946 | * the event-queue somehow after each call to n_acd_dispatch(). Note that 947 | * events can only be added by n_acd_dispatch(), hence, you cannot live-lock 948 | * when draining the event queue. 949 | * 950 | * The possible events are: 951 | * * N_ACD_EVENT_READY: A configured IP address was probed successfully 952 | * and is ready to be used. Once configured on the 953 | * interface, the caller must call n_acd_announce() 954 | * to announce and start defending the address. 955 | * * N_ACD_EVENT_USED: Someone is already using the IP address being 956 | * probed. The probe is put into stopped state and 957 | * should be freed by the caller. 958 | * * N_ACD_EVENT_DEFENDED: A conflict was detected for an announced IP 959 | * address, and the engine attempted to defend it. 960 | * This is purely informational, and no action is 961 | * required by the caller. 962 | * * N_ACD_EVENT_CONFLICT: A conflict was detected for an announced IP 963 | * address, and the probe was not able to defend 964 | * it (according to the configured policy). The 965 | * probe halted, the caller must stop using 966 | * the address immediately, and should free the probe. 967 | * * N_ACD_EVENT_DOWN: The specified network interface was put down. The 968 | * user is recommended to free *ALL* probes and 969 | * recreate them as soon as the interface is up again. 970 | * Note that this event is purely informational. The 971 | * probes will continue running, but all packets will 972 | * be blackholed, and no network packets are received, 973 | * until the network is back up again. Hence, from an 974 | * operational perspective, the legitimacy of the ACD 975 | * probes is lost and the user better re-probes all 976 | * addresses. 977 | * 978 | * Returns: 0 on success, negative error code on failure. The popped event is 979 | * returned in @eventp. If no event is pending, NULL is placed in 980 | * @eventp and 0 is returned. If an error is returned, @eventp is left 981 | * untouched. 982 | */ 983 | _c_public_ int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp) { 984 | NAcdEventNode *node, *t_node; 985 | 986 | c_list_for_each_entry_safe(node, t_node, &acd->event_list, acd_link) { 987 | if (node->is_public) { 988 | n_acd_event_node_free(node); 989 | continue; 990 | } 991 | 992 | node->is_public = true; 993 | *eventp = &node->event; 994 | return 0; 995 | } 996 | 997 | *eventp = NULL; 998 | return 0; 999 | } 1000 | 1001 | /** 1002 | * n_acd_probe() - start new probe 1003 | * @acd: context object to operate on 1004 | * @probep: output argument for new probe 1005 | * @config: probe configuration 1006 | * 1007 | * This creates a new probe on the context @acd and returns the probe in 1008 | * @probep. The configuration @config must provide valid probe parameters. At 1009 | * least a valid IP address must be provided through the configuration. 1010 | * 1011 | * This function does not reject duplicate probes for the same address. It is 1012 | * the caller's decision whether duplicates are allowed or not. But note that 1013 | * duplicate probes on the same context will not conflict each other. That is, 1014 | * running a probe for the same address twice on the same context will not 1015 | * cause them to consider each other a duplicate. 1016 | * 1017 | * Probes are rather lightweight objects. They do not create any 1018 | * file-descriptors or other kernel objects. Probes always re-use the 1019 | * infrastructure provided by the context object @acd. This allows running many 1020 | * probes simultaneously without exhausting resources. 1021 | * 1022 | * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT on invalid configuration 1023 | * parameters, negative error code on failure. 1024 | */ 1025 | _c_public_ int n_acd_probe(NAcd *acd, NAcdProbe **probep, NAcdProbeConfig *config) { 1026 | return n_acd_probe_new(probep, acd, config); 1027 | } 1028 | -------------------------------------------------------------------------------- /src/n-acd.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * IPv4 Address Conflict Detection 5 | * 6 | * This is the public header of the n-acd library, implementing IPv4 Address 7 | * Conflict Detection as described in RFC-5227. This header defines the public 8 | * API and all entry points of n-acd. 9 | */ 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | typedef struct NAcd NAcd; 21 | typedef struct NAcdConfig NAcdConfig; 22 | typedef struct NAcdEvent NAcdEvent; 23 | typedef struct NAcdProbe NAcdProbe; 24 | typedef struct NAcdProbeConfig NAcdProbeConfig; 25 | 26 | #define N_ACD_TIMEOUT_RFC5227 (UINT64_C(9000)) 27 | 28 | enum { 29 | _N_ACD_E_SUCCESS, 30 | 31 | N_ACD_E_PREEMPTED, 32 | N_ACD_E_INVALID_ARGUMENT, 33 | 34 | _N_ACD_E_N, 35 | }; 36 | 37 | enum { 38 | N_ACD_TRANSPORT_ETHERNET, 39 | _N_ACD_TRANSPORT_N, 40 | }; 41 | 42 | enum { 43 | N_ACD_EVENT_READY, 44 | N_ACD_EVENT_USED, 45 | N_ACD_EVENT_DEFENDED, 46 | N_ACD_EVENT_CONFLICT, 47 | N_ACD_EVENT_DOWN, 48 | _N_ACD_EVENT_N, 49 | }; 50 | 51 | enum { 52 | N_ACD_DEFEND_NEVER, 53 | N_ACD_DEFEND_ONCE, 54 | N_ACD_DEFEND_ALWAYS, 55 | _N_ACD_DEFEND_N, 56 | }; 57 | 58 | struct NAcdEvent { 59 | unsigned int event; 60 | union { 61 | struct { 62 | NAcdProbe *probe; 63 | } ready; 64 | struct { 65 | } down; 66 | struct { 67 | NAcdProbe *probe; 68 | uint8_t *sender; 69 | size_t n_sender; 70 | } used, defended, conflict; 71 | }; 72 | }; 73 | 74 | /* configs */ 75 | 76 | int n_acd_config_new(NAcdConfig **configp); 77 | NAcdConfig *n_acd_config_free(NAcdConfig *config); 78 | 79 | void n_acd_config_set_ifindex(NAcdConfig *config, int ifindex); 80 | void n_acd_config_set_transport(NAcdConfig *config, unsigned int transport); 81 | void n_acd_config_set_mac(NAcdConfig *config, const uint8_t *mac, size_t n_mac); 82 | 83 | int n_acd_probe_config_new(NAcdProbeConfig **configp); 84 | NAcdProbeConfig *n_acd_probe_config_free(NAcdProbeConfig *config); 85 | 86 | void n_acd_probe_config_set_ip(NAcdProbeConfig *config, struct in_addr ip); 87 | void n_acd_probe_config_set_timeout(NAcdProbeConfig *config, uint64_t msecs); 88 | 89 | /* contexts */ 90 | 91 | int n_acd_new(NAcd **acdp, NAcdConfig *config); 92 | NAcd *n_acd_ref(NAcd *acd); 93 | NAcd *n_acd_unref(NAcd *acd); 94 | 95 | void n_acd_get_fd(NAcd *acd, int *fdp); 96 | int n_acd_dispatch(NAcd *acd); 97 | int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp); 98 | 99 | int n_acd_probe(NAcd *acd, NAcdProbe **probep, NAcdProbeConfig *config); 100 | 101 | /* probes */ 102 | 103 | NAcdProbe *n_acd_probe_free(NAcdProbe *probe); 104 | 105 | void n_acd_probe_set_userdata(NAcdProbe *probe, void *userdata); 106 | void n_acd_probe_get_userdata(NAcdProbe *probe, void **userdatap); 107 | 108 | int n_acd_probe_announce(NAcdProbe *probe, unsigned int defend); 109 | 110 | /* inline helpers */ 111 | 112 | static inline void n_acd_config_freep(NAcdConfig **config) { 113 | if (*config) 114 | n_acd_config_free(*config); 115 | } 116 | 117 | static inline void n_acd_config_freev(NAcdConfig *config) { 118 | n_acd_config_free(config); 119 | } 120 | 121 | static inline void n_acd_probe_config_freep(NAcdProbeConfig **config) { 122 | if (*config) 123 | n_acd_probe_config_free(*config); 124 | } 125 | 126 | static inline void n_acd_probe_config_freev(NAcdProbeConfig *config) { 127 | n_acd_probe_config_free(config); 128 | } 129 | 130 | static inline void n_acd_unrefp(NAcd **acd) { 131 | if (*acd) 132 | n_acd_unref(*acd); 133 | } 134 | 135 | static inline void n_acd_unrefv(NAcd *acd) { 136 | n_acd_unref(acd); 137 | } 138 | 139 | static inline void n_acd_probe_freep(NAcdProbe **probe) { 140 | if (*probe) 141 | n_acd_probe_free(*probe); 142 | } 143 | 144 | static inline void n_acd_probe_freev(NAcdProbe *probe) { 145 | n_acd_probe_free(probe); 146 | } 147 | 148 | #ifdef __cplusplus 149 | } 150 | #endif 151 | -------------------------------------------------------------------------------- /src/test-api.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Tests for n-acd API 3 | * This verifies the visibility and availability of the public API. 4 | */ 5 | 6 | #undef NDEBUG 7 | #include 8 | #include 9 | #include "n-acd.h" 10 | 11 | static void test_api_constants(void) { 12 | assert(1 + N_ACD_TIMEOUT_RFC5227); 13 | 14 | assert(1 + _N_ACD_E_SUCCESS); 15 | assert(1 + N_ACD_E_PREEMPTED); 16 | assert(1 + N_ACD_E_INVALID_ARGUMENT); 17 | assert(1 + _N_ACD_E_N); 18 | 19 | assert(1 + N_ACD_TRANSPORT_ETHERNET); 20 | assert(1 + _N_ACD_TRANSPORT_N); 21 | 22 | assert(1 + N_ACD_EVENT_READY); 23 | assert(1 + N_ACD_EVENT_USED); 24 | assert(1 + N_ACD_EVENT_DEFENDED); 25 | assert(1 + N_ACD_EVENT_CONFLICT); 26 | assert(1 + N_ACD_EVENT_DOWN); 27 | assert(1 + _N_ACD_EVENT_N); 28 | 29 | assert(1 + N_ACD_DEFEND_NEVER); 30 | assert(1 + N_ACD_DEFEND_ONCE); 31 | assert(1 + N_ACD_DEFEND_ALWAYS); 32 | assert(1 + _N_ACD_DEFEND_N); 33 | } 34 | 35 | static void test_api_types(void) { 36 | assert(sizeof(NAcdEvent*)); 37 | assert(sizeof(NAcdConfig*)); 38 | assert(sizeof(NAcdProbeConfig*)); 39 | assert(sizeof(NAcd*)); 40 | assert(sizeof(NAcdProbe*)); 41 | } 42 | 43 | static void test_api_functions(void) { 44 | void *fns[] = { 45 | (void *)n_acd_config_new, 46 | (void *)n_acd_config_free, 47 | (void *)n_acd_config_set_ifindex, 48 | (void *)n_acd_config_set_transport, 49 | (void *)n_acd_config_set_mac, 50 | (void *)n_acd_probe_config_new, 51 | (void *)n_acd_probe_config_free, 52 | (void *)n_acd_probe_config_set_ip, 53 | (void *)n_acd_probe_config_set_timeout, 54 | 55 | (void *)n_acd_new, 56 | (void *)n_acd_ref, 57 | (void *)n_acd_unref, 58 | (void *)n_acd_get_fd, 59 | (void *)n_acd_dispatch, 60 | (void *)n_acd_pop_event, 61 | (void *)n_acd_probe, 62 | 63 | (void *)n_acd_probe_free, 64 | (void *)n_acd_probe_set_userdata, 65 | (void *)n_acd_probe_get_userdata, 66 | (void *)n_acd_probe_announce, 67 | 68 | (void *)n_acd_config_freep, 69 | (void *)n_acd_config_freev, 70 | (void *)n_acd_probe_config_freep, 71 | (void *)n_acd_probe_config_freev, 72 | (void *)n_acd_unrefp, 73 | (void *)n_acd_unrefv, 74 | (void *)n_acd_probe_freep, 75 | (void *)n_acd_probe_freev, 76 | }; 77 | size_t i; 78 | 79 | for (i = 0; i < sizeof(fns) / sizeof(*fns); ++i) 80 | assert(!!fns[i]); 81 | } 82 | 83 | int main(int argc, char **argv) { 84 | test_api_constants(); 85 | test_api_types(); 86 | test_api_functions(); 87 | return 0; 88 | } 89 | -------------------------------------------------------------------------------- /src/test-bpf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * eBPF socket filter tests 3 | */ 4 | 5 | #undef NDEBUG 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "n-acd.h" 19 | #include "n-acd-private.h" 20 | #include "test.h" 21 | 22 | #define ETHER_ARP_PACKET_INIT(_op, _mac, _sip, _tip) { \ 23 | .ea_hdr = { \ 24 | .ar_hrd = htobe16(ARPHRD_ETHER), \ 25 | .ar_pro = htobe16(ETHERTYPE_IP), \ 26 | .ar_hln = 6, \ 27 | .ar_pln = 4, \ 28 | .ar_op = htobe16(_op), \ 29 | }, \ 30 | .arp_sha[0] = (_mac)->ether_addr_octet[0], \ 31 | .arp_sha[1] = (_mac)->ether_addr_octet[1], \ 32 | .arp_sha[2] = (_mac)->ether_addr_octet[2], \ 33 | .arp_sha[3] = (_mac)->ether_addr_octet[3], \ 34 | .arp_sha[4] = (_mac)->ether_addr_octet[4], \ 35 | .arp_sha[5] = (_mac)->ether_addr_octet[5], \ 36 | .arp_spa[0] = (be32toh((_sip)->s_addr) >> 24) & 0xff, \ 37 | .arp_spa[1] = (be32toh((_sip)->s_addr) >> 16) & 0xff, \ 38 | .arp_spa[2] = (be32toh((_sip)->s_addr) >> 8) & 0xff, \ 39 | .arp_spa[3] = be32toh((_sip)->s_addr) & 0xff, \ 40 | .arp_tpa[0] = (be32toh((_tip)->s_addr) >> 24) & 0xff, \ 41 | .arp_tpa[1] = (be32toh((_tip)->s_addr) >> 16) & 0xff, \ 42 | .arp_tpa[2] = (be32toh((_tip)->s_addr) >> 8) & 0xff, \ 43 | .arp_tpa[3] = be32toh((_tip)->s_addr) & 0xff, \ 44 | } 45 | 46 | static void test_map(void) { 47 | int r, mapfd = -1; 48 | struct in_addr addr = { 1 }; 49 | 50 | r = n_acd_bpf_map_create(&mapfd, 8); 51 | c_assert(r >= 0); 52 | c_assert(mapfd >= 0); 53 | 54 | r = n_acd_bpf_map_remove(mapfd, &addr); 55 | c_assert(r == -ENOENT); 56 | 57 | r = n_acd_bpf_map_add(mapfd, &addr); 58 | c_assert(r >= 0); 59 | 60 | r = n_acd_bpf_map_add(mapfd, &addr); 61 | c_assert(r == -EEXIST); 62 | 63 | r = n_acd_bpf_map_remove(mapfd, &addr); 64 | c_assert(r >= 0); 65 | 66 | r = n_acd_bpf_map_remove(mapfd, &addr); 67 | c_assert(r == -ENOENT); 68 | 69 | close(mapfd); 70 | } 71 | 72 | static void verify_success(struct ether_arp *packet, int out_fd, int in_fd) { 73 | uint8_t buf[sizeof(struct ether_arp)]; 74 | int r; 75 | 76 | r = send(out_fd, packet, sizeof(struct ether_arp), 0); 77 | c_assert(r == sizeof(struct ether_arp)); 78 | 79 | r = recv(in_fd, buf, sizeof(buf), 0); 80 | c_assert(r == sizeof(struct ether_arp)); 81 | } 82 | 83 | static void verify_failure(struct ether_arp *packet, int out_fd, int in_fd) { 84 | uint8_t buf[sizeof(struct ether_arp)]; 85 | int r; 86 | 87 | r = send(out_fd, packet, sizeof(struct ether_arp), 0); 88 | c_assert(r == sizeof(struct ether_arp)); 89 | 90 | r = recv(in_fd, buf, sizeof(buf), 0); 91 | c_assert(r < 0); 92 | c_assert(errno == EAGAIN); 93 | } 94 | 95 | static void test_filter(void) { 96 | uint8_t buf[sizeof(struct ether_arp) + 1] = {}; 97 | struct ether_addr mac1 = { { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 } }; 98 | struct ether_addr mac2 = { { 0x01, 0x02, 0x03, 0x04, 0x05, 0x07 } }; 99 | struct in_addr ip0 = { 0 }; 100 | struct in_addr ip1 = { 1 }; 101 | struct in_addr ip2 = { 2 }; 102 | struct ether_arp *packet = (struct ether_arp *)buf; 103 | int r, mapfd = -1, progfd = -1, pair[2]; 104 | 105 | r = n_acd_bpf_map_create(&mapfd, 1); 106 | c_assert(r >= 0); 107 | 108 | r = n_acd_bpf_compile(&progfd, mapfd, &mac1); 109 | c_assert(r >= 0); 110 | c_assert(progfd >= 0); 111 | 112 | r = socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, pair); 113 | c_assert(r >= 0); 114 | 115 | r = setsockopt(pair[1], SOL_SOCKET, SO_ATTACH_BPF, &progfd, 116 | sizeof(progfd)); 117 | c_assert(r >= 0); 118 | 119 | r = n_acd_bpf_map_add(mapfd, &ip1); 120 | c_assert(r >= 0); 121 | 122 | /* valid */ 123 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2); 124 | verify_success(packet, pair[0], pair[1]); 125 | 126 | /* valid: reply instead of request */ 127 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REPLY, &mac2, &ip1, &ip2); 128 | verify_success(packet, pair[0], pair[1]); 129 | 130 | /* valid: to us instead of from us */ 131 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip0, &ip1); 132 | verify_success(packet, pair[0], pair[1]); 133 | 134 | /* invalid header type */ 135 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2); 136 | packet->arp_hrd += 1; 137 | verify_failure(packet, pair[0], pair[1]); 138 | 139 | /* invalid protocol */ 140 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2); 141 | packet->arp_pro += 1; 142 | verify_failure(packet, pair[0], pair[1]); 143 | 144 | /* invalid hw addr length */ 145 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2); 146 | packet->arp_hln += 1; 147 | verify_failure(packet, pair[0], pair[1]); 148 | 149 | /* invalid protocol addr length */ 150 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2); 151 | packet->arp_pln += 1; 152 | verify_failure(packet, pair[0], pair[1]); 153 | 154 | /* invalid operation */ 155 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_NAK, &mac2, &ip1, &ip2); 156 | packet->arp_hln += 1; 157 | verify_failure(packet, pair[0], pair[1]); 158 | 159 | /* own mac */ 160 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac1, &ip1, &ip2); 161 | verify_failure(packet, pair[0], pair[1]); 162 | 163 | /* not to, nor from us, with source */ 164 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip2, &ip2); 165 | verify_failure(packet, pair[0], pair[1]); 166 | 167 | /* not to, nor from us, without source */ 168 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip0, &ip2); 169 | verify_failure(packet, pair[0], pair[1]); 170 | 171 | /* to us instead of from us, but reply */ 172 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REPLY, &mac2, &ip0, &ip1); 173 | verify_failure(packet, pair[0], pair[1]); 174 | 175 | /* long */ 176 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2); 177 | r = send(pair[0], buf, sizeof(struct ether_arp) + 1, 0); 178 | c_assert(r == sizeof(struct ether_arp) + 1); 179 | 180 | r = recv(pair[1], buf, sizeof(buf), 0); 181 | c_assert(r == sizeof(struct ether_arp)); 182 | 183 | /* short */ 184 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2); 185 | r = send(pair[0], buf, sizeof(struct ether_arp) - 1, 0); 186 | c_assert(r == sizeof(struct ether_arp) - 1); 187 | 188 | r = recv(pair[1], buf, sizeof(buf), 0); 189 | c_assert(r < 0); 190 | c_assert(errno == EAGAIN); 191 | 192 | /* 193 | * Send one packet before and one packet after modifying the map, 194 | * verify that the modification applies at the time of send(), not recv(). 195 | */ 196 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2); 197 | r = send(pair[0], buf, sizeof(struct ether_arp), 0); 198 | c_assert(r == sizeof(struct ether_arp)); 199 | 200 | r = n_acd_bpf_map_remove(mapfd, &ip1); 201 | c_assert(r >= 0); 202 | 203 | r = send(pair[0], buf, sizeof(struct ether_arp), 0); 204 | c_assert(r == sizeof(struct ether_arp)); 205 | 206 | r = recv(pair[1], buf, sizeof(buf), 0); 207 | c_assert(r == sizeof(struct ether_arp)); 208 | 209 | r = recv(pair[1], buf, sizeof(buf), 0); 210 | c_assert(r < 0); 211 | c_assert(errno == EAGAIN); 212 | 213 | close(pair[0]); 214 | close(pair[1]); 215 | close(progfd); 216 | close(mapfd); 217 | } 218 | 219 | int main(int argc, char **argv) { 220 | test_setup(); 221 | 222 | test_map(); 223 | test_filter(); 224 | 225 | return 0; 226 | } 227 | -------------------------------------------------------------------------------- /src/test-loopback.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Test on loopback device 3 | * This runs the ACD engine on the loopback device, effectively testing the BPF 4 | * filter of ACD to discard its own packets. This might happen on 5 | * non-spanning-tree networks, or on networks that echo packets. 6 | */ 7 | 8 | #undef NDEBUG 9 | #include 10 | #include 11 | #include "test.h" 12 | 13 | static void test_loopback(int ifindex, uint8_t *mac, size_t n_mac) { 14 | NAcdConfig *config; 15 | NAcd *acd; 16 | struct pollfd pfds; 17 | int r, fd; 18 | 19 | r = n_acd_config_new(&config); 20 | c_assert(!r); 21 | 22 | n_acd_config_set_ifindex(config, ifindex); 23 | n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET); 24 | n_acd_config_set_mac(config, mac, n_mac); 25 | 26 | r = n_acd_new(&acd, config); 27 | c_assert(!r); 28 | 29 | n_acd_config_free(config); 30 | 31 | { 32 | NAcdProbeConfig *probe_config; 33 | NAcdProbe *probe; 34 | struct in_addr ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) }; 35 | 36 | r = n_acd_probe_config_new(&probe_config); 37 | c_assert(!r); 38 | 39 | n_acd_probe_config_set_ip(probe_config, ip); 40 | n_acd_probe_config_set_timeout(probe_config, 100); 41 | 42 | r = n_acd_probe(acd, &probe, probe_config); 43 | c_assert(!r); 44 | 45 | n_acd_probe_config_free(probe_config); 46 | 47 | n_acd_get_fd(acd, &fd); 48 | 49 | for (;;) { 50 | NAcdEvent *event; 51 | pfds = (struct pollfd){ .fd = fd, .events = POLLIN }; 52 | r = poll(&pfds, 1, -1); 53 | c_assert(r >= 0); 54 | 55 | r = n_acd_dispatch(acd); 56 | c_assert(!r); 57 | 58 | r = n_acd_pop_event(acd, &event); 59 | c_assert(!r); 60 | if (event) { 61 | c_assert(event->event == N_ACD_EVENT_READY); 62 | break; 63 | } 64 | } 65 | 66 | n_acd_probe_free(probe); 67 | } 68 | 69 | n_acd_unref(acd); 70 | } 71 | 72 | int main(int argc, char **argv) { 73 | struct ether_addr mac; 74 | int ifindex; 75 | 76 | test_setup(); 77 | 78 | test_loopback_up(&ifindex, &mac); 79 | test_loopback(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet)); 80 | 81 | return 0; 82 | } 83 | -------------------------------------------------------------------------------- /src/test-twice.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Test with unused address twice in parallel 3 | * This runs the ACD engine with an unused address on a veth pair, but it runs 4 | * it on both ends. We expect the PROBE to fail on at least one of the devices. 5 | */ 6 | 7 | #undef NDEBUG 8 | #include 9 | #include 10 | #include "test.h" 11 | 12 | static void test_unused(int ifindex1, uint8_t *mac1, size_t n_mac1, int ifindex2, uint8_t *mac2, size_t n_mac2) { 13 | NAcdConfig config1 = { 14 | .ifindex = ifindex1, 15 | .transport = N_ACD_TRANSPORT_ETHERNET, 16 | .mac = mac1, 17 | .n_mac = n_mac1, 18 | .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) }, 19 | .timeout_msec = 100, 20 | }; 21 | NAcdConfig config2 = { 22 | .ifindex = ifindex2, 23 | .transport = N_ACD_TRANSPORT_ETHERNET, 24 | .mac = mac2, 25 | .n_mac = n_mac2, 26 | .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) }, 27 | .timeout_msec = 100, 28 | }; 29 | struct pollfd pfds[2]; 30 | NAcd *acd1, *acd2; 31 | int r, fd1, fd2, state1, state2; 32 | 33 | r = n_acd_new(&acd1); 34 | c_assert(!r); 35 | r = n_acd_new(&acd2); 36 | c_assert(!r); 37 | 38 | n_acd_get_fd(acd1, &fd1); 39 | n_acd_get_fd(acd2, &fd2); 40 | 41 | r = n_acd_start(acd1, &config1); 42 | c_assert(!r); 43 | r = n_acd_start(acd2, &config2); 44 | c_assert(!r); 45 | 46 | for (state1 = state2 = -1; state1 == -1 || state2 == -1; ) { 47 | NAcdEvent *event; 48 | pfds[0] = (struct pollfd){ .fd = fd1, .events = (state1 == -1) ? POLLIN : 0 }; 49 | pfds[1] = (struct pollfd){ .fd = fd2, .events = (state2 == -1) ? POLLIN : 0 }; 50 | 51 | r = poll(pfds, sizeof(pfds) / sizeof(*pfds), -1); 52 | c_assert(r >= 0); 53 | 54 | if (state1 == -1) { 55 | r = n_acd_dispatch(acd1); 56 | c_assert(!r); 57 | 58 | r = n_acd_pop_event(acd1, &event); 59 | if (!r) { 60 | c_assert(event->event == N_ACD_EVENT_READY || event->event == N_ACD_EVENT_USED); 61 | state1 = !!(event->event == N_ACD_EVENT_READY); 62 | } else { 63 | c_assert(r == N_ACD_E_DONE); 64 | } 65 | } 66 | 67 | if (state2 == -1) { 68 | r = n_acd_dispatch(acd2); 69 | c_assert(!r); 70 | 71 | r = n_acd_pop_event(acd2, &event); 72 | if (!r) { 73 | c_assert(event->event == N_ACD_EVENT_READY || event->event == N_ACD_EVENT_USED); 74 | state2 = !!(event->event == N_ACD_EVENT_READY); 75 | } else { 76 | c_assert(r == N_ACD_E_DONE); 77 | } 78 | } 79 | } 80 | 81 | n_acd_free(acd1); 82 | n_acd_free(acd2); 83 | 84 | c_assert(!state1 || !state2); 85 | } 86 | 87 | int main(int argc, char **argv) { 88 | struct ether_addr mac1, mac2; 89 | int ifindex1, ifindex2; 90 | 91 | test_setup(); 92 | 93 | test_veth_new(&ifindex1, &mac1, &ifindex2, &mac2); 94 | test_unused(ifindex1, mac1.ether_addr_octet, sizeof(mac2.ether_addr_octet), ifindex2, mac2.ether_addr_octet, sizeof(mac2.ether_addr_octet)); 95 | 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /src/test-unplug.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Unplug device during test run 3 | * Run the ACD engine with an address that is not used by anyone else on the 4 | * link, but DOWN or UNPLUG the device while running. 5 | */ 6 | 7 | #undef NDEBUG 8 | #include 9 | #include 10 | #include "test.h" 11 | 12 | static void test_unplug_down(int ifindex, uint8_t *mac, size_t n_mac, unsigned int run) { 13 | NAcdConfig config = { 14 | .ifindex = ifindex, 15 | .transport = N_ACD_TRANSPORT_ETHERNET, 16 | .mac = mac, 17 | .n_mac = n_mac, 18 | .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) }, 19 | .timeout_msec = 100, 20 | }; 21 | struct pollfd pfds; 22 | NAcd *acd; 23 | int r, fd; 24 | 25 | if (!run--) 26 | test_veth_cmd(ifindex, "down"); 27 | 28 | r = n_acd_new(&acd); 29 | c_assert(!r); 30 | 31 | if (!run--) 32 | test_veth_cmd(ifindex, "down"); 33 | 34 | n_acd_get_fd(acd, &fd); 35 | r = n_acd_start(acd, &config); 36 | c_assert(!r); 37 | 38 | if (!run--) 39 | test_veth_cmd(ifindex, "down"); 40 | 41 | for (;;) { 42 | NAcdEvent *event; 43 | pfds = (struct pollfd){ .fd = fd, .events = POLLIN }; 44 | r = poll(&pfds, 1, -1); 45 | c_assert(r >= 0); 46 | 47 | if (!run--) 48 | test_veth_cmd(ifindex, "down"); 49 | 50 | r = n_acd_dispatch(acd); 51 | c_assert(!r); 52 | 53 | r = n_acd_pop_event(acd, &event); 54 | if (!r) { 55 | if (event->event == N_ACD_EVENT_DOWN) { 56 | break; 57 | } else { 58 | c_assert(event->event == N_ACD_EVENT_READY); 59 | test_veth_cmd(ifindex, "down"); 60 | } 61 | } else { 62 | c_assert(r == N_ACD_E_DONE); 63 | } 64 | } 65 | 66 | n_acd_free(acd); 67 | } 68 | 69 | int main(int argc, char **argv) { 70 | struct ether_addr mac; 71 | unsigned int i; 72 | int ifindex; 73 | 74 | test_setup(); 75 | 76 | test_veth_new(&ifindex, &mac, NULL, NULL); 77 | 78 | for (i = 0; i < 5; ++i) { 79 | test_unplug_down(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet), i); 80 | test_veth_cmd(ifindex, "up"); 81 | } 82 | 83 | return 0; 84 | } 85 | -------------------------------------------------------------------------------- /src/test-unused.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Test with unused address 3 | * Run the ACD engine with an address that is not used by anyone else on the 4 | * link. This should just pass through, with a short, random timeout. 5 | */ 6 | 7 | #undef NDEBUG 8 | #include 9 | #include 10 | #include "test.h" 11 | 12 | static void test_unused(int ifindex, const uint8_t *mac, size_t n_mac) { 13 | NAcdConfig config = { 14 | .ifindex = ifindex, 15 | .transport = N_ACD_TRANSPORT_ETHERNET, 16 | .mac = mac, 17 | .n_mac = n_mac, 18 | .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) }, 19 | .timeout_msec = 100, 20 | }; 21 | struct pollfd pfds; 22 | NAcd *acd; 23 | int r, fd; 24 | 25 | r = n_acd_new(&acd); 26 | c_assert(!r); 27 | 28 | n_acd_get_fd(acd, &fd); 29 | r = n_acd_start(acd, &config); 30 | c_assert(!r); 31 | 32 | for (;;) { 33 | NAcdEvent *event; 34 | pfds = (struct pollfd){ .fd = fd, .events = POLLIN }; 35 | r = poll(&pfds, 1, -1); 36 | c_assert(r >= 0); 37 | 38 | r = n_acd_dispatch(acd); 39 | c_assert(!r); 40 | 41 | r = n_acd_pop_event(acd, &event); 42 | if (!r) { 43 | c_assert(event->event == N_ACD_EVENT_READY); 44 | break; 45 | } else { 46 | c_assert(r == N_ACD_E_DONE); 47 | } 48 | } 49 | 50 | n_acd_free(acd); 51 | } 52 | 53 | int main(int argc, char **argv) { 54 | struct ether_addr mac; 55 | int ifindex; 56 | 57 | test_setup(); 58 | 59 | test_veth_new(&ifindex, &mac, NULL, NULL); 60 | test_unused(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet)); 61 | 62 | return 0; 63 | } 64 | -------------------------------------------------------------------------------- /src/test-veth.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Test on a veth link 3 | * 4 | * This essentially mimics a real network with two peers. 5 | * 6 | * Run one ACD context on each end of the tunnel. On one end probe for N, 7 | * addresses on the other end pre-configure N/3 of the same addresses and probe 8 | * for another N/3 of the addresses. 9 | * 10 | * Verify that in the case of simultaneous probes of the same address at most one 11 | * succeed, in the case of probing for a configured address it always fails, and 12 | * probing for a non-existent address always succeeds. 13 | * 14 | * Make sure to keep N fairly high as the protocol is probabilistic, and we also 15 | * want to verify that resizing the internal maps works correctly. 16 | */ 17 | 18 | #undef NDEBUG 19 | #include 20 | #include 21 | #include "test.h" 22 | 23 | #define TEST_ACD_N_PROBES (9) 24 | 25 | typedef enum { 26 | TEST_ACD_STATE_UNKNOWN, 27 | TEST_ACD_STATE_USED, 28 | TEST_ACD_STATE_READY, 29 | } TestAcdState; 30 | 31 | static void test_veth(int ifindex1, uint8_t *mac1, size_t n_mac1, 32 | int ifindex2, uint8_t *mac2, size_t n_mac2) { 33 | NAcdConfig *config; 34 | NAcd *acd1, *acd2; 35 | NAcdProbe *probes1[TEST_ACD_N_PROBES]; 36 | NAcdProbe *probes2[TEST_ACD_N_PROBES]; 37 | unsigned long state1, state2; 38 | size_t n_running = 0; 39 | int r; 40 | 41 | r = n_acd_config_new(&config); 42 | c_assert(!r); 43 | 44 | n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET); 45 | 46 | n_acd_config_set_ifindex(config, ifindex1); 47 | n_acd_config_set_mac(config, mac1, n_mac1); 48 | r = n_acd_new(&acd1, config); 49 | c_assert(!r); 50 | 51 | n_acd_config_set_ifindex(config, ifindex2); 52 | n_acd_config_set_mac(config, mac2, n_mac2); 53 | r = n_acd_new(&acd2, config); 54 | c_assert(!r); 55 | 56 | n_acd_config_free(config); 57 | 58 | { 59 | NAcdProbeConfig *probe_config; 60 | 61 | r = n_acd_probe_config_new(&probe_config); 62 | c_assert(!r); 63 | n_acd_probe_config_set_timeout(probe_config, 1024); 64 | 65 | c_assert(TEST_ACD_N_PROBES <= 10 << 24); 66 | 67 | for (size_t i = 0; i < TEST_ACD_N_PROBES; ++i) { 68 | struct in_addr ip = { htobe32((10 << 24) | i) }; 69 | 70 | n_acd_probe_config_set_ip(probe_config, ip); 71 | 72 | switch (i % 3) { 73 | case 0: 74 | /* 75 | * Probe on one side, and leave the address 76 | * unset on the other. The probe must succeed. 77 | */ 78 | break; 79 | case 1: 80 | /* 81 | * Preconfigure the address on one side, and 82 | * probe on the other. The probe must fail. 83 | */ 84 | test_add_child_ip(&ip); 85 | break; 86 | case 2: 87 | /* 88 | * Probe both sides for the same address, at 89 | * most one may succeed. 90 | */ 91 | 92 | r = n_acd_probe(acd2, &probes2[i], probe_config); 93 | c_assert(!r); 94 | 95 | ++n_running; 96 | break; 97 | default: 98 | c_assert(0); 99 | abort(); 100 | break; 101 | } 102 | 103 | r = n_acd_probe(acd1, &probes1[i], probe_config); 104 | c_assert(!r); 105 | 106 | ++n_running; 107 | } 108 | 109 | n_acd_probe_config_free(probe_config); 110 | 111 | while (n_running > 0) { 112 | NAcdEvent *event; 113 | struct pollfd pfds[2] = { 114 | { .events = POLLIN }, 115 | { .events = POLLIN }, 116 | }; 117 | 118 | n_acd_get_fd(acd1, &pfds[0].fd); 119 | n_acd_get_fd(acd2, &pfds[1].fd); 120 | 121 | r = poll(pfds, 2, -1); 122 | c_assert(r >= 0); 123 | 124 | if (pfds[0].revents & POLLIN) { 125 | r = n_acd_dispatch(acd1); 126 | c_assert(!r || r == N_ACD_E_PREEMPTED); 127 | 128 | for (;;) { 129 | r = n_acd_pop_event(acd1, &event); 130 | c_assert(!r); 131 | if (event) { 132 | switch (event->event) { 133 | case N_ACD_EVENT_READY: 134 | n_acd_probe_get_userdata(event->ready.probe, (void**)&state1); 135 | c_assert(state1 == TEST_ACD_STATE_UNKNOWN); 136 | state1 = TEST_ACD_STATE_READY; 137 | n_acd_probe_set_userdata(event->ready.probe, (void*)state1); 138 | 139 | break; 140 | case N_ACD_EVENT_USED: 141 | n_acd_probe_get_userdata(event->used.probe, (void**)&state1); 142 | c_assert(state1 == TEST_ACD_STATE_UNKNOWN); 143 | state1 = TEST_ACD_STATE_USED; 144 | n_acd_probe_set_userdata(event->used.probe, (void*)state1); 145 | 146 | break; 147 | default: 148 | c_assert(0); 149 | } 150 | 151 | --n_running; 152 | } else { 153 | break; 154 | } 155 | } 156 | } 157 | 158 | if (pfds[1].revents & POLLIN) { 159 | r = n_acd_dispatch(acd2); 160 | c_assert(!r || r == N_ACD_E_PREEMPTED); 161 | 162 | for (;;) { 163 | r = n_acd_pop_event(acd2, &event); 164 | c_assert(!r); 165 | if (event) { 166 | switch (event->event) { 167 | case N_ACD_EVENT_READY: 168 | n_acd_probe_get_userdata(event->ready.probe, (void**)&state2); 169 | c_assert(state2 == TEST_ACD_STATE_UNKNOWN); 170 | state2 = TEST_ACD_STATE_READY; 171 | n_acd_probe_set_userdata(event->ready.probe, (void*)state2); 172 | 173 | break; 174 | case N_ACD_EVENT_USED: 175 | n_acd_probe_get_userdata(event->used.probe, (void**)&state2); 176 | c_assert(state2 == TEST_ACD_STATE_UNKNOWN); 177 | state2 = TEST_ACD_STATE_USED; 178 | n_acd_probe_set_userdata(event->used.probe, (void*)state2); 179 | 180 | break; 181 | default: 182 | c_assert(0); 183 | } 184 | 185 | --n_running; 186 | } else { 187 | break; 188 | } 189 | } 190 | } 191 | } 192 | 193 | for (size_t i = 0; i < TEST_ACD_N_PROBES; ++i) { 194 | struct in_addr ip = { htobe32((10 << 24) | i) }; 195 | 196 | switch (i % 3) { 197 | case 0: 198 | n_acd_probe_get_userdata(probes1[i], (void **)&state1); 199 | c_assert(state1 == TEST_ACD_STATE_READY); 200 | 201 | break; 202 | case 1: 203 | test_del_child_ip(&ip); 204 | 205 | n_acd_probe_get_userdata(probes1[i], (void **)&state1); 206 | c_assert(state1 == TEST_ACD_STATE_USED); 207 | 208 | break; 209 | case 2: 210 | n_acd_probe_get_userdata(probes1[i], (void **)&state1); 211 | n_acd_probe_get_userdata(probes2[i], (void **)&state2); 212 | c_assert(state1 != TEST_ACD_STATE_UNKNOWN); 213 | c_assert(state2 != TEST_ACD_STATE_UNKNOWN); 214 | c_assert(state1 == TEST_ACD_STATE_USED || state2 == TEST_ACD_STATE_USED); 215 | n_acd_probe_free(probes2[i]); 216 | 217 | break; 218 | } 219 | n_acd_probe_free(probes1[i]); 220 | } 221 | } 222 | 223 | n_acd_unref(acd2); 224 | n_acd_unref(acd1); 225 | } 226 | 227 | int main(int argc, char **argv) { 228 | struct ether_addr mac1, mac2; 229 | int ifindex1, ifindex2; 230 | 231 | test_setup(); 232 | 233 | test_veth_new(&ifindex1, &mac1, &ifindex2, &mac2); 234 | for (unsigned int i = 0; i < 8; ++i) { 235 | test_veth(ifindex1, mac1.ether_addr_octet, sizeof(mac1.ether_addr_octet), 236 | ifindex2, mac2.ether_addr_octet, sizeof(mac2.ether_addr_octet)); 237 | } 238 | 239 | return 0; 240 | } 241 | -------------------------------------------------------------------------------- /src/test.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * Test Helpers 5 | * Bunch of helpers to setup the environment for networking tests. This 6 | * includes net-namespace setups, veth setups, and more. 7 | */ 8 | 9 | #undef NDEBUG 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include "n-acd.h" 33 | 34 | static inline void test_add_child_ip(const struct in_addr *addr) { 35 | char *p; 36 | int r; 37 | 38 | r = asprintf(&p, "ip addr add dev veth1 %s/8", inet_ntoa(*addr)); 39 | c_assert(r >= 0); 40 | 41 | r = system(p); 42 | c_assert(r >= 0); 43 | 44 | free(p); 45 | } 46 | 47 | static inline void test_del_child_ip(const struct in_addr *addr) { 48 | char *p; 49 | int r; 50 | 51 | r = asprintf(&p, "ip addr del dev veth1 %s/8", inet_ntoa(*addr)); 52 | c_assert(r >= 0); 53 | 54 | r = system(p); 55 | c_assert(r >= 0); 56 | 57 | free(p); 58 | } 59 | 60 | static inline void test_if_query(const char *name, int *indexp, struct ether_addr *macp) { 61 | struct ifreq ifr = {}; 62 | size_t l; 63 | int r, s; 64 | 65 | l = strlen(name); 66 | c_assert(l <= IF_NAMESIZE); 67 | 68 | if (indexp) { 69 | *indexp = if_nametoindex(name); 70 | c_assert(*indexp > 0); 71 | } 72 | 73 | if (macp) { 74 | s = socket(AF_INET, SOCK_DGRAM, 0); 75 | c_assert(s >= 0); 76 | 77 | strncpy(ifr.ifr_name, name, l + 1); 78 | r = ioctl(s, SIOCGIFHWADDR, &ifr); 79 | c_assert(r >= 0); 80 | 81 | memcpy(macp->ether_addr_octet, ifr.ifr_hwaddr.sa_data, ETH_ALEN); 82 | 83 | close(s); 84 | } 85 | } 86 | 87 | static inline void test_veth_cmd(int ifindex, const char *cmd) { 88 | char *p, name[IF_NAMESIZE + 1] = {}; 89 | int r; 90 | 91 | p = if_indextoname(ifindex, name); 92 | c_assert(p); 93 | 94 | r = asprintf(&p, "ip link set %s %s", name, cmd); 95 | c_assert(r >= 0); 96 | 97 | /* Again: Ewwww... */ 98 | r = system(p); 99 | c_assert(r == 0); 100 | 101 | free(p); 102 | } 103 | 104 | static inline void test_veth_new(int *parent_indexp, 105 | struct ether_addr *parent_macp, 106 | int *child_indexp, 107 | struct ether_addr *child_macp) { 108 | int r; 109 | 110 | /* Eww... but it works. */ 111 | r = system("ip link add type veth"); 112 | c_assert(r == 0); 113 | r = system("ip link set veth0 up"); 114 | c_assert(r == 0); 115 | r = system("ip link set veth1 up"); 116 | c_assert(r == 0); 117 | 118 | test_if_query("veth0", parent_indexp, parent_macp); 119 | test_if_query("veth1", child_indexp, child_macp); 120 | } 121 | 122 | static inline void test_loopback_up(int *indexp, struct ether_addr *macp) { 123 | int r; 124 | 125 | r = system("ip link set lo up"); 126 | c_assert(r == 0); 127 | 128 | test_if_query("lo", indexp, macp); 129 | } 130 | 131 | static inline void test_raise_memlock(void) { 132 | const size_t wanted = 64 * 1024 * 1024; 133 | struct rlimit get, set; 134 | int r; 135 | 136 | r = getrlimit(RLIMIT_MEMLOCK, &get); 137 | c_assert(!r); 138 | 139 | /* try raising limit to @wanted */ 140 | set.rlim_cur = wanted; 141 | set.rlim_max = (wanted > get.rlim_max) ? wanted : get.rlim_max; 142 | r = setrlimit(RLIMIT_MEMLOCK, &set); 143 | if (r) { 144 | c_assert(errno == EPERM); 145 | 146 | /* not privileged to raise limit, so maximize soft limit */ 147 | set.rlim_cur = get.rlim_max; 148 | set.rlim_max = get.rlim_max; 149 | r = setrlimit(RLIMIT_MEMLOCK, &set); 150 | c_assert(!r); 151 | } 152 | } 153 | 154 | static inline void test_unshare_user_namespace(void) { 155 | uid_t euid; 156 | gid_t egid; 157 | int r, fd; 158 | 159 | /* 160 | * Enter a new user namespace as root:root. 161 | */ 162 | 163 | euid = geteuid(); 164 | egid = getegid(); 165 | 166 | r = unshare(CLONE_NEWUSER); 167 | c_assert(r >= 0); 168 | 169 | fd = open("/proc/self/uid_map", O_WRONLY); 170 | c_assert(fd >= 0); 171 | r = dprintf(fd, "0 %d 1\n", euid); 172 | c_assert(r >= 0); 173 | close(fd); 174 | 175 | fd = open("/proc/self/setgroups", O_WRONLY); 176 | c_assert(fd >= 0); 177 | r = dprintf(fd, "deny"); 178 | c_assert(r >= 0); 179 | close(fd); 180 | 181 | fd = open("/proc/self/gid_map", O_WRONLY); 182 | c_assert(fd >= 0); 183 | r = dprintf(fd, "0 %d 1\n", egid); 184 | c_assert(r >= 0); 185 | close(fd); 186 | } 187 | 188 | static inline void test_setup(void) { 189 | int r; 190 | 191 | /* 192 | * Move into a new network and mount namespace both associated 193 | * with a new user namespace where the current eUID is mapped to 194 | * 0. Then create a private instance of /run/netns. This ensures 195 | * that any network devices or network namespaces are private to 196 | * the test process. 197 | */ 198 | 199 | test_raise_memlock(); 200 | test_unshare_user_namespace(); 201 | 202 | r = unshare(CLONE_NEWNET | CLONE_NEWNS); 203 | c_assert(r >= 0); 204 | 205 | r = mount(NULL, "/", "", MS_PRIVATE | MS_REC, NULL); 206 | c_assert(r >= 0); 207 | 208 | r = mount(NULL, "/run", "tmpfs", 0, NULL); 209 | c_assert(r >= 0); 210 | 211 | r = mkdir("/run/netns", 0755); 212 | c_assert(r >= 0); 213 | } 214 | -------------------------------------------------------------------------------- /src/util/test-timer.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Tests for timer utility library 3 | */ 4 | 5 | #undef NDEBUG 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "timer.h" 14 | 15 | #define N_TIMEOUTS (10000) 16 | 17 | static void test_api(void) { 18 | Timer timer = TIMER_NULL(timer); 19 | Timeout t1 = TIMEOUT_INIT(t1), t2 = TIMEOUT_INIT(t2), *t; 20 | int r; 21 | 22 | r = timer_init(&timer); 23 | c_assert(!r); 24 | 25 | timeout_schedule(&t1, &timer, 1); 26 | timeout_schedule(&t2, &timer, 2); 27 | 28 | r = timer_pop_timeout(&timer, 10, &t); 29 | c_assert(!r); 30 | c_assert(t == &t1); 31 | 32 | timeout_unschedule(&t2); 33 | 34 | r = timer_pop_timeout(&timer, 10, &t); 35 | c_assert(!r); 36 | c_assert(!t); 37 | 38 | timer_deinit(&timer); 39 | } 40 | 41 | static void test_pop(void) { 42 | Timer timer = TIMER_NULL(timer); 43 | Timeout timeouts[N_TIMEOUTS] = {}; 44 | uint64_t times[N_TIMEOUTS] = {}; 45 | size_t n_timeouts = 0; 46 | bool armed; 47 | Timeout *t; 48 | int r; 49 | 50 | r = timer_init(&timer); 51 | c_assert(!r); 52 | 53 | for(size_t i = 0; i < N_TIMEOUTS; ++i) { 54 | timeouts[i] = (Timeout)TIMEOUT_INIT(timeouts[i]); 55 | times[i] = rand() % 128 + 1; 56 | timeout_schedule(&timeouts[i], &timer, times[i]); 57 | } 58 | 59 | armed = true; 60 | 61 | for(size_t i = 0; i <= 128; ++i) { 62 | if (armed) { 63 | struct pollfd pfd = { 64 | .fd = timer.fd, 65 | .events = POLLIN, 66 | }; 67 | uint64_t count; 68 | 69 | r = poll(&pfd, 1, -1); 70 | c_assert(r == 1); 71 | 72 | r = read(timer.fd, &count, sizeof(count)); 73 | c_assert(r == sizeof(count)); 74 | c_assert(count == 1); 75 | armed = false; 76 | } 77 | 78 | for (;;) { 79 | uint64_t current_time; 80 | 81 | r = timer_pop_timeout(&timer, i, &t); 82 | c_assert(!r); 83 | if (!t) { 84 | timer_rearm(&timer); 85 | break; 86 | } 87 | 88 | current_time = times[t - timeouts]; 89 | c_assert(current_time == i); 90 | ++n_timeouts; 91 | armed = true; 92 | } 93 | } 94 | 95 | c_assert(n_timeouts == N_TIMEOUTS); 96 | 97 | r = timer_pop_timeout(&timer, (uint64_t)-1, &t); 98 | c_assert(!r); 99 | c_assert(!t); 100 | 101 | timer_deinit(&timer); 102 | } 103 | 104 | void test_arm(void) { 105 | struct itimerspec spec = { 106 | .it_value = { 107 | .tv_sec = 1000, 108 | }, 109 | }; 110 | int fd1, fd2, r; 111 | 112 | fd1 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK); 113 | c_assert(fd1 >= 0); 114 | 115 | fd2 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK); 116 | c_assert(fd1 >= 0); 117 | 118 | r = timerfd_settime(fd1, 0, &spec, NULL); 119 | c_assert(r >= 0); 120 | 121 | r = timerfd_settime(fd2, 0, &spec, NULL); 122 | c_assert(r >= 0); 123 | 124 | r = timerfd_gettime(fd1, &spec); 125 | c_assert(r >= 0); 126 | c_assert(spec.it_value.tv_sec); 127 | 128 | r = timerfd_gettime(fd2, &spec); 129 | c_assert(r >= 0); 130 | c_assert(spec.it_value.tv_sec); 131 | 132 | spec = (struct itimerspec){}; 133 | 134 | r = timerfd_settime(fd1, 0, &spec, NULL); 135 | c_assert(r >= 0); 136 | 137 | r = timerfd_gettime(fd1, &spec); 138 | c_assert(r >= 0); 139 | c_assert(!spec.it_value.tv_sec); 140 | c_assert(!spec.it_value.tv_nsec); 141 | 142 | r = timerfd_gettime(fd2, &spec); 143 | c_assert(r >= 0); 144 | c_assert(spec.it_value.tv_sec); 145 | 146 | spec = (struct itimerspec){ .it_value = { .tv_nsec = 1, }, }; 147 | 148 | r = timerfd_settime(fd1, 0, &spec, NULL); 149 | c_assert(r >= 0); 150 | 151 | r = poll(&(struct pollfd) { .fd = fd1, .events = POLLIN }, 1, -1); 152 | c_assert(r == 1); 153 | 154 | r = timerfd_settime(fd2, 0, &spec, NULL); 155 | c_assert(r >= 0); 156 | 157 | r = poll(&(struct pollfd) { .fd = fd2, .events = POLLIN }, 1, -1); 158 | c_assert(r == 1); 159 | 160 | spec = (struct itimerspec){}; 161 | 162 | r = timerfd_settime(fd1, 0, &spec, NULL); 163 | c_assert(r >= 0); 164 | 165 | r = poll(&(struct pollfd) { .fd = fd2, .events = POLLIN }, 1, -1); 166 | c_assert(r == 1); 167 | 168 | close(fd2); 169 | close(fd1); 170 | } 171 | 172 | int main(int argc, char **argv) { 173 | test_arm(); 174 | test_api(); 175 | test_pop(); 176 | return 0; 177 | } 178 | -------------------------------------------------------------------------------- /src/util/timer.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Timer Utility Library 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "timer.h" 13 | 14 | int timer_init(Timer *timer) { 15 | clockid_t clock = CLOCK_BOOTTIME; 16 | int r; 17 | 18 | r = timerfd_create(clock, TFD_CLOEXEC | TFD_NONBLOCK); 19 | if (r < 0 && errno == EINVAL) { 20 | clock = CLOCK_MONOTONIC; 21 | r = timerfd_create(clock, TFD_CLOEXEC | TFD_NONBLOCK); 22 | } 23 | if (r < 0) 24 | return -errno; 25 | 26 | *timer = (Timer)TIMER_NULL(*timer); 27 | timer->fd = r; 28 | timer->clock = clock; 29 | 30 | return 0; 31 | } 32 | 33 | void timer_deinit(Timer *timer) { 34 | c_assert(c_rbtree_is_empty(&timer->tree)); 35 | 36 | if (timer->fd >= 0) { 37 | close(timer->fd); 38 | timer->fd = -1; 39 | } 40 | } 41 | 42 | void timer_now(Timer *timer, uint64_t *nowp) { 43 | struct timespec ts; 44 | int r; 45 | 46 | r = clock_gettime(timer->clock, &ts); 47 | c_assert(r >= 0); 48 | 49 | *nowp = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; 50 | } 51 | 52 | void timer_rearm(Timer *timer) { 53 | uint64_t time; 54 | Timeout *timeout; 55 | int r; 56 | 57 | /* 58 | * A timeout value of 0 clears the timer, we should only set that if 59 | * no timeout exists in the tree. 60 | */ 61 | 62 | timeout = c_rbnode_entry(c_rbtree_first(&timer->tree), Timeout, node); 63 | c_assert(!timeout || timeout->timeout); 64 | 65 | time = timeout ? timeout->timeout : 0; 66 | 67 | if (time != timer->scheduled_timeout) { 68 | r = timerfd_settime(timer->fd, 69 | TFD_TIMER_ABSTIME, 70 | &(struct itimerspec){ 71 | .it_value = { 72 | .tv_sec = time / UINT64_C(1000000000), 73 | .tv_nsec = time % UINT64_C(1000000000), 74 | }, 75 | }, 76 | NULL); 77 | c_assert(r >= 0); 78 | 79 | timer->scheduled_timeout = time; 80 | } 81 | } 82 | 83 | int timer_read(Timer *timer) { 84 | uint64_t v; 85 | int r; 86 | 87 | r = read(timer->fd, &v, sizeof(v)); 88 | if (r < 0) { 89 | if (errno == EAGAIN) { 90 | /* 91 | * No more pending events. 92 | */ 93 | return 0; 94 | } else { 95 | /* 96 | * Something failed. We use CLOCK_BOOTTIME/MONOTONIC, 97 | * so ECANCELED cannot happen. Hence, there is no 98 | * error that we could gracefully handle. Fail hard 99 | * and let the caller deal with it. 100 | */ 101 | return -errno; 102 | } 103 | } else if (r != sizeof(v) || v == 0) { 104 | /* 105 | * Kernel guarantees 8-byte reads, and only to return 106 | * data if at least one timer triggered; fail hard if 107 | * it suddenly starts doing weird shit. 108 | */ 109 | return -EIO; 110 | } 111 | 112 | return TIMER_E_TRIGGERED; 113 | } 114 | 115 | 116 | int timer_pop_timeout(Timer *timer, uint64_t until, Timeout **timeoutp) { 117 | Timeout *timeout; 118 | 119 | /* 120 | * If the first timeout is scheduled before @until, then unlink 121 | * it and return it. Otherwise, return NULL. 122 | */ 123 | timeout = c_rbnode_entry(c_rbtree_first(&timer->tree), Timeout, node); 124 | if (timeout && timeout->timeout <= until) { 125 | c_rbnode_unlink(&timeout->node); 126 | timeout->timeout = 0; 127 | *timeoutp = timeout; 128 | } else { 129 | *timeoutp = NULL; 130 | } 131 | 132 | return 0; 133 | } 134 | 135 | void timeout_schedule(Timeout *timeout, Timer *timer, uint64_t time) { 136 | c_assert(time); 137 | 138 | /* 139 | * In case @timeout was already scheduled, remove it from the 140 | * tree. If we are moving it to a new timer, rearm the old one. 141 | */ 142 | if (timeout->timer) { 143 | c_rbnode_unlink(&timeout->node); 144 | if (timeout->timer != timer) 145 | timer_rearm(timeout->timer); 146 | } 147 | timeout->timer = timer; 148 | timeout->timeout = time; 149 | 150 | /* 151 | * Now insert it back into the tree in the correct new position. 152 | * We allow duplicates in the tree, so this insertion is open-coded. 153 | */ 154 | { 155 | Timeout *other; 156 | CRBNode **slot, *parent; 157 | 158 | slot = &timer->tree.root; 159 | parent = NULL; 160 | while (*slot) { 161 | other = c_rbnode_entry(*slot, Timeout, node); 162 | parent = *slot; 163 | if (timeout->timeout < other->timeout) 164 | slot = &(*slot)->left; 165 | else 166 | slot = &(*slot)->right; 167 | } 168 | 169 | c_rbtree_add(&timer->tree, parent, slot, &timeout->node); 170 | } 171 | 172 | /* 173 | * Rearm the timer as we updated the timeout tree. 174 | */ 175 | timer_rearm(timer); 176 | } 177 | 178 | void timeout_unschedule(Timeout *timeout) { 179 | Timer *timer = timeout->timer; 180 | 181 | if (!timer) 182 | return; 183 | 184 | c_rbnode_unlink(&timeout->node); 185 | timeout->timeout = 0; 186 | timeout->timer = NULL; 187 | 188 | timer_rearm(timer); 189 | } 190 | -------------------------------------------------------------------------------- /src/util/timer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | typedef struct Timer Timer; 11 | typedef struct Timeout Timeout; 12 | 13 | enum { 14 | _TIMER_E_SUCCESS, 15 | 16 | TIMER_E_TRIGGERED, 17 | 18 | _TIMER_E_N, 19 | }; 20 | 21 | struct Timer { 22 | int fd; 23 | clockid_t clock; 24 | CRBTree tree; 25 | uint64_t scheduled_timeout; 26 | }; 27 | 28 | #define TIMER_NULL(_x) { \ 29 | .fd = -1, \ 30 | .tree = C_RBTREE_INIT, \ 31 | } 32 | 33 | struct Timeout { 34 | Timer *timer; 35 | CRBNode node; 36 | uint64_t timeout; 37 | }; 38 | 39 | #define TIMEOUT_INIT(_x) { \ 40 | .node = C_RBNODE_INIT((_x).node), \ 41 | } 42 | 43 | int timer_init(Timer *timer); 44 | void timer_deinit(Timer *timer); 45 | 46 | void timer_now(Timer *timer, uint64_t *nowp); 47 | 48 | int timer_pop_timeout(Timer *timer, uint64_t now, Timeout **timerp); 49 | void timer_rearm(Timer *timer); 50 | int timer_read(Timer *timer); 51 | 52 | void timeout_schedule(Timeout *timeout, Timer *timer, uint64_t time); 53 | void timeout_unschedule(Timeout *timeout); 54 | 55 | --------------------------------------------------------------------------------