├── .editorconfig
├── .github
    └── workflows
    │   └── ci.yml
├── .gitmodules
├── AUTHORS
├── NEWS.md
├── README.md
├── meson.build
├── meson_options.txt
└── src
    ├── libnacd.sym
    ├── meson.build
    ├── n-acd-bpf-fallback.c
    ├── n-acd-bpf.c
    ├── n-acd-private.h
    ├── n-acd-probe.c
    ├── n-acd.c
    ├── n-acd.h
    ├── test-api.c
    ├── test-bpf.c
    ├── test-loopback.c
    ├── test-twice.c
    ├── test-unplug.c
    ├── test-unused.c
    ├── test-veth.c
    ├── test.h
    └── util
        ├── test-timer.c
        ├── timer.c
        └── timer.h


/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | end_of_line = lf
 5 | insert_final_newline = true
 6 | trim_trailing_whitespace = true
 7 | charset = utf-8
 8 | 
 9 | [*.{c,h}]
10 | indent_style = space
11 | indent_size = 8
12 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: Continuous Integration
  2 | 
  3 | on:
  4 |   push:
  5 |   pull_request:
  6 |   schedule:
  7 |   - cron:  '0 0 * * *'
  8 | 
  9 | jobs:
 10 |   ci:
 11 |     name: CI with Default Configuration
 12 |     runs-on: ubuntu-latest
 13 | 
 14 |     steps:
 15 |     #
 16 |     # Prepare CI
 17 |     #
 18 |     # We cannot use the github-action of the `ci-c-util` project, because we
 19 |     # need privileges in the container. Therefore, fetch the CI sources and
 20 |     # build the container manually.
 21 |     #
 22 |     - name: Fetch CI
 23 |       uses: actions/checkout@v2
 24 |       with:
 25 |         repository: c-util/automation
 26 |         ref: v1
 27 |         path: automation
 28 |     - name: Build CI
 29 |       working-directory: automation/src/ci-c-util
 30 |       run: docker build --tag ci-c-util:v1 .
 31 | 
 32 |     #
 33 |     # Run CI
 34 |     #
 35 |     # Take the CI image we built and run the CI with the default project
 36 |     # configuration. We do not use valgrind, since it falls-over with bpf(2)
 37 |     # syscalls.
 38 |     #
 39 |     - name: Fetch Sources
 40 |       uses: actions/checkout@v2
 41 |       with:
 42 |         path: source
 43 |     - name: Run through C-Util CI
 44 |       run: |
 45 |         docker run \
 46 |                 --privileged \
 47 |                 -v "$(pwd)/source:/github/workspace" \
 48 |                 "ci-c-util:v1" \
 49 |                 "--m32=1" \
 50 |                 "--source=/github/workspace"
 51 | 
 52 |   ci-no-ebpf:
 53 |     name: CI without eBPF
 54 |     runs-on: ubuntu-latest
 55 | 
 56 |     steps:
 57 |     # See above in 'ci' job.
 58 |     - name: Fetch CI
 59 |       uses: actions/checkout@v2
 60 |       with:
 61 |         repository: c-util/automation
 62 |         ref: v1
 63 |         path: automation
 64 |     - name: Build CI
 65 |       working-directory: automation/src/ci-c-util
 66 |       run: docker build --tag ci-c-util:v1 .
 67 | 
 68 |     #
 69 |     # Run CI
 70 |     #
 71 |     # This again runs the CI, but this time disables eBPF. We do support the
 72 |     # legacy BPF fallback, so lets make sure we test for it.
 73 |     #
 74 |     - name: Fetch Sources
 75 |       uses: actions/checkout@v2
 76 |       with:
 77 |         path: source
 78 |     - name: Run through C-Util CI
 79 |       run: |
 80 |         docker run \
 81 |                 --privileged \
 82 |                 -v "$(pwd)/source:/github/workspace" \
 83 |                 "ci-c-util:v1" \
 84 |                 "--m32=1" \
 85 |                 "--mesonargs=-Debpf=false" \
 86 |                 "--source=/github/workspace"
 87 | 
 88 |   ci-valgrind:
 89 |     name: CI through Valgrind
 90 |     runs-on: ubuntu-latest
 91 | 
 92 |     steps:
 93 |     # See above in 'ci' job.
 94 |     - name: Fetch CI
 95 |       uses: actions/checkout@v2
 96 |       with:
 97 |         repository: c-util/automation
 98 |         ref: v1
 99 |         path: automation
100 |     - name: Build CI
101 |       working-directory: automation/src/ci-c-util
102 |       run: docker build --tag ci-c-util:v1 .
103 | 
104 |     #
105 |     # Run CI
106 |     #
107 |     # This again runs the CI, but this time through valgrind. Since some
108 |     # syscalls are not implemented on x86-64 32bit compat (e.g., bpf(2)), we
109 |     # disable the m32 mode.
110 |     #
111 |     - name: Fetch Sources
112 |       uses: actions/checkout@v2
113 |       with:
114 |         path: source
115 |     - name: Run through C-Util CI
116 |       run: |
117 |         docker run \
118 |                 --privileged \
119 |                 -v "$(pwd)/source:/github/workspace" \
120 |                 "ci-c-util:v1" \
121 |                 "--source=/github/workspace" \
122 |                 "--valgrind=1"
123 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "subprojects/c-list"]
 2 | 	path = subprojects/c-list
 3 | 	url = https://github.com/c-util/c-list.git
 4 | [submodule "subprojects/c-siphash"]
 5 | 	path = subprojects/c-siphash
 6 | 	url = https://github.com/c-util/c-siphash.git
 7 | [submodule "subprojects/c-rbtree"]
 8 | 	path = subprojects/c-rbtree
 9 | 	url = https://github.com/c-util/c-rbtree.git
10 | [submodule "subprojects/c-stdaux"]
11 | 	path = subprojects/c-stdaux
12 | 	url = https://github.com/c-util/c-stdaux.git
13 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
 1 | LICENSE:
 2 |         This project is dual-licensed under both the Apache License, Version
 3 |         2.0, and the GNU Lesser General Public License, Version 2.1+.
 4 | 
 5 | AUTHORS-ASL:
 6 |         Licensed under the Apache License, Version 2.0 (the "License");
 7 |         you may not use this file except in compliance with the License.
 8 |         You may obtain a copy of the License at
 9 | 
10 |                 http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 |         Unless required by applicable law or agreed to in writing, software
13 |         distributed under the License is distributed on an "AS IS" BASIS,
14 |         WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 |         See the License for the specific language governing permissions and
16 |         limitations under the License.
17 | 
18 | AUTHORS-LGPL:
19 |         This program is free software; you can redistribute it and/or modify it
20 |         under the terms of the GNU Lesser General Public License as published
21 |         by the Free Software Foundation; either version 2.1 of the License, or
22 |         (at your option) any later version.
23 | 
24 |         This program is distributed in the hope that it will be useful, but
25 |         WITHOUT ANY WARRANTY; without even the implied warranty of
26 |         MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 |         Lesser General Public License for more details.
28 | 
29 |         You should have received a copy of the GNU Lesser General Public License
30 |         along with this program; If not, see <http://www.gnu.org/licenses/>.
31 | 
32 | COPYRIGHT: (ordered alphabetically)
33 |         Copyright (C) 2015-2019 Red Hat, Inc.
34 | 
35 | AUTHORS: (ordered alphabetically)
36 |         Beniamino Galvani <bgalvani@redhat.com>
37 |         David Rheinsberg <david.rheinsberg@gmail.com>
38 |         Thomas Haller <thaller@redhat.com>
39 |         Tom Gundersen <teg@jklm.no>
40 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # n-acd - IPv4 Address Conflict Detection
 2 | 
 3 | ## CHANGES WITH 2:
 4 | 
 5 |         * All public destructors now include a variant that returns `void`.
 6 |           This was requested for easier integration with `glib` and friends.
 7 |           Similar to the `cleanup` variants, these variants are denoted by a
 8 |           single-character function-name suffix. E.g., `n_acd_freev()`
 9 | 
10 |         * A fallback to `CLOCK_MONOTONIC` is now provided in case
11 |           `CLOCK_BOOTTIME` is not supported by the kernel. Note that this is in
12 |           no way signalled through the API, so if timers should follow the
13 |           `BOOTTIME` rather than monotonic clock, a kernel with this clock is
14 |           required.
15 | 
16 |         * The `c-sundry` dependency is no longer needed.
17 | 
18 |         * The `transport` configuration property is now mandatory for
19 |           `n_acd_new()`. It defaulted to `ETHERNET` before, by mistake.
20 | 
21 |         * In-source documentation for the public API is now provided.
22 | 
23 |         Contributions from: Beniamino Galvani, David Herrmann, David
24 |                             Rheinsberg, Thomas Haller, Tom Gundersen
25 | 
26 |         - Tübingen, 2019-03-20
27 | 
28 | ## CHANGES WITH 1:
29 | 
30 |         * Initial release of n-acd. This project implements the IPv4 Address
31 |           Conflict Detection standard as defined in RFC-5227. The state machine
32 |           is implemented in a shared library and provides a stable ISO-C11 API.
33 |           The implementation is linux-only and relies heavily on the API
34 |           behavior of recent linux kernel releases.
35 | 
36 |         * Compared to the pre-releases, this release supports many parallel
37 |           probes on a single n-acd context. This reduces the number of
38 |           allocated network resources to O(1), based on the number of running
39 |           parallel probes.
40 | 
41 |         * The n-acd project is now dual-licensed: ASL-2.0 and LGPL-2.1+
42 | 
43 |         Contributions from: Beniamino Galvani, David Herrmann, Thomas Haller,
44 |                             Tom Gundersen
45 | 
46 |         - Tübingen, 2018-08-08
47 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | n-acd
 2 | =====
 3 | 
 4 | IPv4 Address Conflict Detection
 5 | 
 6 | The n-acd project implements the IPv4 Address Conflict Detection standard as
 7 | defined in RFC-5227. The state machine is implemented in a shared library and
 8 | provides a stable ISO-C11 API. The implementation is linux-only and relies
 9 | heavily on the API behavior of recent linux kernel releases.
10 | 
11 | ### Project
12 | 
13 |  * **Website**: <https://nettools.github.io/n-acd>
14 |  * **Bug Tracker**: <https://github.com/nettools/n-acd/issues>
15 |  * **Mailing-List**: <https://groups.google.com/forum/#!forum/nettools-devel>
16 | 
17 | ### Requirements
18 | 
19 | The requirements for this project are:
20 | 
21 |  * `Linux kernel >= 3.19`
22 |  * `libc` (e.g., `glibc >= 2.16`)
23 | 
24 | At build-time, the following software is required:
25 | 
26 |  * `meson >= 0.41`
27 |  * `pkg-config >= 0.29`
28 | 
29 | ### Build
30 | 
31 | The meson build-system is used for this project. Contact upstream
32 | documentation for detailed help. In most situations the following
33 | commands are sufficient to build and install from source:
34 | 
35 | ```sh
36 | mkdir build
37 | cd build
38 | meson setup ..
39 | ninja
40 | meson test
41 | ninja install
42 | ```
43 | 
44 | The following configuration options are available:
45 | 
46 |  * `ebpf`: This boolean controls whether `ebpf` features are used to improve
47 |            the package filtering performance. If disabled, classic bpf will be
48 |            used. This feature requires a rather recent kernel (>=3.19).
49 |            Default is: true
50 | 
51 | ### Repository:
52 | 
53 |  - **web**:   <https://github.com/nettools/n-acd>
54 |  - **https**: `https://github.com/nettools/n-acd.git`
55 |  - **ssh**:   `git@github.com:nettools/n-acd.git`
56 | 
57 | ### License:
58 | 
59 |  - **Apache-2.0** OR **LGPL-2.1-or-later**
60 |  - See AUTHORS file for details.
61 | 


--------------------------------------------------------------------------------
/meson.build:
--------------------------------------------------------------------------------
 1 | project(
 2 |         'n-acd',
 3 |         'c',
 4 |         version: '2',
 5 |         license: 'Apache',
 6 |         default_options: [
 7 |                 'c_std=c11',
 8 |         ],
 9 | )
10 | project_description = 'IPv4 Address Conflict Detection'
11 | 
12 | add_project_arguments('-D_GNU_SOURCE', language: 'c')
13 | mod_pkgconfig = import('pkgconfig')
14 | 
15 | sub_clist = subproject('c-list')
16 | sub_crbtree = subproject('c-rbtree')
17 | sub_csiphash = subproject('c-siphash')
18 | sub_cstdaux = subproject('c-stdaux')
19 | 
20 | dep_clist = sub_clist.get_variable('libclist_dep')
21 | dep_crbtree = sub_crbtree.get_variable('libcrbtree_dep')
22 | dep_csiphash = sub_csiphash.get_variable('libcsiphash_dep')
23 | dep_cstdaux = sub_cstdaux.get_variable('libcstdaux_dep')
24 | 
25 | use_ebpf = get_option('ebpf')
26 | 
27 | subdir('src')
28 | 


--------------------------------------------------------------------------------
/meson_options.txt:
--------------------------------------------------------------------------------
1 | option('ebpf', type: 'boolean', value: true, description: 'Enable eBPF packet filtering')
2 | 


--------------------------------------------------------------------------------
/src/libnacd.sym:
--------------------------------------------------------------------------------
 1 | LIBNACD_2 {
 2 | global:
 3 |         n_acd_config_new;
 4 |         n_acd_config_free;
 5 |         n_acd_config_set_ifindex;
 6 |         n_acd_config_set_transport;
 7 |         n_acd_config_set_mac;
 8 | 
 9 |         n_acd_probe_config_new;
10 |         n_acd_probe_config_free;
11 |         n_acd_probe_config_set_ip;
12 |         n_acd_probe_config_set_timeout;
13 | 
14 |         n_acd_new;
15 |         n_acd_ref;
16 |         n_acd_unref;
17 |         n_acd_get_fd;
18 |         n_acd_dispatch;
19 |         n_acd_pop_event;
20 |         n_acd_probe;
21 | 
22 |         n_acd_probe_free;
23 |         n_acd_probe_set_userdata;
24 |         n_acd_probe_get_userdata;
25 |         n_acd_probe_announce;
26 | local:
27 |        *;
28 | };
29 | 


--------------------------------------------------------------------------------
/src/meson.build:
--------------------------------------------------------------------------------
 1 | #
 2 | # target: libnacd.so
 3 | #
 4 | 
 5 | libnacd_symfile = join_paths(meson.current_source_dir(), 'libnacd.sym')
 6 | 
 7 | libnacd_deps = [
 8 |         dep_clist,
 9 |         dep_crbtree,
10 |         dep_csiphash,
11 |         dep_cstdaux,
12 | ]
13 | 
14 | libnacd_sources = [
15 |         'n-acd.c',
16 |         'n-acd-probe.c',
17 |         'util/timer.c',
18 | ]
19 | 
20 | if use_ebpf
21 |         libnacd_sources += [
22 |                 'n-acd-bpf.c',
23 |         ]
24 | else
25 |         libnacd_sources += [
26 |                 'n-acd-bpf-fallback.c',
27 |         ]
28 | endif
29 | 
30 | libnacd_private = static_library(
31 |         'nacd-private',
32 |         libnacd_sources,
33 |         c_args: [
34 |                 '-fvisibility=hidden',
35 |                 '-fno-common'
36 |         ],
37 |         dependencies: libnacd_deps,
38 |         pic: true,
39 | )
40 | 
41 | libnacd_shared = shared_library(
42 |         'nacd',
43 |         objects: libnacd_private.extract_all_objects(),
44 |         dependencies: libnacd_deps,
45 |         install: not meson.is_subproject(),
46 |         soversion: 0,
47 |         link_depends: libnacd_symfile,
48 |         link_args: [
49 |                 '-Wl,--no-undefined',
50 |                 '-Wl,--version-script=@0@'.format(libnacd_symfile)
51 |         ],
52 | )
53 | 
54 | libnacd_dep = declare_dependency(
55 |         include_directories: include_directories('.'),
56 |         link_with: libnacd_private,
57 |         dependencies: libnacd_deps,
58 |         version: meson.project_version(),
59 | )
60 | 
61 | if not meson.is_subproject()
62 |         install_headers('n-acd.h')
63 | 
64 |         mod_pkgconfig.generate(
65 |                 libraries: libnacd_shared,
66 |                 version: meson.project_version(),
67 |                 name: 'libnacd',
68 |                 filebase: 'libnacd',
69 |                 description: project_description,
70 |         )
71 | endif
72 | 
73 | #
74 | # target: test-*
75 | #
76 | 
77 | test_api = executable('test-api', ['test-api.c'], link_with: libnacd_shared)
78 | test('API Symbol Visibility', test_api)
79 | 
80 | if use_ebpf
81 |         test_bpf = executable('test-bpf', ['test-bpf.c'], dependencies: libnacd_dep)
82 |         test('eBPF socket filtering', test_bpf)
83 | endif
84 | 
85 | test_loopback = executable('test-loopback', ['test-loopback.c'], dependencies: libnacd_dep)
86 | test('Echo Suppression via Loopback', test_loopback)
87 | 
88 | test_timer = executable('test-timer', ['util/test-timer.c'], dependencies: libnacd_dep)
89 | test('Timer helper', test_timer)
90 | 
91 | #test_unplug = executable('test-unplug', ['test-unplug.c'], dependencies: libnacd_dep)
92 | #test('Async Interface Hotplug', test_unplug)
93 | 
94 | test_veth = executable('test-veth', ['test-veth.c'], dependencies: libnacd_dep)
95 | test('Parallel ACD instances', test_veth)
96 | 


--------------------------------------------------------------------------------
/src/n-acd-bpf-fallback.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * A noop implementation of eBPF filter for IPv4 Address Conflict Detection
 3 |  *
 4 |  * These are a collection of dummy functions that have no effect, but allows
 5 |  * n-acd to compile without eBPF support.
 6 |  *
 7 |  * See n-acd-bpf.c for documentation.
 8 |  */
 9 | 
10 | #include <c-stdaux.h>
11 | #include <stddef.h>
12 | #include "n-acd-private.h"
13 | 
14 | int n_acd_bpf_map_create(int *mapfdp, size_t max_entries) {
15 |         *mapfdp = -1;
16 |         return 0;
17 | }
18 | 
19 | int n_acd_bpf_map_add(int mapfd, struct in_addr *addrp) {
20 |         return 0;
21 | }
22 | 
23 | int n_acd_bpf_map_remove(int mapfd, struct in_addr *addrp) {
24 |         return 0;
25 | }
26 | 
27 | int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *macp) {
28 |         *progfdp = -1;
29 |         return 0;
30 | }
31 | 


--------------------------------------------------------------------------------
/src/n-acd-bpf.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * eBPF filter for IPv4 Address Conflict Detection
  3 |  *
  4 |  * An eBPF map and an eBPF program are provided. The map contains all the
  5 |  * addresses address conflict detection is performed on, and the program
  6 |  * filters out all packets except exactly the packets relevant to the ACD
  7 |  * protocol on the addresses currently in the map.
  8 |  *
  9 |  * Note that userspace still has to filter the incoming packets, as filter
 10 |  * are applied when packets are queued on the socket, not when userspace
 11 |  * receives them. It is therefore possible to receive packets about addresses
 12 |  * that have already been removed.
 13 |  */
 14 | 
 15 | #include <c-stdaux.h>
 16 | #include <errno.h>
 17 | #include <inttypes.h>
 18 | #include <linux/bpf.h>
 19 | #include <netinet/if_ether.h>
 20 | #include <netinet/in.h>
 21 | #include <stdlib.h>
 22 | #include <string.h>
 23 | #include <sys/resource.h>
 24 | #include <sys/syscall.h>
 25 | #include <unistd.h>
 26 | #include "n-acd-private.h"
 27 | 
 28 | #define BPF_LD_ABS(SIZE, IMM)                                                   \
 29 |         ((struct bpf_insn) {                                                    \
 30 |                 .code           = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS,            \
 31 |                 .dst_reg        = 0,                                            \
 32 |                 .src_reg        = 0,                                            \
 33 |                 .off            = 0,                                            \
 34 |                 .imm            = IMM,                                          \
 35 |         })
 36 | 
 37 | #define BPF_LDX_MEM(SIZE, DST, SRC, OFF)                                        \
 38 |         ((struct bpf_insn) {                                                    \
 39 |                 .code           = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,           \
 40 |                 .dst_reg        = DST,                                          \
 41 |                 .src_reg        = SRC,                                          \
 42 |                 .off            = OFF,                                          \
 43 |                 .imm            = 0,                                            \
 44 |         })
 45 | 
 46 | #define BPF_LD_MAP_FD(DST, MAP_FD)                                              \
 47 |         ((struct bpf_insn) {                                                    \
 48 |                 .code           = BPF_LD | BPF_DW | BPF_IMM,                    \
 49 |                 .dst_reg        = DST,                                          \
 50 |                 .src_reg        = BPF_PSEUDO_MAP_FD,                            \
 51 |                 .off            = 0,                                            \
 52 |                 .imm            = (__u32) (MAP_FD),                             \
 53 |         }),                                                                     \
 54 |         ((struct bpf_insn) {                                                    \
 55 |                 .code           = 0, /* zero is reserved opcode */              \
 56 |                 .dst_reg        = 0,                                            \
 57 |                 .src_reg        = 0,                                            \
 58 |                 .off            = 0,                                            \
 59 |                 .imm            = ((__u64) (MAP_FD)) >> 32,                     \
 60 |         })
 61 | 
 62 | #define BPF_ALU_REG(OP, DST, SRC)                                               \
 63 |         ((struct bpf_insn) {                                                    \
 64 |                 .code           = BPF_ALU64 | BPF_OP(OP) | BPF_X,               \
 65 |                 .dst_reg        = DST,                                          \
 66 |                 .src_reg        = SRC,                                          \
 67 |                 .off            = 0,                                            \
 68 |                 .imm            = 0,                                            \
 69 |         })
 70 | 
 71 | #define BPF_ALU_IMM(OP, DST, IMM)                                               \
 72 |         ((struct bpf_insn) {                                                    \
 73 |                 .code           = BPF_ALU64 | BPF_OP(OP) | BPF_K,               \
 74 |                 .dst_reg        = DST,                                          \
 75 |                 .src_reg        = 0,                                            \
 76 |                 .off            = 0,                                            \
 77 |                 .imm            = IMM,                                          \
 78 |         })
 79 | 
 80 | #define BPF_MOV_REG(DST, SRC)                                                   \
 81 |         ((struct bpf_insn) {                                                    \
 82 |                 .code           = BPF_ALU64 | BPF_MOV | BPF_X,                  \
 83 |                 .dst_reg        = DST,                                          \
 84 |                 .src_reg        = SRC,                                          \
 85 |                 .off            = 0,                                            \
 86 |                 .imm            = 0,                                            \
 87 |         })
 88 | 
 89 | #define BPF_MOV_IMM(DST, IMM)                                                   \
 90 |         ((struct bpf_insn) {                                                    \
 91 |                 .code           = BPF_ALU64 | BPF_MOV | BPF_K,                  \
 92 |                 .dst_reg        = DST,                                          \
 93 |                 .src_reg        = 0,                                            \
 94 |                 .off            = 0,                                            \
 95 |                 .imm            = IMM,                                          \
 96 |         })
 97 | 
 98 | #define BPF_STX_MEM(SIZE, DST, SRC, OFF)                                        \
 99 |         ((struct bpf_insn) {                                                    \
100 |                 .code           = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,           \
101 |                 .dst_reg        = DST,                                          \
102 |                 .src_reg        = SRC,                                          \
103 |                 .off            = OFF,                                          \
104 |                 .imm            = 0,                                            \
105 |         })
106 | 
107 | #define BPF_JMP_REG(OP, DST, SRC, OFF)                                          \
108 |         ((struct bpf_insn) {                                                    \
109 |                 .code           = BPF_JMP | BPF_OP(OP) | BPF_X,                 \
110 |                 .dst_reg        = DST,                                          \
111 |                 .src_reg        = SRC,                                          \
112 |                 .off            = OFF,                                          \
113 |                 .imm            = 0,                                            \
114 |         })
115 | 
116 | #define BPF_JMP_IMM(OP, DST, IMM, OFF)                                          \
117 |         ((struct bpf_insn) {                                                    \
118 |                 .code           = BPF_JMP | BPF_OP(OP) | BPF_K,                 \
119 |                 .dst_reg        = DST,                                          \
120 |                 .src_reg        = 0,                                            \
121 |                 .off            = OFF,                                          \
122 |                 .imm            = IMM,                                          \
123 |         })
124 | 
125 | #define BPF_EMIT_CALL(FUNC)                                                     \
126 |         ((struct bpf_insn) {                                                    \
127 |                 .code           = BPF_JMP | BPF_CALL,                           \
128 |                 .dst_reg        = 0,                                            \
129 |                 .src_reg        = 0,                                            \
130 |                 .off            = 0,                                            \
131 |                 .imm            = FUNC,                                         \
132 |         })
133 | 
134 | #define BPF_EXIT_INSN()                                                         \
135 |         ((struct bpf_insn) {                                                    \
136 |                 .code           = BPF_JMP | BPF_EXIT,                           \
137 |                 .dst_reg        = 0,                                            \
138 |                 .src_reg        = 0,                                            \
139 |                 .off            = 0,                                            \
140 |                 .imm            = 0,                                            \
141 |         })
142 | 
143 | static int n_acd_syscall_bpf(int cmd, union bpf_attr *attr, unsigned int size) {
144 |         return (int)syscall(__NR_bpf, cmd, attr, size);
145 | }
146 | 
147 | int n_acd_bpf_map_create(int *mapfdp, size_t max_entries) {
148 |         union bpf_attr attr;
149 |         int mapfd;
150 | 
151 |         memset(&attr, 0, sizeof(attr));
152 |         attr = (union bpf_attr){
153 |                 .map_type    = BPF_MAP_TYPE_HASH,
154 |                 .key_size    = sizeof(uint32_t),
155 |                 .value_size  = sizeof(uint8_t), /* values are never used, but must be set */
156 |                 .max_entries = max_entries,
157 |         };
158 | 
159 |         mapfd = n_acd_syscall_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
160 |         if (mapfd < 0)
161 |                 return -errno;
162 | 
163 |         *mapfdp = mapfd;
164 |         return 0;
165 | }
166 | 
167 | int n_acd_bpf_map_add(int mapfd, struct in_addr *addrp) {
168 |         union bpf_attr attr;
169 |         uint32_t addr = be32toh(addrp->s_addr);
170 |         uint8_t _dummy = 0;
171 |         int r;
172 | 
173 |         memset(&attr, 0, sizeof(attr));
174 |         attr = (union bpf_attr){
175 |                 .map_fd = mapfd,
176 |                 .key    = (uint64_t)(unsigned long)&addr,
177 |                 .value  = (uint64_t)(unsigned long)&_dummy,
178 |                 .flags  = BPF_NOEXIST,
179 |         };
180 | 
181 |         r = n_acd_syscall_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
182 |         if (r < 0)
183 |                 return -errno;
184 | 
185 |         return 0;
186 | }
187 | 
188 | int n_acd_bpf_map_remove(int mapfd, struct in_addr *addrp) {
189 |         uint32_t addr = be32toh(addrp->s_addr);
190 |         union bpf_attr attr;
191 |         int r;
192 | 
193 |         memset(&attr, 0, sizeof(attr));
194 |         attr = (union bpf_attr){
195 |                 .map_fd = mapfd,
196 |                 .key    = (uint64_t)(unsigned long)&addr,
197 |         };
198 | 
199 |         r = n_acd_syscall_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
200 |         if (r < 0)
201 |                 return -errno;
202 | 
203 |         return 0;
204 | }
205 | 
206 | int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *macp) {
207 |         const union {
208 |                 uint8_t u8[6];
209 |                 uint16_t u16[3];
210 |                 uint32_t u32[1];
211 |         } mac = {
212 |                 .u8 = {
213 |                         macp->ether_addr_octet[0],
214 |                         macp->ether_addr_octet[1],
215 |                         macp->ether_addr_octet[2],
216 |                         macp->ether_addr_octet[3],
217 |                         macp->ether_addr_octet[4],
218 |                         macp->ether_addr_octet[5],
219 |                 },
220 |         };
221 |         struct bpf_insn prog[] = {
222 |                 /* for using BPF_LD_ABS r6 must point to the skb, currently in r1 */
223 |                 BPF_MOV_REG(6, 1),                                              /* r6 = r1 */
224 | 
225 |                 /* drop the packet if it is too short */
226 |                 BPF_LDX_MEM(BPF_W, 0, 6, offsetof(struct __sk_buff, len)),      /* r0 = skb->len */
227 |                 BPF_JMP_IMM(BPF_JGE, 0, sizeof(struct ether_arp), 2),           /* if (r0 >= sizeof(ether_arp)) skip 2 */
228 |                 BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
229 |                 BPF_EXIT_INSN(),                                                /* return */
230 | 
231 |                 /* drop the packet if the header is not as expected */
232 |                 BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_hrd)),         /* r0 = header type */
233 |                 BPF_JMP_IMM(BPF_JEQ, 0, ARPHRD_ETHER, 2),                       /* if (r0 == ethernet) skip 2 */
234 |                 BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
235 |                 BPF_EXIT_INSN(),                                                /* return */
236 | 
237 |                 BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_pro)),         /* r0 = protocol */
238 |                 BPF_JMP_IMM(BPF_JEQ, 0, ETHERTYPE_IP, 2),                       /* if (r0 == IP) skip 2 */
239 |                 BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
240 |                 BPF_EXIT_INSN(),                                                /* return */
241 | 
242 |                 BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_hln)),         /* r0 = hw addr length */
243 |                 BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct ether_addr), 2),          /* if (r0 == sizeof(ether_addr)) skip 2 */
244 |                 BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
245 |                 BPF_EXIT_INSN(),                                                /* return */
246 | 
247 |                 BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_pln)),         /* r0 = protocol addr length */
248 |                 BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct in_addr), 2),             /* if (r0 == sizeof(in_addr)) skip 2 */
249 |                 BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
250 |                 BPF_EXIT_INSN(),                                                /* return */
251 | 
252 |                 /* drop packets from our own mac address */
253 |                 BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_sha)),         /* r0 = first four bytes of packet mac address */
254 |                 BPF_JMP_IMM(BPF_JNE, 0, be32toh(mac.u32[0]), 4),                /* if (r0 != first four bytes of our mac address) skip 4 */
255 |                 BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_sha) + 4),     /* r0 = last two bytes of packet mac address */
256 |                 BPF_JMP_IMM(BPF_JNE, 0, be16toh(mac.u16[2]), 2),                /* if (r0 != last two bytes of our mac address) skip 2 */
257 |                 BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
258 |                 BPF_EXIT_INSN(),                                                /* return */
259 | 
260 |                 /*
261 |                  * We listen for two kinds of packets:
262 |                  *  Conflicts)
263 |                  *    These are requests or replies with the sender address not set to INADDR_ANY. The
264 |                  *    conflicted address is the sender address, remember this in r7.
265 |                  *  Probes)
266 |                  *    These are requests with the sender address set to INADDR_ANY. The probed address
267 |                  *    is the target address, remember this in r7.
268 |                  *  Any other packets are dropped.
269 |                  */
270 |                 BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_spa)),         /* r0 = sender ip address */
271 |                 BPF_JMP_IMM(BPF_JEQ, 0, 0, 7),                                  /* if (r0 == 0) skip 7 */
272 |                 BPF_MOV_REG(7, 0),                                              /* r7 = r0 */
273 |                 BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)),          /* r0 = operation */
274 |                 BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 3),                      /* if (r0 == request) skip 3 */
275 |                 BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REPLY, 2),                        /* if (r0 == reply) skip 2 */
276 |                 BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
277 |                 BPF_EXIT_INSN(),                                                /* return */
278 |                 BPF_JMP_IMM(BPF_JA, 0, 0, 6),                                   /* skip 6 */
279 |                 BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_tpa)),         /* r0 = target ip address */
280 |                 BPF_MOV_REG(7, 0),                                              /* r7 = r0 */
281 |                 BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)),          /* r0 = operation */
282 |                 BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 2),                      /* if (r0 == request) skip 2 */
283 |                 BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
284 |                 BPF_EXIT_INSN(),                                                /* return */
285 | 
286 |                 /* check if the probe or conflict is for an address we are monitoring */
287 |                 BPF_STX_MEM(BPF_W, 10, 7, -4),                                  /* *(uint32_t*)fp - 4 = r7 */
288 |                 BPF_MOV_REG(2, 10),                                             /* r2 = fp */
289 |                 BPF_ALU_IMM(BPF_ADD, 2, -4),                                    /* r2 -= 4 */
290 |                 BPF_LD_MAP_FD(1, mapfd),                                        /* r1 = mapfd */
291 |                 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),                        /* r0 = map_lookup_elem(r1, r2) */
292 |                 BPF_JMP_IMM(BPF_JNE, 0, 0, 2),                                  /* if (r0 != NULL) skip 2 */
293 |                 BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
294 |                 BPF_EXIT_INSN(),                                                /* return */
295 | 
296 |                 /* return exactly the packet length*/
297 |                 BPF_MOV_IMM(0, sizeof(struct ether_arp)),                       /* r0 = sizeof(struct ether_arp) */
298 |                 BPF_EXIT_INSN(),                                                /* return */
299 |         };
300 |         union bpf_attr attr;
301 |         int progfd;
302 | 
303 |         memset(&attr, 0, sizeof(attr));
304 |         attr = (union bpf_attr){
305 |                 .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
306 |                 .insns     = (uint64_t)(unsigned long)prog,
307 |                 .insn_cnt  = sizeof(prog) / sizeof(*prog),
308 |                 .license   = (uint64_t)(unsigned long)"ASL",
309 |         };
310 | 
311 |         progfd = n_acd_syscall_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
312 |         if (progfd < 0)
313 |                 return -errno;
314 | 
315 |         *progfdp = progfd;
316 |         return 0;
317 | }
318 | 


--------------------------------------------------------------------------------
/src/n-acd-private.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <c-list.h>
  4 | #include <c-rbtree.h>
  5 | #include <c-stdaux.h>
  6 | #include <errno.h>
  7 | #include <inttypes.h>
  8 | #include <netinet/if_ether.h>
  9 | #include <netinet/in.h>
 10 | #include <stdbool.h>
 11 | #include <stdlib.h>
 12 | #include "util/timer.h"
 13 | #include "n-acd.h"
 14 | 
 15 | typedef struct NAcdEventNode NAcdEventNode;
 16 | 
 17 | /* This augments the error-codes with internal ones that are never exposed. */
 18 | enum {
 19 |         _N_ACD_INTERNAL = _N_ACD_E_N,
 20 | 
 21 |         N_ACD_E_DROPPED,
 22 | };
 23 | 
 24 | enum {
 25 |         N_ACD_PROBE_STATE_PROBING,
 26 |         N_ACD_PROBE_STATE_CONFIGURING,
 27 |         N_ACD_PROBE_STATE_ANNOUNCING,
 28 |         N_ACD_PROBE_STATE_FAILED,
 29 | };
 30 | 
 31 | struct NAcdConfig {
 32 |         int ifindex;
 33 |         unsigned int transport;
 34 |         uint8_t mac[ETH_ALEN];
 35 |         size_t n_mac;
 36 | };
 37 | 
 38 | #define N_ACD_CONFIG_NULL(_x) {                                                 \
 39 |                 .transport = _N_ACD_TRANSPORT_N,                                \
 40 |         }
 41 | 
 42 | struct NAcdProbeConfig {
 43 |         struct in_addr ip;
 44 |         uint64_t timeout_msecs;
 45 | };
 46 | 
 47 | #define N_ACD_PROBE_CONFIG_NULL(_x) {                                           \
 48 |                 .timeout_msecs = N_ACD_TIMEOUT_RFC5227,                         \
 49 |         }
 50 | 
 51 | struct NAcdEventNode {
 52 |         CList acd_link;
 53 |         CList probe_link;
 54 |         NAcdEvent event;
 55 |         uint8_t sender[ETH_ALEN];
 56 |         bool is_public : 1;
 57 | };
 58 | 
 59 | #define N_ACD_EVENT_NODE_NULL(_x) {                                             \
 60 |                 .acd_link = C_LIST_INIT((_x).acd_link),                         \
 61 |                 .probe_link = C_LIST_INIT((_x).probe_link),                     \
 62 |         }
 63 | 
 64 | struct NAcd {
 65 |         unsigned long n_refs;
 66 |         unsigned int seed;
 67 |         int fd_epoll;
 68 |         int fd_socket;
 69 |         CRBTree ip_tree;
 70 |         CList event_list;
 71 |         Timer timer;
 72 | 
 73 |         /* BPF map */
 74 |         int fd_bpf_map;
 75 |         size_t n_bpf_map;
 76 |         size_t max_bpf_map;
 77 | 
 78 |         /* configuration */
 79 |         int ifindex;
 80 |         uint8_t mac[ETH_ALEN];
 81 | 
 82 |         /* flags */
 83 |         bool preempted : 1;
 84 | };
 85 | 
 86 | #define N_ACD_NULL(_x) {                                                        \
 87 |                 .n_refs = 1,                                                    \
 88 |                 .fd_epoll = -1,                                                 \
 89 |                 .fd_socket = -1,                                                \
 90 |                 .ip_tree = C_RBTREE_INIT,                                       \
 91 |                 .event_list = C_LIST_INIT((_x).event_list),                     \
 92 |                 .timer = TIMER_NULL((_x).timer),                                \
 93 |                 .fd_bpf_map = -1,                                               \
 94 |         }
 95 | 
 96 | struct NAcdProbe {
 97 |         NAcd *acd;
 98 |         CRBNode ip_node;
 99 |         CList event_list;
100 |         Timeout timeout;
101 | 
102 |         /* configuration */
103 |         struct in_addr ip;
104 |         uint64_t timeout_multiplier;
105 |         void *userdata;
106 | 
107 |         /* state */
108 |         unsigned int state;
109 |         unsigned int n_iteration;
110 |         unsigned int defend;
111 |         uint64_t last_defend;
112 | };
113 | 
114 | #define N_ACD_PROBE_NULL(_x) {                                                  \
115 |                 .ip_node = C_RBNODE_INIT((_x).ip_node),                         \
116 |                 .event_list = C_LIST_INIT((_x).event_list),                     \
117 |                 .timeout = TIMEOUT_INIT((_x).timeout),                          \
118 |                 .state = N_ACD_PROBE_STATE_PROBING,                             \
119 |                 .defend = N_ACD_DEFEND_NEVER,                                   \
120 |         }
121 | 
122 | /* events */
123 | 
124 | int n_acd_event_node_new(NAcdEventNode **nodep);
125 | NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node);
126 | 
127 | /* contexts */
128 | 
129 | void n_acd_remember(NAcd *acd, uint64_t now, bool success);
130 | int n_acd_raise(NAcd *acd, NAcdEventNode **nodep, unsigned int event);
131 | int n_acd_send(NAcd *acd, const struct in_addr *tpa, const struct in_addr *spa);
132 | int n_acd_ensure_bpf_map_space(NAcd *acd);
133 | 
134 | /* probes */
135 | 
136 | int n_acd_probe_new(NAcdProbe **probep, NAcd *acd, NAcdProbeConfig *config);
137 | int n_acd_probe_raise(NAcdProbe *probe, NAcdEventNode **nodep, unsigned int event);
138 | int n_acd_probe_handle_timeout(NAcdProbe *probe);
139 | int n_acd_probe_handle_packet(NAcdProbe *probe, struct ether_arp *packet, bool hard_conflict);
140 | 
141 | /* eBPF */
142 | 
143 | int n_acd_bpf_map_create(int *mapfdp, size_t max_elements);
144 | int n_acd_bpf_map_add(int mapfd, struct in_addr *addr);
145 | int n_acd_bpf_map_remove(int mapfd, struct in_addr *addr);
146 | 
147 | int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *mac);
148 | 
149 | /* inline helpers */
150 | 
151 | static inline void n_acd_event_node_freep(NAcdEventNode **node) {
152 |         if (*node)
153 |                 n_acd_event_node_free(*node);
154 | }
155 | 


--------------------------------------------------------------------------------
/src/n-acd-probe.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * IPv4 Address Conflict Detection
  3 |  *
  4 |  * This file implements the probe object. A probe is basically the
  5 |  * state-machine of a single ACD run. It takes an address to probe for, checks
  6 |  * for conflicts and then defends it once configured.
  7 |  */
  8 | 
  9 | #include <assert.h>
 10 | #include <c-rbtree.h>
 11 | #include <c-stdaux.h>
 12 | #include <endian.h>
 13 | #include <errno.h>
 14 | #include <inttypes.h>
 15 | #include <limits.h>
 16 | #include <netinet/if_ether.h>
 17 | #include <netinet/in.h>
 18 | #include <stdio.h>
 19 | #include <stdlib.h>
 20 | #include <string.h>
 21 | #include <sys/types.h>
 22 | #include <unistd.h>
 23 | #include "n-acd.h"
 24 | #include "n-acd-private.h"
 25 | 
 26 | /*
 27 |  * These parameters and timing intervals are specified in RFC-5227. The
 28 |  * original values are:
 29 |  *
 30 |  *     PROBE_NUM                                3
 31 |  *     PROBE_WAIT                               1s
 32 |  *     PROBE_MIN                                1s
 33 |  *     PROBE_MAX                                3s
 34 |  *     ANNOUNCE_NUM                             3
 35 |  *     ANNOUNCE_WAIT                            2s
 36 |  *     ANNOUNCE_INTERVAL                        2s
 37 |  *     MAX_CONFLICTS                            10
 38 |  *     RATE_LIMIT_INTERVAL                      60s
 39 |  *     DEFEND_INTERVAL                          10s
 40 |  *
 41 |  * If we assume a best-case and worst-case scenario for non-conflicted runs, we
 42 |  * end up with a runtime between 4s and 9s to finish the probe. Then it still
 43 |  * takes a fixed 4s to finish the announcements.
 44 |  *
 45 |  * RFC 5227 section 1.1:
 46 |  *     [...] (Note that the values listed here are fixed constants; they are
 47 |  *     not intended to be modifiable by implementers, operators, or end users.
 48 |  *     These constants are given symbolic names here to facilitate the writing
 49 |  *     of future standards that may want to reference this document with
 50 |  *     different values for these named constants; however, at the present time
 51 |  *     no such future standards exist.) [...]
 52 |  *
 53 |  * Unfortunately, no-one ever stepped up to write a "future standard" to revise
 54 |  * the timings. A 9s timeout for successful link setups is not acceptable today.
 55 |  * Hence, we will just go forward and ignore the proposed values. On both
 56 |  * wired and wireless local links round-trip latencies of below 3ms are common.
 57 |  * We require the caller to set a timeout multiplier, where 1 corresponds to a
 58 |  * total probe time between 0.5 ms and 1.0 ms. On modern networks a multiplier
 59 |  * of about 100 should be a reasonable default. To comply with the RFC select a
 60 |  * multiplier of 9000.
 61 |  */
 62 | #define N_ACD_RFC_PROBE_NUM                     (3)
 63 | #define N_ACD_RFC_PROBE_WAIT_NSEC               (UINT64_C(111111)) /* 1/9 ms */
 64 | #define N_ACD_RFC_PROBE_MIN_NSEC                (UINT64_C(111111)) /* 1/9 ms */
 65 | #define N_ACD_RFC_PROBE_MAX_NSEC                (UINT64_C(333333)) /* 3/9 ms */
 66 | #define N_ACD_RFC_ANNOUNCE_NUM                  (3)
 67 | #define N_ACD_RFC_ANNOUNCE_WAIT_NSEC            (UINT64_C(222222)) /* 2/9 ms */
 68 | #define N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC        (UINT64_C(222222)) /* 2/9 ms */
 69 | #define N_ACD_RFC_MAX_CONFLICTS                 (10)
 70 | #define N_ACD_RFC_RATE_LIMIT_INTERVAL_NSEC      (UINT64_C(60000000000)) /* 60s */
 71 | #define N_ACD_RFC_DEFEND_INTERVAL_NSEC          (UINT64_C(10000000000)) /* 10s */
 72 | 
 73 | /**
 74 |  * n_acd_probe_config_new() - create probe configuration
 75 |  * @configp:                    output argument for new probe configuration
 76 |  *
 77 |  * This creates a new probe configuration. It will be returned in @configp to
 78 |  * the caller, which upon return fully owns the object.
 79 |  *
 80 |  * A probe configuration collects parameters for probes. It never validates the
 81 |  * input, but this is left to the consumer of the configuration to do.
 82 |  *
 83 |  * Return: 0 on success, negative error code on failure.
 84 |  */
 85 | _c_public_ int n_acd_probe_config_new(NAcdProbeConfig **configp) {
 86 |         _c_cleanup_(n_acd_probe_config_freep) NAcdProbeConfig *config = NULL;
 87 | 
 88 |         config = malloc(sizeof(*config));
 89 |         if (!config)
 90 |                 return -ENOMEM;
 91 | 
 92 |         *config = (NAcdProbeConfig)N_ACD_PROBE_CONFIG_NULL(*config);
 93 | 
 94 |         *configp = config;
 95 |         config = NULL;
 96 |         return 0;
 97 | }
 98 | 
 99 | /**
100 |  * n_acd_probe_config_free() - destroy probe configuration
101 |  * @config:                     configuration to operate on, or NULL
102 |  *
103 |  * This destroys the probe configuration and all associated objects. If @config
104 |  * is NULL, this is a no-op.
105 |  *
106 |  * Return: NULL is returned.
107 |  */
108 | _c_public_ NAcdProbeConfig *n_acd_probe_config_free(NAcdProbeConfig *config) {
109 |         if (!config)
110 |                 return NULL;
111 | 
112 |         free(config);
113 | 
114 |         return NULL;
115 | }
116 | 
117 | /**
118 |  * n_acd_probe_config_set_ip() - set ip property
119 |  * @config:                     configuration to operate on
120 |  * @ip:                         ip to set
121 |  *
122 |  * This sets the IP property to the value `ip`. The address is copied into the
123 |  * configuration object. No validation is performed.
124 |  *
125 |  * The IP property selects the IP address that a probe checks for. It is the
126 |  * caller's responsibility to guarantee the address is valid and can be used.
127 |  */
128 | _c_public_ void n_acd_probe_config_set_ip(NAcdProbeConfig *config, struct in_addr ip) {
129 |         config->ip = ip;
130 | }
131 | 
132 | /**
133 |  * n_acd_probe_config_set_timeout() - set timeout property
134 |  * @config:                     configuration to operate on
135 |  * @msecs:                      timeout to set, in milliseconds
136 |  *
137 |  * This sets the timeout to use for a conflict detection probe. The
138 |  * specification default is provided as `N_ACD_TIMEOUT_RFC5227` and corresponds
139 |  * to 9 seconds.
140 |  *
141 |  * If set to 0, conflict detection is skipped and the address is immediately
142 |  * advertised and defended.
143 |  *
144 |  * Depending on the transport used, the API user should select a suitable
145 |  * timeout. Since `ACD` only operates on the link layer, timeouts in the
146 |  * hundreds of milliseconds range should be more than enough for any modern
147 |  * network. Note that increasing this value directly affects the time it takes
148 |  * to connect to a network, since an address should not be used unless conflict
149 |  * detection finishes.
150 |  *
151 |  * Using the specification default is **discouraged**. It is way too slow and
152 |  * not appropriate for modern networks.
153 |  *
154 |  * Default value is `N_ACD_TIMEOUT_RFC5227`.
155 |  */
156 | _c_public_ void n_acd_probe_config_set_timeout(NAcdProbeConfig *config, uint64_t msecs) {
157 |         config->timeout_msecs = msecs;
158 | }
159 | 
160 | static void n_acd_probe_schedule(NAcdProbe *probe, uint64_t n_timeout, unsigned int n_jitter) {
161 |         uint64_t n_time;
162 | 
163 |         timer_now(&probe->acd->timer, &n_time);
164 |         n_time += n_timeout;
165 | 
166 |         /*
167 |          * ACD specifies jitter values to reduce packet storms on the local
168 |          * link. This call accepts the maximum relative jitter value in
169 |          * nanoseconds as @n_jitter. We then use rand_r(3p) to get a
170 |          * pseudo-random jitter on top of the real timeout given as @n_timeout.
171 |          */
172 |         if (n_jitter) {
173 |                 uint64_t random;
174 | 
175 |                 random = ((uint64_t)rand_r(&probe->acd->seed) << 32) | (uint64_t)rand_r(&probe->acd->seed);
176 |                 n_time += random % n_jitter;
177 |         }
178 | 
179 |         timeout_schedule(&probe->timeout, &probe->acd->timer, n_time);
180 | }
181 | 
182 | static void n_acd_probe_unschedule(NAcdProbe *probe) {
183 |         timeout_unschedule(&probe->timeout);
184 | }
185 | 
186 | static bool n_acd_probe_is_unique(NAcdProbe *probe) {
187 |         NAcdProbe *sibling;
188 | 
189 |         if (!c_rbnode_is_linked(&probe->ip_node))
190 |                 return false;
191 | 
192 |         sibling = c_rbnode_entry(c_rbnode_next(&probe->ip_node), NAcdProbe, ip_node);
193 |         if (sibling && sibling->ip.s_addr == probe->ip.s_addr)
194 |                 return false;
195 | 
196 |         sibling = c_rbnode_entry(c_rbnode_prev(&probe->ip_node), NAcdProbe, ip_node);
197 |         if (sibling && sibling->ip.s_addr == probe->ip.s_addr)
198 |                 return false;
199 | 
200 |         return true;
201 | }
202 | 
203 | static int n_acd_probe_link(NAcdProbe *probe) {
204 |         int r;
205 | 
206 |         /*
207 |          * Make sure the kernel bpf map has space for at least one more
208 |          * entry.
209 |          */
210 |         r = n_acd_ensure_bpf_map_space(probe->acd);
211 |         if (r)
212 |                 return r;
213 | 
214 |         /*
215 |          * Link entry into context, indexed by its IP. Note that we allow
216 |          * duplicates just fine. It is up to you to decide whether to avoid
217 |          * duplicates, if you don't want them. Duplicates on the same context
218 |          * do not conflict with each other, though.
219 |          */
220 |         {
221 |                 CRBNode **slot, *parent;
222 |                 NAcdProbe *other;
223 | 
224 |                 slot = &probe->acd->ip_tree.root;
225 |                 parent = NULL;
226 |                 while (*slot) {
227 |                         other = c_rbnode_entry(*slot, NAcdProbe, ip_node);
228 |                         parent = *slot;
229 |                         if (probe->ip.s_addr < other->ip.s_addr)
230 |                                 slot = &(*slot)->left;
231 |                         else
232 |                                 slot = &(*slot)->right;
233 |                 }
234 | 
235 |                 c_rbtree_add(&probe->acd->ip_tree, parent, slot, &probe->ip_node);
236 |         }
237 | 
238 |         /*
239 |          * Add the ip address to the map, if it is not already there.
240 |          */
241 |         if (n_acd_probe_is_unique(probe)) {
242 |                 r = n_acd_bpf_map_add(probe->acd->fd_bpf_map, &probe->ip);
243 |                 if (r) {
244 |                         /*
245 |                          * Make sure the IP address is linked in userspace iff
246 |                          * it is linked in the kernel.
247 |                          */
248 |                         c_rbnode_unlink(&probe->ip_node);
249 |                         return r;
250 |                 }
251 |                 ++probe->acd->n_bpf_map;
252 |         }
253 | 
254 |         return 0;
255 | }
256 | 
257 | static void n_acd_probe_unlink(NAcdProbe *probe) {
258 |         int r;
259 | 
260 |         /*
261 |          * If this is the only probe for a given IP, remove the IP from the
262 |          * kernel BPF map.
263 |          */
264 |         if (n_acd_probe_is_unique(probe)) {
265 |                 r = n_acd_bpf_map_remove(probe->acd->fd_bpf_map, &probe->ip);
266 |                 c_assert(r >= 0);
267 |                 --probe->acd->n_bpf_map;
268 |         }
269 |         c_rbnode_unlink(&probe->ip_node);
270 | }
271 | 
272 | int n_acd_probe_new(NAcdProbe **probep, NAcd *acd, NAcdProbeConfig *config) {
273 |         _c_cleanup_(n_acd_probe_freep) NAcdProbe *probe = NULL;
274 |         int r;
275 | 
276 |         if (!config->ip.s_addr)
277 |                 return N_ACD_E_INVALID_ARGUMENT;
278 | 
279 |         probe = malloc(sizeof(*probe));
280 |         if (!probe)
281 |                 return -ENOMEM;
282 | 
283 |         *probe = (NAcdProbe)N_ACD_PROBE_NULL(*probe);
284 |         probe->acd = n_acd_ref(acd);
285 |         probe->ip = config->ip;
286 | 
287 |         /*
288 |          * We use the provided timeout-length as multiplier for all our
289 |          * timeouts. The provided timeout defines the maximum length of an
290 |          * entire probe-interval until the first announcement. Given the
291 |          * spec-provided parameters, this ends up as:
292 |          *
293 |          *     PROBE_WAIT + PROBE_MAX + PROBE_MAX + ANNOUNCE_WAIT
294 |          *   =         1s +        3s +        3s +            2s
295 |          *   = 9s
296 |          *
297 |          * Hence, the default value for this timeout is 9000ms, which just
298 |          * ends up matching the spec-provided values.
299 |          *
300 |          * What we now semantically do is divide this timeout by 1ns/1000000.
301 |          * This first turns it into nanoseconds, then strips the unit by
302 |          * turning it into a multiplier. However, rather than performing the
303 |          * division here, we multiplier all our timeouts by 1000000 statically
304 |          * at compile time. Therefore, we can use the user-provided timeout as
305 |          * unmodified multiplier. No conversion necessary.
306 |          */
307 |         probe->timeout_multiplier = config->timeout_msecs;
308 | 
309 |         r = n_acd_probe_link(probe);
310 |         if (r)
311 |                 return r;
312 | 
313 |         /*
314 |          * Now that everything is set up, we have to send the first probe. This
315 |          * is done after ~PROBE_WAIT seconds, hence we schedule our timer.
316 |          * In case no timeout-multiplier is set, we pretend we already sent all
317 |          * probes successfully and schedule the timer so we proceed with the
318 |          * announcements. We must schedule a fake timer there, since we are not
319 |          * allowed to advance the state machine outside of n_acd_dispatch().
320 |          */
321 |         if (probe->timeout_multiplier) {
322 |                 probe->n_iteration = 0;
323 |                 n_acd_probe_schedule(probe,
324 |                                      0,
325 |                                      probe->timeout_multiplier * N_ACD_RFC_PROBE_WAIT_NSEC);
326 |         } else {
327 |                 probe->n_iteration = N_ACD_RFC_PROBE_NUM;
328 |                 n_acd_probe_schedule(probe, 0, 0);
329 |         }
330 | 
331 |         *probep = probe;
332 |         probe = NULL;
333 |         return 0;
334 | }
335 | 
336 | /**
337 |  * n_acd_probe_free() - destroy a probe
338 |  * @probe:                      probe to operate on, or NULL
339 |  *
340 |  * This destroys the probe specified by @probe. All operations are immediately
341 |  * ceded and all associated objects are released.
342 |  *
343 |  * If @probe is NULL, this is a no-op.
344 |  *
345 |  * This function will flush all events associated with @probe from the event
346 |  * queue. That is, no events will be returned for this @probe anymore.
347 |  *
348 |  * Return: NULL is returned.
349 |  */
350 | _c_public_ NAcdProbe *n_acd_probe_free(NAcdProbe *probe) {
351 |         NAcdEventNode *node, *t_node;
352 | 
353 |         if (!probe)
354 |                 return NULL;
355 | 
356 |         c_list_for_each_entry_safe(node, t_node, &probe->event_list, probe_link)
357 |                 n_acd_event_node_free(node);
358 | 
359 |         n_acd_probe_unschedule(probe);
360 |         n_acd_probe_unlink(probe);
361 |         probe->acd = n_acd_unref(probe->acd);
362 |         free(probe);
363 | 
364 |         return NULL;
365 | }
366 | 
367 | int n_acd_probe_raise(NAcdProbe *probe, NAcdEventNode **nodep, unsigned int event) {
368 |         _c_cleanup_(n_acd_event_node_freep) NAcdEventNode *node = NULL;
369 |         int r;
370 | 
371 |         r = n_acd_raise(probe->acd, &node, event);
372 |         if (r)
373 |                 return r;
374 | 
375 |         switch (event) {
376 |         case N_ACD_EVENT_READY:
377 |                 node->event.ready.probe = probe;
378 |                 break;
379 |         case N_ACD_EVENT_USED:
380 |                 node->event.used.probe = probe;
381 |                 break;
382 |         case N_ACD_EVENT_DEFENDED:
383 |                 node->event.defended.probe = probe;
384 |                 break;
385 |         case N_ACD_EVENT_CONFLICT:
386 |                 node->event.conflict.probe = probe;
387 |                 break;
388 |         default:
389 |                 c_assert(0);
390 |                 return -ENOTRECOVERABLE;
391 |         }
392 | 
393 |         c_list_link_tail(&probe->event_list, &node->probe_link);
394 | 
395 |         if (nodep)
396 |                 *nodep = node;
397 |         node = NULL;
398 |         return 0;
399 | }
400 | 
401 | int n_acd_probe_handle_timeout(NAcdProbe *probe) {
402 |         int r;
403 | 
404 |         switch (probe->state) {
405 |         case N_ACD_PROBE_STATE_PROBING:
406 |                 /*
407 |                  * We are still PROBING. We send 3 probes with a random timeout
408 |                  * scheduled between each. If, after a fixed timeout, we did
409 |                  * not receive any conflict we consider the probing successful.
410 |                  */
411 |                 if (probe->n_iteration < N_ACD_RFC_PROBE_NUM) {
412 |                         /*
413 |                          * We have not sent all 3 probes, yet. A timer fired,
414 |                          * so we are ready to send the next probe. If this is
415 |                          * the third probe, schedule a timer for ANNOUNCE_WAIT
416 |                          * to give other peers a chance to answer. If this is
417 |                          * not the third probe, wait between PROBE_MIN and
418 |                          * PROBE_MAX for the next probe.
419 |                          */
420 | 
421 |                         r = n_acd_send(probe->acd, &probe->ip, NULL);
422 |                         if (r) {
423 |                                 if (r != N_ACD_E_DROPPED)
424 |                                         return r;
425 | 
426 |                                 /*
427 |                                  * Packet was dropped, and we know about it. It
428 |                                  * never reached the network. Reasons are
429 |                                  * manifold, and n_acd_send() raises events if
430 |                                  * necessary.
431 |                                  * From a probe-perspective, we simply pretend
432 |                                  * we never sent the probe and schedule a
433 |                                  * timeout for the next probe, effectively
434 |                                  * doubling a single probe-interval.
435 |                                  */
436 |                         } else {
437 |                                 /* Successfully sent, so advance counter. */
438 |                                 ++probe->n_iteration;
439 |                         }
440 | 
441 |                         if (probe->n_iteration < N_ACD_RFC_PROBE_NUM)
442 |                                 n_acd_probe_schedule(probe,
443 |                                                      probe->timeout_multiplier * N_ACD_RFC_PROBE_MIN_NSEC,
444 |                                                      probe->timeout_multiplier * (N_ACD_RFC_PROBE_MAX_NSEC - N_ACD_RFC_PROBE_MIN_NSEC));
445 |                         else
446 |                                 n_acd_probe_schedule(probe,
447 |                                                      probe->timeout_multiplier * N_ACD_RFC_ANNOUNCE_WAIT_NSEC,
448 |                                                      0);
449 |                 } else {
450 |                         /*
451 |                          * All 3 probes succeeded and we waited enough to
452 |                          * consider this address usable by now. Do not announce
453 |                          * the address, yet. We must first give the caller a
454 |                          * chance to configure the address (so they can answer
455 |                          * ARP requests), before announcing it.
456 |                          */
457 |                         r = n_acd_probe_raise(probe, NULL, N_ACD_EVENT_READY);
458 |                         if (r)
459 |                                 return r;
460 | 
461 |                         probe->state = N_ACD_PROBE_STATE_CONFIGURING;
462 |                 }
463 | 
464 |                 break;
465 | 
466 |         case N_ACD_PROBE_STATE_ANNOUNCING:
467 |                 /*
468 |                  * We are ANNOUNCING, meaning the caller configured the address
469 |                  * on the interface and is actively using it. We send 3
470 |                  * announcements out, in a short interval, and then just
471 |                  * perform passive conflict detection.
472 |                  * Note that once all 3 announcements are sent, we no longer
473 |                  * schedule a timer, so this part should not trigger, anymore.
474 |                  */
475 | 
476 |                 r = n_acd_send(probe->acd, &probe->ip, &probe->ip);
477 |                 if (r) {
478 |                         if (r != N_ACD_E_DROPPED)
479 |                                 return r;
480 | 
481 |                         /*
482 |                          * See above in STATE_PROBING for details. We know the
483 |                          * packet was never sent, so we simply try again after
484 |                          * extending the timer.
485 |                          */
486 |                 } else {
487 |                         /* Successfully sent, so advance counter. */
488 |                         ++probe->n_iteration;
489 |                 }
490 | 
491 |                 if (probe->n_iteration < N_ACD_RFC_ANNOUNCE_NUM) {
492 |                         /*
493 |                          * Announcements are always scheduled according to the
494 |                          * time-intervals specified in the spec. We always use
495 |                          * the RFC5227-mandated multiplier.
496 |                          * If you reconsider this, note that timeout_multiplier
497 |                          * might be 0 here.
498 |                          */
499 |                         n_acd_probe_schedule(probe,
500 |                                              N_ACD_TIMEOUT_RFC5227 * N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC,
501 |                                              0);
502 |                 }
503 | 
504 |                 break;
505 | 
506 |         case N_ACD_PROBE_STATE_CONFIGURING:
507 |         case N_ACD_PROBE_STATE_FAILED:
508 |         default:
509 |                 /*
510 |                  * There are no timeouts in these states. If we trigger one,
511 |                  * something is fishy.
512 |                  */
513 |                 c_assert(0);
514 |                 return -ENOTRECOVERABLE;
515 |         }
516 | 
517 |         return 0;
518 | }
519 | 
520 | int n_acd_probe_handle_packet(NAcdProbe *probe, struct ether_arp *packet, bool hard_conflict) {
521 |         NAcdEventNode *node;
522 |         uint64_t now;
523 |         int r;
524 | 
525 |         timer_now(&probe->acd->timer, &now);
526 | 
527 |         switch (probe->state) {
528 |         case N_ACD_PROBE_STATE_PROBING:
529 |                 /*
530 |                  * Regardless whether this is a hard or soft conflict, we must
531 |                  * treat this as a probe failure. That is, notify the caller of
532 |                  * the conflict and wait for further instructions. We do not
533 |                  * react to this, until the caller tells us what to do, but we
534 |                  * do stop sending further probes.
535 |                  */
536 |                 r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_USED);
537 |                 if (r)
538 |                         return r;
539 | 
540 |                 node->event.used.sender = node->sender;
541 |                 node->event.used.n_sender = ETH_ALEN;
542 |                 memcpy(node->sender, packet->arp_sha, ETH_ALEN);
543 | 
544 |                 n_acd_probe_unschedule(probe);
545 |                 n_acd_probe_unlink(probe);
546 |                 probe->state = N_ACD_PROBE_STATE_FAILED;
547 | 
548 |                 break;
549 | 
550 |         case N_ACD_PROBE_STATE_CONFIGURING:
551 |                 /*
552 |                  * We are waiting for the caller to configure the interface and
553 |                  * start ANNOUNCING. In this state, we cannot defend the
554 |                  * address as that would indicate that it is ready to be used,
555 |                  * and we cannot signal CONFLICT or USED as the caller may
556 |                  * already have started to use the address (and may have
557 |                  * configured the engine to always defend it, which means they
558 |                  * should be able to rely on never losing it after READY).
559 |                  * Simply drop the event, and rely on the anticipated ANNOUNCE
560 |                  * to trigger it again.
561 |                  */
562 | 
563 |                 break;
564 | 
565 |         case N_ACD_PROBE_STATE_ANNOUNCING: {
566 |                 /*
567 |                  * We were already instructed to announce the address, which
568 |                  * means the address is configured and in use. Hence, the
569 |                  * caller is responsible to serve regular ARP queries. Meaning,
570 |                  * we can ignore any soft conflicts (other peers doing ACD).
571 |                  *
572 |                  * But if we see a hard-conflict, we either defend the address
573 |                  * according to the caller's instructions, or we report the
574 |                  * conflict and bail out.
575 |                  */
576 |                 bool conflict = false, rate_limited = false;
577 | 
578 |                 if (!hard_conflict)
579 |                         break;
580 | 
581 |                 rate_limited = now < probe->last_defend + N_ACD_RFC_DEFEND_INTERVAL_NSEC;
582 | 
583 |                 switch (probe->defend) {
584 |                 case N_ACD_DEFEND_NEVER:
585 |                         conflict = true;
586 |                         break;
587 |                 case N_ACD_DEFEND_ONCE:
588 |                         if (rate_limited) {
589 |                                 conflict = true;
590 |                                 break;
591 |                         }
592 | 
593 |                         /* fallthrough */
594 |                 case N_ACD_DEFEND_ALWAYS:
595 |                         if (!rate_limited) {
596 |                                 r = n_acd_send(probe->acd, &probe->ip, &probe->ip);
597 |                                 if (r) {
598 |                                         if (r != N_ACD_E_DROPPED)
599 |                                                 return r;
600 | 
601 |                                         if (probe->defend == N_ACD_DEFEND_ONCE) {
602 |                                                 conflict = true;
603 |                                                 break;
604 |                                         }
605 |                                 }
606 | 
607 |                                 if (r != N_ACD_E_DROPPED)
608 |                                         probe->last_defend = now;
609 |                         }
610 | 
611 |                         r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_DEFENDED);
612 |                         if (r)
613 |                                 return r;
614 | 
615 |                         node->event.defended.sender = node->sender;
616 |                         node->event.defended.n_sender = ETH_ALEN;
617 |                         memcpy(node->sender, packet->arp_sha, ETH_ALEN);
618 | 
619 |                         break;
620 |                 }
621 | 
622 |                 if (conflict) {
623 |                         r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_CONFLICT);
624 |                         if (r)
625 |                                 return r;
626 | 
627 |                         node->event.conflict.sender = node->sender;
628 |                         node->event.conflict.n_sender = ETH_ALEN;
629 |                         memcpy(node->sender, packet->arp_sha, ETH_ALEN);
630 | 
631 |                         n_acd_probe_unschedule(probe);
632 |                         n_acd_probe_unlink(probe);
633 |                         probe->state = N_ACD_PROBE_STATE_FAILED;
634 |                 }
635 | 
636 |                 break;
637 |         }
638 | 
639 |         case N_ACD_PROBE_STATE_FAILED:
640 |         default:
641 |                 /*
642 |                  * We are not listening for packets in these states. If we receive one,
643 |                  * something is fishy.
644 |                  */
645 |                 c_assert(0);
646 |                 return -ENOTRECOVERABLE;
647 |         }
648 | 
649 |         return 0;
650 | }
651 | 
652 | /**
653 |  * n_acd_probe_set_userdata - set userdata
654 |  * @probe:                      probe to operate on
655 |  * @userdata:                   userdata pointer
656 |  *
657 |  * This can be used to set a caller-controlled user-data pointer on @probe. The
658 |  * value of the pointer is never inspected or used by `n-acd` and is fully
659 |  * under control of the caller.
660 |  *
661 |  * The default value is NULL.
662 |  */
663 | _c_public_ void n_acd_probe_set_userdata(NAcdProbe *probe, void *userdata) {
664 |         probe->userdata = userdata;
665 | }
666 | 
667 | /**
668 |  * n_acd_probe_get_userdata - get userdata
669 |  * @probe:                      probe to operate on
670 |  *
671 |  * This queries the userdata pointer that was previously set through
672 |  * n_acd_probe_set_userdata().
673 |  *
674 |  * The default value is NULL.
675 |  *
676 |  * Return: The stored userdata pointer is returned.
677 |  */
678 | _c_public_ void n_acd_probe_get_userdata(NAcdProbe *probe, void **userdatap) {
679 |         *userdatap = probe->userdata;
680 | }
681 | 
682 | /**
683 |  * n_acd_probe_announce() - announce the configured IP address
684 |  * @probe:                      probe to operate on
685 |  * @defend:                     defense policy
686 |  *
687 |  * Announce the IP address on the local link, and start defending it according
688 |  * to the given policy, which mut be one of N_ACD_DEFEND_ONCE,
689 |  * N_ACD_DEFEND_NEVER, or N_ACD_DEFEND_ALWAYS.
690 |  *
691 |  * This must be called in response to an N_ACD_EVENT_READY event, and only
692 |  * after the given address has been configured on the given network interface.
693 |  *
694 |  * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT in case the defense policy
695 |  *         is invalid, negative error code on failure.
696 |  */
697 | _c_public_ int n_acd_probe_announce(NAcdProbe *probe, unsigned int defend) {
698 |         if (defend >= _N_ACD_DEFEND_N)
699 |                 return N_ACD_E_INVALID_ARGUMENT;
700 | 
701 |         probe->state = N_ACD_PROBE_STATE_ANNOUNCING;
702 |         probe->defend = defend;
703 |         probe->n_iteration = 0;
704 | 
705 |         /*
706 |          * We must schedule a fake-timeout, since we are not allowed to
707 |          * advance the state-machine outside of n_acd_dispatch().
708 |          */
709 |         n_acd_probe_schedule(probe, 0, 0);
710 | 
711 |         return 0;
712 | }
713 | 


--------------------------------------------------------------------------------
/src/n-acd.c:
--------------------------------------------------------------------------------
   1 | /*
   2 |  * IPv4 Address Conflict Detection
   3 |  *
   4 |  * This file contains the main context initialization and management functions,
   5 |  * as well as a bunch of utilities used through the n-acd modules.
   6 |  */
   7 | 
   8 | /**
   9 |  * DOC: IPv4 Address Conflict Detection
  10 |  *
  11 |  * The `n-acd` project implements the IPv4 Address Conflict Detection protocol
  12 |  * as defined in RFC-5227. The protocol originates in the IPv4 Link Local
  13 |  * Address selection but was later on generalized and resulted in `ACD`. The
  14 |  * idea is to use `ARP` to query a link for an address to see whether it
  15 |  * already exists on the network, as well as defending an address that is in
  16 |  * use on a network interface. Furthermore, `ACD` provides passive diagnostics
  17 |  * for administrators, as it will detect address conflicts automatically, which
  18 |  * then can be logged or shown to a user.
  19 |  *
  20 |  * The main context object of `n-acd` is the `NAcd` structure. It is a passive
  21 |  * ref-counted context object which drives `ACD` probes running on it. A
  22 |  * context is specific to a linux network device and transport. If multiple
  23 |  * network devices are used, then separate `NAcd` contexts must be deployed.
  24 |  *
  25 |  * The `NAcdProbe` object drives a single `ACD` state-machine. A probe is
  26 |  * created on an `NAcd` context by providing an address to probe for. The probe
  27 |  * will then raise notifications whether the address conflict detection found
  28 |  * something, or whether the address is ready to be used. Optionally, the probe
  29 |  * will then enter into passive mode and defend the address as long as it is
  30 |  * kept active.
  31 |  *
  32 |  * Note that the `n-acd` project only implements the networking protocol. It
  33 |  * never queries or modifies network interfaces. It completely relies on the
  34 |  * API user to react to notifications and update network interfaces
  35 |  * respectively. `n-acd` uses an event-mechanism on every context object. All
  36 |  * events raise by any probe or operation on a given context will queue all
  37 |  * events on that context object. The event-queue can then be drained by the
  38 |  * API user. All events are properly asynchronous and designed in a way that no
  39 |  * synchronous reaction to any event is required. That is, the events are
  40 |  * carefully designed to allow forwarding via IPC (or even networks) to a
  41 |  * controller that handles them and specifies how to react. Furthermore, none
  42 |  * of the function calls of `n-acd` require synchronous error handling.
  43 |  * Instead, functions only ever return values on fatal errors. Everything else
  44 |  * is queued as events, thus guaranteeing that synchronous handling of return
  45 |  * values is not required. Exceptions are functions that do not affect internal
  46 |  * state or do not have an associated context object.
  47 |  */
  48 | 
  49 | #include <assert.h>
  50 | #include <c-list.h>
  51 | #include <c-rbtree.h>
  52 | #include <c-siphash.h>
  53 | #include <c-stdaux.h>
  54 | #include <endian.h>
  55 | #include <errno.h>
  56 | #include <inttypes.h>
  57 | #include <limits.h>
  58 | #include <linux/if_packet.h>
  59 | #include <netinet/if_ether.h>
  60 | #include <netinet/in.h>
  61 | #include <stdlib.h>
  62 | #include <string.h>
  63 | #include <sys/auxv.h>
  64 | #include <sys/epoll.h>
  65 | #include <sys/socket.h>
  66 | #include <sys/types.h>
  67 | #include <unistd.h>
  68 | #include "n-acd.h"
  69 | #include "n-acd-private.h"
  70 | 
  71 | enum {
  72 |         N_ACD_EPOLL_TIMER,
  73 |         N_ACD_EPOLL_SOCKET,
  74 | };
  75 | 
  76 | static int n_acd_get_random(unsigned int *random) {
  77 |         uint8_t hash_seed[] = {
  78 |                 0x3a, 0x0c, 0xa6, 0xdd, 0x44, 0xef, 0x5f, 0x7a,
  79 |                 0x5e, 0xd7, 0x25, 0x37, 0xbf, 0x4e, 0x80, 0xa1,
  80 |         };
  81 |         CSipHash hash = C_SIPHASH_NULL;
  82 |         struct timespec ts;
  83 |         const uint8_t *p;
  84 |         int r;
  85 | 
  86 |         /*
  87 |          * We need random jitter for all timeouts when handling ARP probes. Use
  88 |          * AT_RANDOM to get a seed for rand_r(3p), if available (should always
  89 |          * be available on linux). See the time-out scheduler for details.
  90 |          * Additionally, we include the current time in the seed. This avoids
  91 |          * using the same jitter in case you run multiple ACD engines in the
  92 |          * same process. Lastly, the seed is hashed with SipHash24 to avoid
  93 |          * exposing the value of AT_RANDOM on the network.
  94 |          */
  95 |         c_siphash_init(&hash, hash_seed);
  96 | 
  97 |         p = (const uint8_t *)getauxval(AT_RANDOM);
  98 |         if (p)
  99 |                 c_siphash_append(&hash, p, 16);
 100 | 
 101 |         r = clock_gettime(CLOCK_MONOTONIC, &ts);
 102 |         if (r < 0)
 103 |                 return -c_errno();
 104 | 
 105 |         c_siphash_append(&hash, (const uint8_t *)&ts.tv_sec, sizeof(ts.tv_sec));
 106 |         c_siphash_append(&hash, (const uint8_t *)&ts.tv_nsec, sizeof(ts.tv_nsec));
 107 | 
 108 |         *random = c_siphash_finalize(&hash);
 109 |         return 0;
 110 | }
 111 | 
 112 | static int n_acd_socket_new(int *fdp, int fd_bpf_prog, NAcdConfig *config) {
 113 |         const struct sockaddr_ll address = {
 114 |                 .sll_family = AF_PACKET,
 115 |                 .sll_protocol = htobe16(ETH_P_ARP),
 116 |                 .sll_ifindex = config->ifindex,
 117 |                 .sll_halen = ETH_ALEN,
 118 |                 .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
 119 |         };
 120 |         int r, s = -1;
 121 | 
 122 |         s = socket(PF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
 123 |         if (s < 0) {
 124 |                 r = -c_errno();
 125 |                 goto error;
 126 |         }
 127 | 
 128 |         if (fd_bpf_prog >= 0) {
 129 |                 r = setsockopt(s, SOL_SOCKET, SO_ATTACH_BPF, &fd_bpf_prog, sizeof(fd_bpf_prog));
 130 |                 if (r < 0) {
 131 |                         r = -c_errno();
 132 |                         goto error;
 133 |                 }
 134 |         }
 135 | 
 136 |         r = bind(s, (struct sockaddr *)&address, sizeof(address));
 137 |         if (r < 0) {
 138 |                 r = -c_errno();
 139 |                 goto error;
 140 |         }
 141 | 
 142 |         *fdp = s;
 143 |         s = -1;
 144 |         return 0;
 145 | 
 146 | error:
 147 |         if (s >= 0)
 148 |                 close(s);
 149 |         return r;
 150 | }
 151 | 
 152 | /**
 153 |  * n_acd_config_new() - create configuration object
 154 |  * @configp:                    output argument for new configuration
 155 |  *
 156 |  * This creates a new configuration object and provides it to the caller. The
 157 |  * object is fully owned by the caller upon function return.
 158 |  *
 159 |  * A configuration object is a passive structure that is used to collect
 160 |  * information that is then passed to a constructor or other function. A
 161 |  * configuration never validates the data, but it is up to the consumer of a
 162 |  * configuration to do that.
 163 |  *
 164 |  * Return: 0 on success, negative error code on failure.
 165 |  */
 166 | _c_public_ int n_acd_config_new(NAcdConfig **configp) {
 167 |         _c_cleanup_(n_acd_config_freep) NAcdConfig *config = NULL;
 168 | 
 169 |         config = malloc(sizeof(*config));
 170 |         if (!config)
 171 |                 return -ENOMEM;
 172 | 
 173 |         *config = (NAcdConfig)N_ACD_CONFIG_NULL(*config);
 174 | 
 175 |         *configp = config;
 176 |         config = NULL;
 177 |         return 0;
 178 | }
 179 | 
 180 | /**
 181 |  * n_acd_config_free() - destroy configuration object
 182 |  * @config:                     configuration to operate on, or NULL
 183 |  *
 184 |  * This destroys the configuration object @config. If @config is NULL, this is
 185 |  * a no-op.
 186 |  *
 187 |  * Return: NULL is returned.
 188 |  */
 189 | _c_public_ NAcdConfig *n_acd_config_free(NAcdConfig *config) {
 190 |         if (!config)
 191 |                 return NULL;
 192 | 
 193 |         free(config);
 194 | 
 195 |         return NULL;
 196 | }
 197 | 
 198 | /**
 199 |  * n_acd_config_set_ifindex() - set ifindex property
 200 |  * @config:                     configuration to operate on
 201 |  * @ifindex:                    ifindex to set
 202 |  *
 203 |  * This sets the @ifindex property of the configuration object. Any previous
 204 |  * value is overwritten.
 205 |  *
 206 |  * A valid ifindex is a 32bit integer greater than 0. Any other value is
 207 |  * treated as unspecified.
 208 |  *
 209 |  * The ifindex corresponds to the interface index provided by the linux kernel.
 210 |  * It specifies the network device to be used.
 211 |  */
 212 | _c_public_ void n_acd_config_set_ifindex(NAcdConfig *config, int ifindex) {
 213 |         config->ifindex = ifindex;
 214 | }
 215 | 
 216 | /**
 217 |  * n_acd_config_set_transport() - set transport property
 218 |  * @config:                     configuration to operate on
 219 |  * @transport:                  transport to set
 220 |  *
 221 |  * This specifies the transport to use. A transport must be one of the
 222 |  * `N_ACD_TRANSPORT_*` identifiers. It selects which transport protocol `n-acd`
 223 |  * will run on.
 224 |  */
 225 | _c_public_ void n_acd_config_set_transport(NAcdConfig *config, unsigned int transport) {
 226 |         config->transport = transport;
 227 | }
 228 | 
 229 | /**
 230 |  * n_acd_config_set_mac() - set mac property
 231 |  * @config:                     configuration to operate on
 232 |  * @mac:                        mac to set
 233 |  *
 234 |  * This specifies the hardware address (also referred to as `MAC Address`) to
 235 |  * use. Any hardware address can be specified. It is the caller's
 236 |  * responsibility to make sure the address can actually be used.
 237 |  *
 238 |  * The address in @mac is copied into @config. It does not have to be retained
 239 |  * by the caller.
 240 |  */
 241 | _c_public_ void n_acd_config_set_mac(NAcdConfig *config, const uint8_t *mac, size_t n_mac) {
 242 |         /*
 243 |          * We truncate the address at the maximum we support. We still remember
 244 |          * the original length, so any consumer of this configuration can then
 245 |          * complain about an unsupported address length. This allows us to
 246 |          * avoid a memory allocation here and having to return `int`.
 247 |          */
 248 |         config->n_mac = n_mac;
 249 |         memcpy(config->mac, mac, n_mac > ETH_ALEN ? ETH_ALEN : n_mac);
 250 | }
 251 | 
 252 | int n_acd_event_node_new(NAcdEventNode **nodep) {
 253 |         NAcdEventNode *node;
 254 | 
 255 |         node = malloc(sizeof(*node));
 256 |         if (!node)
 257 |                 return -ENOMEM;
 258 | 
 259 |         *node = (NAcdEventNode)N_ACD_EVENT_NODE_NULL(*node);
 260 | 
 261 |         *nodep = node;
 262 |         return 0;
 263 | }
 264 | 
 265 | NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node) {
 266 |         if (!node)
 267 |                 return NULL;
 268 | 
 269 |         c_list_unlink(&node->probe_link);
 270 |         c_list_unlink(&node->acd_link);
 271 |         free(node);
 272 | 
 273 |         return NULL;
 274 | }
 275 | 
 276 | int n_acd_ensure_bpf_map_space(NAcd *acd) {
 277 |         NAcdProbe *probe;
 278 |         _c_cleanup_(c_closep) int fd_map = -1, fd_prog = -1;
 279 |         size_t  max_map;
 280 |         int r;
 281 | 
 282 |         if (acd->n_bpf_map < acd->max_bpf_map)
 283 |                 return 0;
 284 | 
 285 |         max_map = 2 * acd->max_bpf_map;
 286 | 
 287 |         r = n_acd_bpf_map_create(&fd_map, max_map);
 288 |         if (r)
 289 |                 return r;
 290 | 
 291 |         c_rbtree_for_each_entry(probe, &acd->ip_tree, ip_node) {
 292 |                 r = n_acd_bpf_map_add(fd_map, &probe->ip);
 293 |                 if (r)
 294 |                         return r;
 295 |         }
 296 | 
 297 |         r = n_acd_bpf_compile(&fd_prog, fd_map, (struct ether_addr*) acd->mac);
 298 |         if (r)
 299 |                 return r;
 300 | 
 301 |         if (fd_prog >= 0) {
 302 |                 r = setsockopt(acd->fd_socket, SOL_SOCKET, SO_ATTACH_BPF, &fd_prog, sizeof(fd_prog));
 303 |                 if (r)
 304 |                         return -c_errno();
 305 |         }
 306 | 
 307 |         if (acd->fd_bpf_map >= 0)
 308 |                 close(acd->fd_bpf_map);
 309 |         acd->fd_bpf_map = fd_map;
 310 |         fd_map = -1;
 311 |         acd->max_bpf_map = max_map;
 312 |         return 0;
 313 | }
 314 | 
 315 | /**
 316 |  * n_acd_new() - create a new ACD context
 317 |  * @acdp:                       output argument for new context object
 318 |  * @config:                     configuration parameters
 319 |  *
 320 |  * Create a new ACD context and return it in @acdp. The configuration @config
 321 |  * must be initialized by the caller and must specify a valid network
 322 |  * interface, transport mechanism, as well as hardware address compatible with
 323 |  * the selected transport. The configuration is copied into the context. The
 324 |  * @config object thus does not have to be retained by the caller.
 325 |  *
 326 |  * Return: 0 on success, negative error code on failure.
 327 |  */
 328 | _c_public_ int n_acd_new(NAcd **acdp, NAcdConfig *config) {
 329 |         _c_cleanup_(n_acd_unrefp) NAcd *acd = NULL;
 330 |         _c_cleanup_(c_closep) int fd_bpf_prog = -1;
 331 |         struct epoll_event eevent;
 332 |         int r;
 333 | 
 334 |         if (config->ifindex <= 0 ||
 335 |             config->transport != N_ACD_TRANSPORT_ETHERNET ||
 336 |             config->n_mac != ETH_ALEN ||
 337 |             !memcmp(config->mac, (uint8_t[ETH_ALEN]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, ETH_ALEN))
 338 |                 return N_ACD_E_INVALID_ARGUMENT;
 339 | 
 340 |         acd = malloc(sizeof(*acd));
 341 |         if (!acd)
 342 |                 return -ENOMEM;
 343 | 
 344 |         *acd = (NAcd)N_ACD_NULL(*acd);
 345 |         acd->ifindex = config->ifindex;
 346 |         memcpy(acd->mac, config->mac, ETH_ALEN);
 347 | 
 348 |         r = n_acd_get_random(&acd->seed);
 349 |         if (r)
 350 |                 return r;
 351 | 
 352 |         acd->fd_epoll = epoll_create1(EPOLL_CLOEXEC);
 353 |         if (acd->fd_epoll < 0)
 354 |                 return -c_errno();
 355 | 
 356 |         r = timer_init(&acd->timer);
 357 |         if (r < 0)
 358 |                 return r;
 359 | 
 360 |         acd->max_bpf_map = 8;
 361 | 
 362 |         r = n_acd_bpf_map_create(&acd->fd_bpf_map, acd->max_bpf_map);
 363 |         if (r)
 364 |                 return r;
 365 | 
 366 |         r = n_acd_bpf_compile(&fd_bpf_prog, acd->fd_bpf_map, (struct ether_addr*) acd->mac);
 367 |         if (r)
 368 |                 return r;
 369 | 
 370 |         r = n_acd_socket_new(&acd->fd_socket, fd_bpf_prog, config);
 371 |         if (r)
 372 |                 return r;
 373 | 
 374 |         eevent = (struct epoll_event){
 375 |                 .events = EPOLLIN,
 376 |                 .data.u32 = N_ACD_EPOLL_TIMER,
 377 |         };
 378 |         r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->timer.fd, &eevent);
 379 |         if (r < 0)
 380 |                 return -c_errno();
 381 | 
 382 |         eevent = (struct epoll_event){
 383 |                 .events = EPOLLIN,
 384 |                 .data.u32 = N_ACD_EPOLL_SOCKET,
 385 |         };
 386 |         r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->fd_socket, &eevent);
 387 |         if (r < 0)
 388 |                 return -c_errno();
 389 | 
 390 |         *acdp = acd;
 391 |         acd = NULL;
 392 |         return 0;
 393 | }
 394 | 
 395 | static void n_acd_free_internal(NAcd *acd) {
 396 |         NAcdEventNode *node, *t_node;
 397 | 
 398 |         if (!acd)
 399 |                 return;
 400 | 
 401 |         c_list_for_each_entry_safe(node, t_node, &acd->event_list, acd_link)
 402 |                 n_acd_event_node_free(node);
 403 | 
 404 |         c_assert(c_rbtree_is_empty(&acd->ip_tree));
 405 | 
 406 |         if (acd->fd_socket >= 0) {
 407 |                 c_assert(acd->fd_epoll >= 0);
 408 |                 epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->fd_socket, NULL);
 409 |                 close(acd->fd_socket);
 410 |                 acd->fd_socket = -1;
 411 |         }
 412 | 
 413 |         if (acd->fd_bpf_map >= 0) {
 414 |                 close(acd->fd_bpf_map);
 415 |                 acd->fd_bpf_map = -1;
 416 |         }
 417 | 
 418 |         if (acd->timer.fd >= 0) {
 419 |                 c_assert(acd->fd_epoll >= 0);
 420 |                 epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->timer.fd, NULL);
 421 |                 timer_deinit(&acd->timer);
 422 |         }
 423 | 
 424 |         if (acd->fd_epoll >= 0) {
 425 |                 close(acd->fd_epoll);
 426 |                 acd->fd_epoll = -1;
 427 |         }
 428 | 
 429 |         free(acd);
 430 | }
 431 | 
 432 | /**
 433 |  * n_acd_ref() - acquire reference
 434 |  * @acd:                        context to operate on, or NULL
 435 |  *
 436 |  * This acquires a single reference to the context specified as @acd. If @acd
 437 |  * is NULL, this is a no-op.
 438 |  *
 439 |  * Return: @acd is returned.
 440 |  */
 441 | _c_public_ NAcd *n_acd_ref(NAcd *acd) {
 442 |         if (acd)
 443 |                 ++acd->n_refs;
 444 |         return acd;
 445 | }
 446 | 
 447 | /**
 448 |  * n_acd_unref() - release reference
 449 |  * @acd:                        context to operate on, or NULL
 450 |  *
 451 |  * This releases a single reference to the context @acd. If this is the last
 452 |  * reference, the context is torn down and deallocated.
 453 |  *
 454 |  * Return: NULL is returned.
 455 |  */
 456 | _c_public_ NAcd *n_acd_unref(NAcd *acd) {
 457 |         if (acd && !--acd->n_refs)
 458 |                 n_acd_free_internal(acd);
 459 |         return NULL;
 460 | }
 461 | 
 462 | int n_acd_raise(NAcd *acd, NAcdEventNode **nodep, unsigned int event) {
 463 |         NAcdEventNode *node;
 464 |         int r;
 465 | 
 466 |         r = n_acd_event_node_new(&node);
 467 |         if (r)
 468 |                 return r;
 469 | 
 470 |         node->event.event = event;
 471 |         c_list_link_tail(&acd->event_list, &node->acd_link);
 472 | 
 473 |         if (nodep)
 474 |                 *nodep = node;
 475 |         return 0;
 476 | }
 477 | 
 478 | int n_acd_send(NAcd *acd, const struct in_addr *tpa, const struct in_addr *spa) {
 479 |         struct sockaddr_ll address = {
 480 |                 .sll_family = AF_PACKET,
 481 |                 .sll_protocol = htobe16(ETH_P_ARP),
 482 |                 .sll_ifindex = acd->ifindex,
 483 |                 .sll_halen = ETH_ALEN,
 484 |                 .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
 485 |         };
 486 |         struct ether_arp arp = {
 487 |                 .ea_hdr = {
 488 |                         .ar_hrd = htobe16(ARPHRD_ETHER),
 489 |                         .ar_pro = htobe16(ETHERTYPE_IP),
 490 |                         .ar_hln = sizeof(acd->mac),
 491 |                         .ar_pln = sizeof(uint32_t),
 492 |                         .ar_op = htobe16(ARPOP_REQUEST),
 493 |                 },
 494 |         };
 495 |         ssize_t l;
 496 |         int r;
 497 | 
 498 |         memcpy(arp.arp_sha, acd->mac, sizeof(acd->mac));
 499 |         memcpy(arp.arp_tpa, &tpa->s_addr, sizeof(uint32_t));
 500 | 
 501 |         if (spa)
 502 |                 memcpy(arp.arp_spa, &spa->s_addr, sizeof(spa->s_addr));
 503 | 
 504 |         l = sendto(acd->fd_socket,
 505 |                    &arp,
 506 |                    sizeof(arp),
 507 |                    MSG_NOSIGNAL,
 508 |                    (struct sockaddr *)&address,
 509 |                    sizeof(address));
 510 |         if (l < 0) {
 511 |                 if (errno == EAGAIN || errno == ENOBUFS) {
 512 |                         /*
 513 |                          * We never maintain outgoing queues. We rely on the
 514 |                          * network device to do that for us. In case the queues
 515 |                          * are full, or the kernel refuses to queue the packet
 516 |                          * for other reasons, we must tell our caller that the
 517 |                          * packet was dropped.
 518 |                          */
 519 |                         return N_ACD_E_DROPPED;
 520 |                 } else if (errno == ENETDOWN || errno == ENXIO) {
 521 |                         /*
 522 |                          * These errors happen if the network device went down
 523 |                          * or was actually removed. We always propagate this as
 524 |                          * event, so the user can react accordingly (similarly
 525 |                          * to the recvmmsg(2) handler). In case the user does
 526 |                          * not immediately react, we also tell our caller that
 527 |                          * the packet was dropped, so we don't erroneously
 528 |                          * treat this as success.
 529 |                          */
 530 | 
 531 |                         r = n_acd_raise(acd, NULL, N_ACD_EVENT_DOWN);
 532 |                         if (r)
 533 |                                 return r;
 534 | 
 535 |                         return N_ACD_E_DROPPED;
 536 |                 }
 537 | 
 538 |                 /*
 539 |                  * Random network error. We treat this as fatal and propagate
 540 |                  * the error, so it is noticed and can be investigated.
 541 |                  */
 542 |                 return -c_errno();
 543 |         } else if (l != (ssize_t)sizeof(arp)) {
 544 |                 /*
 545 |                  * Ugh, the kernel modified the packet. This is unexpected. We
 546 |                  * consider the packet lost.
 547 |                  */
 548 |                 return N_ACD_E_DROPPED;
 549 |         }
 550 | 
 551 |         return 0;
 552 | }
 553 | 
 554 | /**
 555 |  * n_acd_get_fd() - get pollable file descriptor
 556 |  * @acd:                        context object to operate on
 557 |  * @fdp:                        output argument for file descriptor
 558 |  *
 559 |  * This returns the backing file-descriptor of the context object @acd. The
 560 |  * file-descriptor is owned by @acd and valid as long as @acd is. The
 561 |  * file-descriptor never changes, so it can be cached by the caller as long as
 562 |  * they hold a reference to @acd.
 563 |  *
 564 |  * The file-descriptor is internal to the @acd context and should not be
 565 |  * modified by the caller. It is only exposed to allow the caller to poll on
 566 |  * it. Whenever the file-descriptor polls readable, n_acd_dispatch() should be
 567 |  * called.
 568 |  *
 569 |  * Currently, the file-descriptor is an epoll-fd.
 570 |  */
 571 | _c_public_ void n_acd_get_fd(NAcd *acd, int *fdp) {
 572 |         *fdp = acd->fd_epoll;
 573 | }
 574 | 
 575 | static int n_acd_handle_timeout(NAcd *acd) {
 576 |         NAcdProbe *probe;
 577 |         uint64_t now;
 578 |         int r;
 579 | 
 580 |         /*
 581 |          * Read the current time once, and handle all timeouts that triggered
 582 |          * before the current time. Rereading the current time in each loop
 583 |          * might risk creating a live-lock, and the fact that we read the
 584 |          * time after reading the timer guarantees that the timeout which
 585 |          * woke us up is handled.
 586 |          *
 587 |          * When there are no more timeouts to handle at the given time, we
 588 |          * rearm the timer to potentially wake us up again in the future.
 589 |          */
 590 |         timer_now(&acd->timer, &now);
 591 | 
 592 |         for (;;) {
 593 |                 Timeout *timeout;
 594 | 
 595 |                 r = timer_pop_timeout(&acd->timer, now, &timeout);
 596 |                 if (r < 0) {
 597 |                         return r;
 598 |                 } else if (!timeout) {
 599 |                         /*
 600 |                          * There are no more timeouts pending before @now. Rearm
 601 |                          * the timer to fire again at the next timeout.
 602 |                          */
 603 |                         timer_rearm(&acd->timer);
 604 |                         break;
 605 |                 }
 606 | 
 607 |                 probe = (void *)timeout - offsetof(NAcdProbe, timeout);
 608 |                 r = n_acd_probe_handle_timeout(probe);
 609 |                 if (r)
 610 |                         return r;
 611 |         }
 612 | 
 613 |         return 0;
 614 | }
 615 | 
 616 | static int n_acd_handle_packet(NAcd *acd, struct ether_arp *packet) {
 617 |         bool hard_conflict;
 618 |         NAcdProbe *probe;
 619 |         uint32_t addr;
 620 |         CRBNode *node;
 621 |         int r;
 622 | 
 623 |         /*
 624 |          * We are interested in 2 kinds of ARP messages:
 625 |          *
 626 |          *  1) Someone who is *NOT* us sends *ANY* ARP message with our IP
 627 |          *     address as sender. This is never good, because it implies an
 628 |          *     address conflict.
 629 |          *     We call this a hard-conflict.
 630 |          *
 631 |          *  2) Someone who is *NOT* us sends an ARP REQUEST without any sender
 632 |          *     IP, but our IP as target. This implies someone else performs an
 633 |          *     ARP Probe with our address. This also implies a conflict, but
 634 |          *     one that can be resolved by responding to the probe.
 635 |          *     We call this a soft-conflict.
 636 |          *
 637 |          * We are never interested in any other ARP message. The kernel already
 638 |          * deals with everything else, hence, we can silently ignore those.
 639 |          *
 640 |          * Now, we simply check whether a sender-address is set. This allows us
 641 |          * to distinguish both cases. We then check further conditions, so we
 642 |          * can bail out early if neither is the case.
 643 |          *
 644 |          * Lastly, we perform a lookup in our probe-set to check whether the
 645 |          * address actually matches, so we can let these probes dispatch the
 646 |          * message. Note that we allow duplicate probes, so we need to dispatch
 647 |          * each matching probe, not just one.
 648 |          */
 649 | 
 650 |         if (memcmp(packet->arp_spa, (uint8_t[4]){ }, sizeof(packet->arp_spa))) {
 651 |                 memcpy(&addr, packet->arp_spa, sizeof(addr));
 652 |                 hard_conflict = true;
 653 |         } else if (packet->ea_hdr.ar_op == htobe16(ARPOP_REQUEST)) {
 654 |                 memcpy(&addr, packet->arp_tpa, sizeof(addr));
 655 |                 hard_conflict = false;
 656 |         } else {
 657 |                 /*
 658 |                  * The BPF filter will not let through any other packet.
 659 |                  */
 660 |                 return -EIO;
 661 |         }
 662 | 
 663 |         /* Find top-most node that matches @addr. */
 664 |         node = acd->ip_tree.root;
 665 |         while (node) {
 666 |                 probe = c_rbnode_entry(node, NAcdProbe, ip_node);
 667 |                 if (addr < probe->ip.s_addr)
 668 |                         node = node->left;
 669 |                 else if (addr > probe->ip.s_addr)
 670 |                         node = node->right;
 671 |                 else
 672 |                         break;
 673 |         }
 674 | 
 675 |         /*
 676 |          * If the address is unknown, we drop the package. This might happen if
 677 |          * the kernel queued the packet and passed the BPF filter, but we
 678 |          * modified the set before dequeuing the message.
 679 |          */
 680 |         if (!node)
 681 |                 return 0;
 682 | 
 683 |         /* Forward to left-most child that still matches @addr. */
 684 |         while (node->left && addr == c_rbnode_entry(node->left,
 685 |                                                     NAcdProbe,
 686 |                                                     ip_node)->ip.s_addr)
 687 |                 node = node->left;
 688 | 
 689 |         /* Iterate all matching entries in-order. */
 690 |         do {
 691 |                 probe = c_rbnode_entry(node, NAcdProbe, ip_node);
 692 | 
 693 |                 r = n_acd_probe_handle_packet(probe, packet, hard_conflict);
 694 |                 if (r)
 695 |                         return r;
 696 | 
 697 |                 node = c_rbnode_next(node);
 698 |         } while (node && addr == c_rbnode_entry(node,
 699 |                                                 NAcdProbe,
 700 |                                                 ip_node)->ip.s_addr);
 701 | 
 702 |         return 0;
 703 | }
 704 | 
 705 | static int n_acd_dispatch_timer(NAcd *acd, struct epoll_event *event) {
 706 |         int r;
 707 | 
 708 |         if (event->events & (EPOLLHUP | EPOLLERR)) {
 709 |                 /*
 710 |                  * There is no way to handle either gracefully. If we ignored
 711 |                  * them, we would busy-loop, so lets rather forward the error
 712 |                  * to the caller.
 713 |                  */
 714 |                 return -EIO;
 715 |         }
 716 | 
 717 |         if (event->events & EPOLLIN) {
 718 |                 r = timer_read(&acd->timer);
 719 |                 if (r <= 0)
 720 |                         return r;
 721 | 
 722 |                 c_assert(r == TIMER_E_TRIGGERED);
 723 | 
 724 |                 /*
 725 |                  * A timer triggered, handle all pending timeouts at a given
 726 |                  * point in time. There can only be a finite number of pending
 727 |                  * timeouts, any new ones will be in the future, so not handled
 728 |                  * now, but guaranteed to wake us up again when they do trigger.
 729 |                  */
 730 |                 r = n_acd_handle_timeout(acd);
 731 |                 if (r)
 732 |                         return r;
 733 |         }
 734 | 
 735 |         return 0;
 736 | }
 737 | 
 738 | static bool n_acd_packet_is_valid(NAcd *acd, void *packet, size_t n_packet) {
 739 |         struct ether_arp *arp;
 740 | 
 741 |         /*
 742 |          * The eBPF filter will ensure that this function always returns true, however,
 743 |          * this allows the eBPF filter to be an optional optimization which is necessary
 744 |          * on older kernels.
 745 |          *
 746 |          * See comments in n-acd-bpf.c for details.
 747 |          */
 748 | 
 749 |         if (n_packet != sizeof(*arp))
 750 |                 return false;
 751 | 
 752 |         arp = packet;
 753 | 
 754 |         if (arp->arp_hrd != htobe16(ARPHRD_ETHER))
 755 |                 return false;
 756 | 
 757 |         if (arp->arp_pro != htobe16(ETHERTYPE_IP))
 758 |                 return false;
 759 | 
 760 |         if (arp->arp_hln != sizeof(struct ether_addr))
 761 |                 return false;
 762 | 
 763 |         if (arp->arp_pln != sizeof(struct in_addr))
 764 |                 return false;
 765 | 
 766 |         if (!memcmp(arp->arp_sha, acd->mac, sizeof(struct ether_addr)))
 767 |                 return false;
 768 | 
 769 |         if (memcmp(arp->arp_spa, &((struct in_addr) { INADDR_ANY }), sizeof(struct in_addr))) {
 770 |                 if (arp->arp_op != htobe16(ARPOP_REQUEST) && arp->arp_op != htobe16(ARPOP_REPLY))
 771 |                         return false;
 772 |         } else if (arp->arp_op != htobe16(ARPOP_REQUEST)) {
 773 |                 return false;
 774 |         }
 775 | 
 776 |         return true;
 777 | }
 778 | 
 779 | static int n_acd_dispatch_socket(NAcd *acd, struct epoll_event *event) {
 780 |         const size_t n_batch = 8;
 781 |         struct mmsghdr msgs[n_batch];
 782 |         struct iovec iovecs[n_batch];
 783 |         struct ether_arp data[n_batch];
 784 |         size_t i;
 785 |         int r, n;
 786 | 
 787 |         for (i = 0; i < n_batch; ++i) {
 788 |                 iovecs[i].iov_base = data + i;
 789 |                 iovecs[i].iov_len = sizeof(data[i]);
 790 |                 msgs[i].msg_hdr = (struct msghdr){
 791 |                         .msg_iov = iovecs + i,
 792 |                         .msg_iovlen = 1,
 793 |                 };
 794 |         }
 795 | 
 796 |         /*
 797 |          * We always directly call into recvmmsg(2), regardless which EPOLL*
 798 |          * event is signalled. On sockets, the recv(2)-family of syscalls does
 799 |          * a suitable job of handling all possible scenarios and telling us
 800 |          * about it. Hence, lets take the easy route and always ask the kernel
 801 |          * about the current state.
 802 |          */
 803 |         n = recvmmsg(acd->fd_socket, msgs, n_batch, 0, NULL);
 804 |         if (n < 0) {
 805 |                 if (errno == ENETDOWN) {
 806 |                         /*
 807 |                          * We get ENETDOWN if the network-device goes down or
 808 |                          * is removed. This error is temporary and only queued
 809 |                          * once. Subsequent reads will simply return EAGAIN
 810 |                          * until the device is up again and has data queued.
 811 |                          * Usually, the caller should tear down all probes when
 812 |                          * an interface goes down, but we leave it up to the
 813 |                          * caller to decide what to do. We propagate the code
 814 |                          * and continue.
 815 |                          */
 816 |                         return n_acd_raise(acd, NULL, N_ACD_EVENT_DOWN);
 817 |                 } else if (errno == EAGAIN) {
 818 |                         /*
 819 |                          * There is no more data queued and we did not get
 820 |                          * preempted. Everything is good to go.
 821 |                          * As a safety-net against busy-looping, we do check
 822 |                          * for HUP/ERR. Neither should be set, since they imply
 823 |                          * error-dequeue behavior on all socket calls. Lets
 824 |                          * fail hard if we trigger it, so we can investigate.
 825 |                          */
 826 |                         if (event->events & (EPOLLHUP | EPOLLERR))
 827 |                                 return -EIO;
 828 | 
 829 |                         return 0;
 830 |                 } else {
 831 |                         /*
 832 |                          * Something went wrong. Propagate the error-code, so
 833 |                          * this can be investigated.
 834 |                          */
 835 |                         return -c_errno();
 836 |                 }
 837 |         } else if (n >= (ssize_t)n_batch) {
 838 |                 /*
 839 |                  * If all buffers were filled with data, we cannot be sure that
 840 |                  * there is nothing left to read. But to avoid starvation, we
 841 |                  * cannot loop on this condition. Instead, we mark the context
 842 |                  * as preempted so the caller can call us again.
 843 |                  * Note that in level-triggered event-loops this condition can
 844 |                  * be neglected, but in edge-triggered event-loops it is
 845 |                  * crucial to forward this information.
 846 |                  *
 847 |                  * On the other hand, there are several conditions where the
 848 |                  * kernel might return less batches than requested, but was
 849 |                  * still preempted. However, all of those cases require the
 850 |                  * preemption to have triggered a wakeup *after* we entered
 851 |                  * recvmmsg(). Hence, even if we did not recognize the
 852 |                  * preemption, an edge must have triggered and as such we will
 853 |                  * handle the event on the next turn.
 854 |                  */
 855 |                 acd->preempted = true;
 856 |         }
 857 | 
 858 |         for (i = 0; (ssize_t)i < n; ++i) {
 859 |                 if (!n_acd_packet_is_valid(acd, data + i, msgs[i].msg_len))
 860 |                         continue;
 861 |                 /*
 862 |                  * Handle the packet. Bail out if something went wrong. Note
 863 |                  * that this must be fatal errors, since we discard all other
 864 |                  * packets that follow.
 865 |                  */
 866 |                 r = n_acd_handle_packet(acd, data + i);
 867 |                 if (r)
 868 |                         return r;
 869 |         }
 870 | 
 871 |         return 0;
 872 | }
 873 | 
 874 | /**
 875 |  * n_acd_dispatch() - dispatch context
 876 |  * @acd:                        context object to operate on
 877 |  *
 878 |  * This dispatches the internal state-machine of all probes and operations
 879 |  * running on the context @acd.
 880 |  *
 881 |  * Any outside effect or event triggered by this dispatcher will be queued on
 882 |  * the event-queue of @acd. Whenever the dispatcher returns, the caller is
 883 |  * required to drain the event-queue via n_acd_pop_event() until it is empty.
 884 |  *
 885 |  * This function dispatches as many events as possible up to a static limit to
 886 |  * prevent stalling execution. If the static limit is reached, this function
 887 |  * will return with N_ACD_E_PREEMPTED, otherwise 0 is returned. In most cases
 888 |  * preemption can be ignored, because level-triggered event notification
 889 |  * handles it automatically. However, in case of edge-triggered event
 890 |  * mechanisms, the caller must make sure to call the dispatcher again.
 891 |  *
 892 |  * Return: 0 on success, N_ACD_E_PREEMPTED on preemption, negative error code
 893 |  *         on failure.
 894 |  */
 895 | _c_public_ int n_acd_dispatch(NAcd *acd) {
 896 |         struct epoll_event events[2];
 897 |         int n, i, r = 0;
 898 | 
 899 |         n = epoll_wait(acd->fd_epoll, events, sizeof(events) / sizeof(*events), 0);
 900 |         if (n < 0) {
 901 |                 /* Linux never returns EINTR if `timeout == 0'. */
 902 |                 return -c_errno();
 903 |         }
 904 | 
 905 |         acd->preempted = false;
 906 | 
 907 |         for (i = 0; i < n; ++i) {
 908 |                 switch (events[i].data.u32) {
 909 |                 case N_ACD_EPOLL_TIMER:
 910 |                         r = n_acd_dispatch_timer(acd, events + i);
 911 |                         break;
 912 |                 case N_ACD_EPOLL_SOCKET:
 913 |                         r = n_acd_dispatch_socket(acd, events + i);
 914 |                         break;
 915 |                 default:
 916 |                         c_assert(0);
 917 |                         r = 0;
 918 |                         break;
 919 |                 }
 920 | 
 921 |                 if (r)
 922 |                         return r;
 923 |         }
 924 | 
 925 |         return acd->preempted ? N_ACD_E_PREEMPTED : 0;
 926 | }
 927 | 
 928 | /**
 929 |  * n_acd_pop_event() - get the next pending event
 930 |  * @acd:                        context object to operate on
 931 |  * @eventp:                     output argument for the event
 932 |  *
 933 |  * Returns a pointer to the next pending event. The event is still owend by
 934 |  * the context, and is only valid until the next call to n_acd_pop_event()
 935 |  * or until the owning object is freed (either the ACD context or the indicated
 936 |  * probe object).
 937 |  *
 938 |  * An event either originates on the ACD context, or one of the configured
 939 |  * probes. If the event-type has a 'probe' pointer, it originated on the
 940 |  * indicated probe (which is *never* NULL), otherwise it originated on the
 941 |  * context.
 942 |  *
 943 |  * Users must call this function repeatedly until either an error is returned,
 944 |  * or the event-pointer is NULL. Wakeups on the epoll-fd are only guaranteed
 945 |  * for each batch of events. Hence, it is the callers responsibility to drain
 946 |  * the event-queue somehow after each call to n_acd_dispatch(). Note that
 947 |  * events can only be added by n_acd_dispatch(), hence, you cannot live-lock
 948 |  * when draining the event queue.
 949 |  *
 950 |  * The possible events are:
 951 |  *  * N_ACD_EVENT_READY:    A configured IP address was probed successfully
 952 |  *                          and is ready to be used. Once configured on the
 953 |  *                          interface, the caller must call n_acd_announce()
 954 |  *                          to announce and start defending the address.
 955 |  *  * N_ACD_EVENT_USED:     Someone is already using the IP address being
 956 |  *                          probed. The probe is put into stopped state and
 957 |  *                          should be freed by the caller.
 958 |  *  * N_ACD_EVENT_DEFENDED: A conflict was detected for an announced IP
 959 |  *                          address, and the engine attempted to defend it.
 960 |  *                          This is purely informational, and no action is
 961 |  *                          required by the caller.
 962 |  *  * N_ACD_EVENT_CONFLICT: A conflict was detected for an announced IP
 963 |  *                          address, and the probe was not able to defend
 964 |  *                          it (according to the configured policy). The
 965 |  *                          probe halted, the caller must stop using
 966 |  *                          the address immediately, and should free the probe.
 967 |  *  * N_ACD_EVENT_DOWN:     The specified network interface was put down. The
 968 |  *                          user is recommended to free *ALL* probes and
 969 |  *                          recreate them as soon as the interface is up again.
 970 |  *                          Note that this event is purely informational. The
 971 |  *                          probes will continue running, but all packets will
 972 |  *                          be blackholed, and no network packets are received,
 973 |  *                          until the network is back up again. Hence, from an
 974 |  *                          operational perspective, the legitimacy of the ACD
 975 |  *                          probes is lost and the user better re-probes all
 976 |  *                          addresses.
 977 |  *
 978 |  * Returns: 0 on success, negative error code on failure. The popped event is
 979 |  *          returned in @eventp. If no event is pending, NULL is placed in
 980 |  *          @eventp and 0 is returned. If an error is returned, @eventp is left
 981 |  *          untouched.
 982 |  */
 983 | _c_public_ int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp) {
 984 |         NAcdEventNode *node, *t_node;
 985 | 
 986 |         c_list_for_each_entry_safe(node, t_node, &acd->event_list, acd_link) {
 987 |                 if (node->is_public) {
 988 |                         n_acd_event_node_free(node);
 989 |                         continue;
 990 |                 }
 991 | 
 992 |                 node->is_public = true;
 993 |                 *eventp = &node->event;
 994 |                 return 0;
 995 |         }
 996 | 
 997 |         *eventp = NULL;
 998 |         return 0;
 999 | }
1000 | 
1001 | /**
1002 |  * n_acd_probe() - start new probe
1003 |  * @acd:                        context object to operate on
1004 |  * @probep:                     output argument for new probe
1005 |  * @config:                     probe configuration
1006 |  *
1007 |  * This creates a new probe on the context @acd and returns the probe in
1008 |  * @probep. The configuration @config must provide valid probe parameters. At
1009 |  * least a valid IP address must be provided through the configuration.
1010 |  *
1011 |  * This function does not reject duplicate probes for the same address. It is
1012 |  * the caller's decision whether duplicates are allowed or not. But note that
1013 |  * duplicate probes on the same context will not conflict each other. That is,
1014 |  * running a probe for the same address twice on the same context will not
1015 |  * cause them to consider each other a duplicate.
1016 |  *
1017 |  * Probes are rather lightweight objects. They do not create any
1018 |  * file-descriptors or other kernel objects. Probes always re-use the
1019 |  * infrastructure provided by the context object @acd. This allows running many
1020 |  * probes simultaneously without exhausting resources.
1021 |  *
1022 |  * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT on invalid configuration
1023 |  *         parameters, negative error code on failure.
1024 |  */
1025 | _c_public_ int n_acd_probe(NAcd *acd, NAcdProbe **probep, NAcdProbeConfig *config) {
1026 |         return n_acd_probe_new(probep, acd, config);
1027 | }
1028 | 


--------------------------------------------------------------------------------
/src/n-acd.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | /*
  4 |  * IPv4 Address Conflict Detection
  5 |  *
  6 |  * This is the public header of the n-acd library, implementing IPv4 Address
  7 |  * Conflict Detection as described in RFC-5227. This header defines the public
  8 |  * API and all entry points of n-acd.
  9 |  */
 10 | 
 11 | #ifdef __cplusplus
 12 | extern "C" {
 13 | #endif
 14 | 
 15 | #include <netinet/in.h>
 16 | #include <inttypes.h>
 17 | #include <stdbool.h>
 18 | #include <stdlib.h>
 19 | 
 20 | typedef struct NAcd NAcd;
 21 | typedef struct NAcdConfig NAcdConfig;
 22 | typedef struct NAcdEvent NAcdEvent;
 23 | typedef struct NAcdProbe NAcdProbe;
 24 | typedef struct NAcdProbeConfig NAcdProbeConfig;
 25 | 
 26 | #define N_ACD_TIMEOUT_RFC5227 (UINT64_C(9000))
 27 | 
 28 | enum {
 29 |         _N_ACD_E_SUCCESS,
 30 | 
 31 |         N_ACD_E_PREEMPTED,
 32 |         N_ACD_E_INVALID_ARGUMENT,
 33 | 
 34 |         _N_ACD_E_N,
 35 | };
 36 | 
 37 | enum {
 38 |         N_ACD_TRANSPORT_ETHERNET,
 39 |         _N_ACD_TRANSPORT_N,
 40 | };
 41 | 
 42 | enum {
 43 |         N_ACD_EVENT_READY,
 44 |         N_ACD_EVENT_USED,
 45 |         N_ACD_EVENT_DEFENDED,
 46 |         N_ACD_EVENT_CONFLICT,
 47 |         N_ACD_EVENT_DOWN,
 48 |         _N_ACD_EVENT_N,
 49 | };
 50 | 
 51 | enum {
 52 |         N_ACD_DEFEND_NEVER,
 53 |         N_ACD_DEFEND_ONCE,
 54 |         N_ACD_DEFEND_ALWAYS,
 55 |         _N_ACD_DEFEND_N,
 56 | };
 57 | 
 58 | struct NAcdEvent {
 59 |         unsigned int event;
 60 |         union {
 61 |                 struct {
 62 |                         NAcdProbe *probe;
 63 |                 } ready;
 64 |                 struct {
 65 |                 } down;
 66 |                 struct {
 67 |                         NAcdProbe *probe;
 68 |                         uint8_t *sender;
 69 |                         size_t n_sender;
 70 |                 } used, defended, conflict;
 71 |         };
 72 | };
 73 | 
 74 | /* configs */
 75 | 
 76 | int n_acd_config_new(NAcdConfig **configp);
 77 | NAcdConfig *n_acd_config_free(NAcdConfig *config);
 78 | 
 79 | void n_acd_config_set_ifindex(NAcdConfig *config, int ifindex);
 80 | void n_acd_config_set_transport(NAcdConfig *config, unsigned int transport);
 81 | void n_acd_config_set_mac(NAcdConfig *config, const uint8_t *mac, size_t n_mac);
 82 | 
 83 | int n_acd_probe_config_new(NAcdProbeConfig **configp);
 84 | NAcdProbeConfig *n_acd_probe_config_free(NAcdProbeConfig *config);
 85 | 
 86 | void n_acd_probe_config_set_ip(NAcdProbeConfig *config, struct in_addr ip);
 87 | void n_acd_probe_config_set_timeout(NAcdProbeConfig *config, uint64_t msecs);
 88 | 
 89 | /* contexts */
 90 | 
 91 | int n_acd_new(NAcd **acdp, NAcdConfig *config);
 92 | NAcd *n_acd_ref(NAcd *acd);
 93 | NAcd *n_acd_unref(NAcd *acd);
 94 | 
 95 | void n_acd_get_fd(NAcd *acd, int *fdp);
 96 | int n_acd_dispatch(NAcd *acd);
 97 | int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp);
 98 | 
 99 | int n_acd_probe(NAcd *acd, NAcdProbe **probep, NAcdProbeConfig *config);
100 | 
101 | /* probes */
102 | 
103 | NAcdProbe *n_acd_probe_free(NAcdProbe *probe);
104 | 
105 | void n_acd_probe_set_userdata(NAcdProbe *probe, void *userdata);
106 | void n_acd_probe_get_userdata(NAcdProbe *probe, void **userdatap);
107 | 
108 | int n_acd_probe_announce(NAcdProbe *probe, unsigned int defend);
109 | 
110 | /* inline helpers */
111 | 
112 | static inline void n_acd_config_freep(NAcdConfig **config) {
113 |         if (*config)
114 |                 n_acd_config_free(*config);
115 | }
116 | 
117 | static inline void n_acd_config_freev(NAcdConfig *config) {
118 |         n_acd_config_free(config);
119 | }
120 | 
121 | static inline void n_acd_probe_config_freep(NAcdProbeConfig **config) {
122 |         if (*config)
123 |                 n_acd_probe_config_free(*config);
124 | }
125 | 
126 | static inline void n_acd_probe_config_freev(NAcdProbeConfig *config) {
127 |         n_acd_probe_config_free(config);
128 | }
129 | 
130 | static inline void n_acd_unrefp(NAcd **acd) {
131 |         if (*acd)
132 |                 n_acd_unref(*acd);
133 | }
134 | 
135 | static inline void n_acd_unrefv(NAcd *acd) {
136 |         n_acd_unref(acd);
137 | }
138 | 
139 | static inline void n_acd_probe_freep(NAcdProbe **probe) {
140 |         if (*probe)
141 |                 n_acd_probe_free(*probe);
142 | }
143 | 
144 | static inline void n_acd_probe_freev(NAcdProbe *probe) {
145 |         n_acd_probe_free(probe);
146 | }
147 | 
148 | #ifdef __cplusplus
149 | }
150 | #endif
151 | 


--------------------------------------------------------------------------------
/src/test-api.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Tests for n-acd API
 3 |  * This verifies the visibility and availability of the public API.
 4 |  */
 5 | 
 6 | #undef NDEBUG
 7 | #include <assert.h>
 8 | #include <stdlib.h>
 9 | #include "n-acd.h"
10 | 
11 | static void test_api_constants(void) {
12 |         assert(1 + N_ACD_TIMEOUT_RFC5227);
13 | 
14 |         assert(1 + _N_ACD_E_SUCCESS);
15 |         assert(1 + N_ACD_E_PREEMPTED);
16 |         assert(1 + N_ACD_E_INVALID_ARGUMENT);
17 |         assert(1 + _N_ACD_E_N);
18 | 
19 |         assert(1 + N_ACD_TRANSPORT_ETHERNET);
20 |         assert(1 + _N_ACD_TRANSPORT_N);
21 | 
22 |         assert(1 + N_ACD_EVENT_READY);
23 |         assert(1 + N_ACD_EVENT_USED);
24 |         assert(1 + N_ACD_EVENT_DEFENDED);
25 |         assert(1 + N_ACD_EVENT_CONFLICT);
26 |         assert(1 + N_ACD_EVENT_DOWN);
27 |         assert(1 + _N_ACD_EVENT_N);
28 | 
29 |         assert(1 + N_ACD_DEFEND_NEVER);
30 |         assert(1 + N_ACD_DEFEND_ONCE);
31 |         assert(1 + N_ACD_DEFEND_ALWAYS);
32 |         assert(1 + _N_ACD_DEFEND_N);
33 | }
34 | 
35 | static void test_api_types(void) {
36 |         assert(sizeof(NAcdEvent*));
37 |         assert(sizeof(NAcdConfig*));
38 |         assert(sizeof(NAcdProbeConfig*));
39 |         assert(sizeof(NAcd*));
40 |         assert(sizeof(NAcdProbe*));
41 | }
42 | 
43 | static void test_api_functions(void) {
44 |         void *fns[] = {
45 |                 (void *)n_acd_config_new,
46 |                 (void *)n_acd_config_free,
47 |                 (void *)n_acd_config_set_ifindex,
48 |                 (void *)n_acd_config_set_transport,
49 |                 (void *)n_acd_config_set_mac,
50 |                 (void *)n_acd_probe_config_new,
51 |                 (void *)n_acd_probe_config_free,
52 |                 (void *)n_acd_probe_config_set_ip,
53 |                 (void *)n_acd_probe_config_set_timeout,
54 | 
55 |                 (void *)n_acd_new,
56 |                 (void *)n_acd_ref,
57 |                 (void *)n_acd_unref,
58 |                 (void *)n_acd_get_fd,
59 |                 (void *)n_acd_dispatch,
60 |                 (void *)n_acd_pop_event,
61 |                 (void *)n_acd_probe,
62 | 
63 |                 (void *)n_acd_probe_free,
64 |                 (void *)n_acd_probe_set_userdata,
65 |                 (void *)n_acd_probe_get_userdata,
66 |                 (void *)n_acd_probe_announce,
67 | 
68 |                 (void *)n_acd_config_freep,
69 |                 (void *)n_acd_config_freev,
70 |                 (void *)n_acd_probe_config_freep,
71 |                 (void *)n_acd_probe_config_freev,
72 |                 (void *)n_acd_unrefp,
73 |                 (void *)n_acd_unrefv,
74 |                 (void *)n_acd_probe_freep,
75 |                 (void *)n_acd_probe_freev,
76 |         };
77 |         size_t i;
78 | 
79 |         for (i = 0; i < sizeof(fns) / sizeof(*fns); ++i)
80 |                 assert(!!fns[i]);
81 | }
82 | 
83 | int main(int argc, char **argv) {
84 |         test_api_constants();
85 |         test_api_types();
86 |         test_api_functions();
87 |         return 0;
88 | }
89 | 


--------------------------------------------------------------------------------
/src/test-bpf.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * eBPF socket filter tests
  3 |  */
  4 | 
  5 | #undef NDEBUG
  6 | #include <assert.h>
  7 | #include <c-stdaux.h>
  8 | #include <errno.h>
  9 | #include <inttypes.h>
 10 | #include <netinet/if_ether.h>
 11 | #include <netinet/in.h>
 12 | #include <stdio.h>
 13 | #include <stdlib.h>
 14 | #include <string.h>
 15 | #include <sys/types.h>
 16 | #include <sys/socket.h>
 17 | #include <unistd.h>
 18 | #include "n-acd.h"
 19 | #include "n-acd-private.h"
 20 | #include "test.h"
 21 | 
 22 | #define ETHER_ARP_PACKET_INIT(_op, _mac, _sip, _tip) {                  \
 23 |                 .ea_hdr = {                                             \
 24 |                         .ar_hrd = htobe16(ARPHRD_ETHER),                \
 25 |                         .ar_pro = htobe16(ETHERTYPE_IP),                \
 26 |                         .ar_hln = 6,                                    \
 27 |                         .ar_pln = 4,                                    \
 28 |                         .ar_op = htobe16(_op),                          \
 29 |                 },                                                      \
 30 |                 .arp_sha[0] = (_mac)->ether_addr_octet[0],              \
 31 |                 .arp_sha[1] = (_mac)->ether_addr_octet[1],              \
 32 |                 .arp_sha[2] = (_mac)->ether_addr_octet[2],              \
 33 |                 .arp_sha[3] = (_mac)->ether_addr_octet[3],              \
 34 |                 .arp_sha[4] = (_mac)->ether_addr_octet[4],              \
 35 |                 .arp_sha[5] = (_mac)->ether_addr_octet[5],              \
 36 |                 .arp_spa[0] = (be32toh((_sip)->s_addr) >> 24) & 0xff,   \
 37 |                 .arp_spa[1] = (be32toh((_sip)->s_addr) >> 16) & 0xff,   \
 38 |                 .arp_spa[2] = (be32toh((_sip)->s_addr) >> 8) & 0xff,    \
 39 |                 .arp_spa[3] =  be32toh((_sip)->s_addr) & 0xff,          \
 40 |                 .arp_tpa[0] = (be32toh((_tip)->s_addr) >> 24) & 0xff,   \
 41 |                 .arp_tpa[1] = (be32toh((_tip)->s_addr) >> 16) & 0xff,   \
 42 |                 .arp_tpa[2] = (be32toh((_tip)->s_addr) >> 8) & 0xff,    \
 43 |                 .arp_tpa[3] =  be32toh((_tip)->s_addr) & 0xff,          \
 44 |         }
 45 | 
 46 | static void test_map(void) {
 47 |         int r, mapfd = -1;
 48 |         struct in_addr addr = { 1 };
 49 | 
 50 |         r = n_acd_bpf_map_create(&mapfd, 8);
 51 |         c_assert(r >= 0);
 52 |         c_assert(mapfd >= 0);
 53 | 
 54 |         r = n_acd_bpf_map_remove(mapfd, &addr);
 55 |         c_assert(r == -ENOENT);
 56 | 
 57 |         r = n_acd_bpf_map_add(mapfd, &addr);
 58 |         c_assert(r >= 0);
 59 | 
 60 |         r = n_acd_bpf_map_add(mapfd, &addr);
 61 |         c_assert(r == -EEXIST);
 62 | 
 63 |         r = n_acd_bpf_map_remove(mapfd, &addr);
 64 |         c_assert(r >= 0);
 65 | 
 66 |         r = n_acd_bpf_map_remove(mapfd, &addr);
 67 |         c_assert(r == -ENOENT);
 68 | 
 69 |         close(mapfd);
 70 | }
 71 | 
 72 | static void verify_success(struct ether_arp *packet, int out_fd, int in_fd) {
 73 |         uint8_t buf[sizeof(struct ether_arp)];
 74 |         int r;
 75 | 
 76 |         r = send(out_fd, packet, sizeof(struct ether_arp), 0);
 77 |         c_assert(r == sizeof(struct ether_arp));
 78 | 
 79 |         r = recv(in_fd, buf, sizeof(buf), 0);
 80 |         c_assert(r == sizeof(struct ether_arp));
 81 | }
 82 | 
 83 | static void verify_failure(struct ether_arp *packet, int out_fd, int in_fd) {
 84 |         uint8_t buf[sizeof(struct ether_arp)];
 85 |         int r;
 86 | 
 87 |         r = send(out_fd, packet, sizeof(struct ether_arp), 0);
 88 |         c_assert(r == sizeof(struct ether_arp));
 89 | 
 90 |         r = recv(in_fd, buf, sizeof(buf), 0);
 91 |         c_assert(r < 0);
 92 |         c_assert(errno == EAGAIN);
 93 | }
 94 | 
 95 | static void test_filter(void) {
 96 |         uint8_t buf[sizeof(struct ether_arp) + 1] = {};
 97 |         struct ether_addr mac1 = { { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 } };
 98 |         struct ether_addr mac2 = { { 0x01, 0x02, 0x03, 0x04, 0x05, 0x07 } };
 99 |         struct in_addr ip0 = { 0 };
100 |         struct in_addr ip1 = { 1 };
101 |         struct in_addr ip2 = { 2 };
102 |         struct ether_arp *packet = (struct ether_arp *)buf;
103 |         int r, mapfd = -1, progfd = -1, pair[2];
104 | 
105 |         r = n_acd_bpf_map_create(&mapfd, 1);
106 |         c_assert(r >= 0);
107 | 
108 |         r = n_acd_bpf_compile(&progfd, mapfd, &mac1);
109 |         c_assert(r >= 0);
110 |         c_assert(progfd >= 0);
111 | 
112 |         r = socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, pair);
113 |         c_assert(r >= 0);
114 | 
115 |         r = setsockopt(pair[1], SOL_SOCKET, SO_ATTACH_BPF, &progfd,
116 |                        sizeof(progfd));
117 |         c_assert(r >= 0);
118 | 
119 |         r = n_acd_bpf_map_add(mapfd, &ip1);
120 |         c_assert(r >= 0);
121 | 
122 |         /* valid */
123 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
124 |         verify_success(packet, pair[0], pair[1]);
125 | 
126 |         /* valid: reply instead of request */
127 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REPLY, &mac2, &ip1, &ip2);
128 |         verify_success(packet, pair[0], pair[1]);
129 | 
130 |         /* valid: to us instead of from us */
131 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip0, &ip1);
132 |         verify_success(packet, pair[0], pair[1]);
133 | 
134 |         /* invalid header type */
135 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
136 |         packet->arp_hrd += 1;
137 |         verify_failure(packet, pair[0], pair[1]);
138 | 
139 |         /* invalid protocol */
140 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
141 |         packet->arp_pro += 1;
142 |         verify_failure(packet, pair[0], pair[1]);
143 | 
144 |         /* invalid hw addr length */
145 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
146 |         packet->arp_hln += 1;
147 |         verify_failure(packet, pair[0], pair[1]);
148 | 
149 |         /* invalid protocol addr length */
150 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
151 |         packet->arp_pln += 1;
152 |         verify_failure(packet, pair[0], pair[1]);
153 | 
154 |         /* invalid operation */
155 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_NAK, &mac2, &ip1, &ip2);
156 |         packet->arp_hln += 1;
157 |         verify_failure(packet, pair[0], pair[1]);
158 | 
159 |         /* own mac */
160 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac1, &ip1, &ip2);
161 |         verify_failure(packet, pair[0], pair[1]);
162 | 
163 |         /* not to, nor from us, with source */
164 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip2, &ip2);
165 |         verify_failure(packet, pair[0], pair[1]);
166 | 
167 |         /* not to, nor from us, without source */
168 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip0, &ip2);
169 |         verify_failure(packet, pair[0], pair[1]);
170 | 
171 |         /* to us instead of from us, but reply */
172 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REPLY, &mac2, &ip0, &ip1);
173 |         verify_failure(packet, pair[0], pair[1]);
174 | 
175 |         /* long */
176 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
177 |         r = send(pair[0], buf, sizeof(struct ether_arp) + 1, 0);
178 |         c_assert(r == sizeof(struct ether_arp) + 1);
179 | 
180 |         r = recv(pair[1], buf, sizeof(buf), 0);
181 |         c_assert(r == sizeof(struct ether_arp));
182 | 
183 |         /* short */
184 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
185 |         r = send(pair[0], buf, sizeof(struct ether_arp) - 1, 0);
186 |         c_assert(r == sizeof(struct ether_arp) - 1);
187 | 
188 |         r = recv(pair[1], buf, sizeof(buf), 0);
189 |         c_assert(r < 0);
190 |         c_assert(errno == EAGAIN);
191 | 
192 |         /*
193 |          * Send one packet before and one packet after modifying the map,
194 |          * verify that the modification applies at the time of send(), not recv().
195 |          */
196 |         *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
197 |         r = send(pair[0], buf, sizeof(struct ether_arp), 0);
198 |         c_assert(r == sizeof(struct ether_arp));
199 | 
200 |         r = n_acd_bpf_map_remove(mapfd, &ip1);
201 |         c_assert(r >= 0);
202 | 
203 |         r = send(pair[0], buf, sizeof(struct ether_arp), 0);
204 |         c_assert(r == sizeof(struct ether_arp));
205 | 
206 |         r = recv(pair[1], buf, sizeof(buf), 0);
207 |         c_assert(r == sizeof(struct ether_arp));
208 | 
209 |         r = recv(pair[1], buf, sizeof(buf), 0);
210 |         c_assert(r < 0);
211 |         c_assert(errno == EAGAIN);
212 | 
213 |         close(pair[0]);
214 |         close(pair[1]);
215 |         close(progfd);
216 |         close(mapfd);
217 | }
218 | 
219 | int main(int argc, char **argv) {
220 |         test_setup();
221 | 
222 |         test_map();
223 |         test_filter();
224 | 
225 |         return 0;
226 | }
227 | 


--------------------------------------------------------------------------------
/src/test-loopback.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Test on loopback device
 3 |  * This runs the ACD engine on the loopback device, effectively testing the BPF
 4 |  * filter of ACD to discard its own packets. This might happen on
 5 |  * non-spanning-tree networks, or on networks that echo packets.
 6 |  */
 7 | 
 8 | #undef NDEBUG
 9 | #include <c-stdaux.h>
10 | #include <stdlib.h>
11 | #include "test.h"
12 | 
13 | static void test_loopback(int ifindex, uint8_t *mac, size_t n_mac) {
14 |         NAcdConfig *config;
15 |         NAcd *acd;
16 |         struct pollfd pfds;
17 |         int r, fd;
18 | 
19 |         r = n_acd_config_new(&config);
20 |         c_assert(!r);
21 | 
22 |         n_acd_config_set_ifindex(config, ifindex);
23 |         n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
24 |         n_acd_config_set_mac(config, mac, n_mac);
25 | 
26 |         r = n_acd_new(&acd, config);
27 |         c_assert(!r);
28 | 
29 |         n_acd_config_free(config);
30 | 
31 |         {
32 |                 NAcdProbeConfig *probe_config;
33 |                 NAcdProbe *probe;
34 |                 struct in_addr ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) };
35 | 
36 |                 r = n_acd_probe_config_new(&probe_config);
37 |                 c_assert(!r);
38 | 
39 |                 n_acd_probe_config_set_ip(probe_config, ip);
40 |                 n_acd_probe_config_set_timeout(probe_config, 100);
41 | 
42 |                 r = n_acd_probe(acd, &probe, probe_config);
43 |                 c_assert(!r);
44 | 
45 |                 n_acd_probe_config_free(probe_config);
46 | 
47 |                 n_acd_get_fd(acd, &fd);
48 | 
49 |                 for (;;) {
50 |                         NAcdEvent *event;
51 |                         pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
52 |                         r = poll(&pfds, 1, -1);
53 |                         c_assert(r >= 0);
54 | 
55 |                         r = n_acd_dispatch(acd);
56 |                         c_assert(!r);
57 | 
58 |                         r = n_acd_pop_event(acd, &event);
59 |                         c_assert(!r);
60 |                         if (event) {
61 |                                 c_assert(event->event == N_ACD_EVENT_READY);
62 |                                 break;
63 |                         }
64 |                 }
65 | 
66 |                 n_acd_probe_free(probe);
67 |         }
68 | 
69 |         n_acd_unref(acd);
70 | }
71 | 
72 | int main(int argc, char **argv) {
73 |         struct ether_addr mac;
74 |         int ifindex;
75 | 
76 |         test_setup();
77 | 
78 |         test_loopback_up(&ifindex, &mac);
79 |         test_loopback(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet));
80 | 
81 |         return 0;
82 | }
83 | 


--------------------------------------------------------------------------------
/src/test-twice.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Test with unused address twice in parallel
 3 |  * This runs the ACD engine with an unused address on a veth pair, but it runs
 4 |  * it on both ends. We expect the PROBE to fail on at least one of the devices.
 5 |  */
 6 | 
 7 | #undef NDEBUG
 8 | #include <c-stdaux.h>
 9 | #include <stdlib.h>
10 | #include "test.h"
11 | 
12 | static void test_unused(int ifindex1, uint8_t *mac1, size_t n_mac1, int ifindex2, uint8_t *mac2, size_t n_mac2) {
13 |         NAcdConfig config1 = {
14 |                 .ifindex = ifindex1,
15 |                 .transport = N_ACD_TRANSPORT_ETHERNET,
16 |                 .mac = mac1,
17 |                 .n_mac = n_mac1,
18 |                 .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) },
19 |                 .timeout_msec = 100,
20 |         };
21 |         NAcdConfig config2 = {
22 |                 .ifindex = ifindex2,
23 |                 .transport = N_ACD_TRANSPORT_ETHERNET,
24 |                 .mac = mac2,
25 |                 .n_mac = n_mac2,
26 |                 .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) },
27 |                 .timeout_msec = 100,
28 |         };
29 |         struct pollfd pfds[2];
30 |         NAcd *acd1, *acd2;
31 |         int r, fd1, fd2, state1, state2;
32 | 
33 |         r = n_acd_new(&acd1);
34 |         c_assert(!r);
35 |         r = n_acd_new(&acd2);
36 |         c_assert(!r);
37 | 
38 |         n_acd_get_fd(acd1, &fd1);
39 |         n_acd_get_fd(acd2, &fd2);
40 | 
41 |         r = n_acd_start(acd1, &config1);
42 |         c_assert(!r);
43 |         r = n_acd_start(acd2, &config2);
44 |         c_assert(!r);
45 | 
46 |         for (state1 = state2 = -1; state1 == -1 || state2 == -1; ) {
47 |                 NAcdEvent *event;
48 |                 pfds[0] = (struct pollfd){ .fd = fd1, .events = (state1 == -1) ? POLLIN : 0 };
49 |                 pfds[1] = (struct pollfd){ .fd = fd2, .events = (state2 == -1) ? POLLIN : 0 };
50 | 
51 |                 r = poll(pfds, sizeof(pfds) / sizeof(*pfds), -1);
52 |                 c_assert(r >= 0);
53 | 
54 |                 if (state1 == -1) {
55 |                         r = n_acd_dispatch(acd1);
56 |                         c_assert(!r);
57 | 
58 |                         r = n_acd_pop_event(acd1, &event);
59 |                         if (!r) {
60 |                                 c_assert(event->event == N_ACD_EVENT_READY || event->event == N_ACD_EVENT_USED);
61 |                                 state1 = !!(event->event == N_ACD_EVENT_READY);
62 |                         } else {
63 |                                 c_assert(r == N_ACD_E_DONE);
64 |                         }
65 |                 }
66 | 
67 |                 if (state2 == -1) {
68 |                         r = n_acd_dispatch(acd2);
69 |                         c_assert(!r);
70 | 
71 |                         r = n_acd_pop_event(acd2, &event);
72 |                         if (!r) {
73 |                                 c_assert(event->event == N_ACD_EVENT_READY || event->event == N_ACD_EVENT_USED);
74 |                                 state2 = !!(event->event == N_ACD_EVENT_READY);
75 |                         } else {
76 |                                 c_assert(r == N_ACD_E_DONE);
77 |                         }
78 |                 }
79 |         }
80 | 
81 |         n_acd_free(acd1);
82 |         n_acd_free(acd2);
83 | 
84 |         c_assert(!state1 || !state2);
85 | }
86 | 
87 | int main(int argc, char **argv) {
88 |         struct ether_addr mac1, mac2;
89 |         int ifindex1, ifindex2;
90 | 
91 |         test_setup();
92 | 
93 |         test_veth_new(&ifindex1, &mac1, &ifindex2, &mac2);
94 |         test_unused(ifindex1, mac1.ether_addr_octet, sizeof(mac2.ether_addr_octet), ifindex2, mac2.ether_addr_octet, sizeof(mac2.ether_addr_octet));
95 | 
96 |         return 0;
97 | }
98 | 


--------------------------------------------------------------------------------
/src/test-unplug.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Unplug device during test run
 3 |  * Run the ACD engine with an address that is not used by anyone else on the
 4 |  * link, but DOWN or UNPLUG the device while running.
 5 |  */
 6 | 
 7 | #undef NDEBUG
 8 | #include <c-stdaux.h>
 9 | #include <stdlib.h>
10 | #include "test.h"
11 | 
12 | static void test_unplug_down(int ifindex, uint8_t *mac, size_t n_mac, unsigned int run) {
13 |         NAcdConfig config = {
14 |                 .ifindex = ifindex,
15 |                 .transport = N_ACD_TRANSPORT_ETHERNET,
16 |                 .mac = mac,
17 |                 .n_mac = n_mac,
18 |                 .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) },
19 |                 .timeout_msec = 100,
20 |         };
21 |         struct pollfd pfds;
22 |         NAcd *acd;
23 |         int r, fd;
24 | 
25 |         if (!run--)
26 |                 test_veth_cmd(ifindex, "down");
27 | 
28 |         r = n_acd_new(&acd);
29 |         c_assert(!r);
30 | 
31 |         if (!run--)
32 |                 test_veth_cmd(ifindex, "down");
33 | 
34 |         n_acd_get_fd(acd, &fd);
35 |         r = n_acd_start(acd, &config);
36 |         c_assert(!r);
37 | 
38 |         if (!run--)
39 |                 test_veth_cmd(ifindex, "down");
40 | 
41 |         for (;;) {
42 |                 NAcdEvent *event;
43 |                 pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
44 |                 r = poll(&pfds, 1, -1);
45 |                 c_assert(r >= 0);
46 | 
47 |                 if (!run--)
48 |                         test_veth_cmd(ifindex, "down");
49 | 
50 |                 r = n_acd_dispatch(acd);
51 |                 c_assert(!r);
52 | 
53 |                 r = n_acd_pop_event(acd, &event);
54 |                 if (!r) {
55 |                         if (event->event == N_ACD_EVENT_DOWN) {
56 |                                 break;
57 |                         } else {
58 |                                 c_assert(event->event == N_ACD_EVENT_READY);
59 |                                 test_veth_cmd(ifindex, "down");
60 |                         }
61 |                 } else {
62 |                         c_assert(r == N_ACD_E_DONE);
63 |                 }
64 |         }
65 | 
66 |         n_acd_free(acd);
67 | }
68 | 
69 | int main(int argc, char **argv) {
70 |         struct ether_addr mac;
71 |         unsigned int i;
72 |         int ifindex;
73 | 
74 |         test_setup();
75 | 
76 |         test_veth_new(&ifindex, &mac, NULL, NULL);
77 | 
78 |         for (i = 0; i < 5; ++i) {
79 |                 test_unplug_down(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet), i);
80 |                 test_veth_cmd(ifindex, "up");
81 |         }
82 | 
83 |         return 0;
84 | }
85 | 


--------------------------------------------------------------------------------
/src/test-unused.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Test with unused address
 3 |  * Run the ACD engine with an address that is not used by anyone else on the
 4 |  * link. This should just pass through, with a short, random timeout.
 5 |  */
 6 | 
 7 | #undef NDEBUG
 8 | #include <c-stdaux.h>
 9 | #include <stdlib.h>
10 | #include "test.h"
11 | 
12 | static void test_unused(int ifindex, const uint8_t *mac, size_t n_mac) {
13 |         NAcdConfig config = {
14 |                 .ifindex = ifindex,
15 |                 .transport = N_ACD_TRANSPORT_ETHERNET,
16 |                 .mac = mac,
17 |                 .n_mac = n_mac,
18 |                 .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) },
19 |                 .timeout_msec = 100,
20 |         };
21 |         struct pollfd pfds;
22 |         NAcd *acd;
23 |         int r, fd;
24 | 
25 |         r = n_acd_new(&acd);
26 |         c_assert(!r);
27 | 
28 |         n_acd_get_fd(acd, &fd);
29 |         r = n_acd_start(acd, &config);
30 |         c_assert(!r);
31 | 
32 |         for (;;) {
33 |                 NAcdEvent *event;
34 |                 pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
35 |                 r = poll(&pfds, 1, -1);
36 |                 c_assert(r >= 0);
37 | 
38 |                 r = n_acd_dispatch(acd);
39 |                 c_assert(!r);
40 | 
41 |                 r = n_acd_pop_event(acd, &event);
42 |                 if (!r) {
43 |                         c_assert(event->event == N_ACD_EVENT_READY);
44 |                         break;
45 |                 } else {
46 |                         c_assert(r == N_ACD_E_DONE);
47 |                 }
48 |         }
49 | 
50 |         n_acd_free(acd);
51 | }
52 | 
53 | int main(int argc, char **argv) {
54 |         struct ether_addr mac;
55 |         int ifindex;
56 | 
57 |         test_setup();
58 | 
59 |         test_veth_new(&ifindex, &mac, NULL, NULL);
60 |         test_unused(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet));
61 | 
62 |         return 0;
63 | }
64 | 


--------------------------------------------------------------------------------
/src/test-veth.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Test on a veth link
  3 |  *
  4 |  * This essentially mimics a real network with two peers.
  5 |  *
  6 |  * Run one ACD context on each end of the tunnel. On one end probe for N,
  7 |  * addresses on the other end pre-configure N/3 of the same addresses and probe
  8 |  * for another N/3 of the addresses.
  9 |  *
 10 |  * Verify that in the case of simultaneous probes of the same address at most one
 11 |  * succeed, in the case of probing for a configured address it always fails, and
 12 |  * probing for a non-existent address always succeeds.
 13 |  *
 14 |  * Make sure to keep N fairly high as the protocol is probabilistic, and we also
 15 |  * want to verify that resizing the internal maps works correctly.
 16 |  */
 17 | 
 18 | #undef NDEBUG
 19 | #include <c-stdaux.h>
 20 | #include <stdlib.h>
 21 | #include "test.h"
 22 | 
 23 | #define TEST_ACD_N_PROBES (9)
 24 | 
 25 | typedef enum {
 26 |         TEST_ACD_STATE_UNKNOWN,
 27 |         TEST_ACD_STATE_USED,
 28 |         TEST_ACD_STATE_READY,
 29 | } TestAcdState;
 30 | 
 31 | static void test_veth(int ifindex1, uint8_t *mac1, size_t n_mac1,
 32 |                       int ifindex2, uint8_t *mac2, size_t n_mac2) {
 33 |         NAcdConfig *config;
 34 |         NAcd *acd1, *acd2;
 35 |         NAcdProbe *probes1[TEST_ACD_N_PROBES];
 36 |         NAcdProbe *probes2[TEST_ACD_N_PROBES];
 37 |         unsigned long state1, state2;
 38 |         size_t n_running = 0;
 39 |         int r;
 40 | 
 41 |         r = n_acd_config_new(&config);
 42 |         c_assert(!r);
 43 | 
 44 |         n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
 45 | 
 46 |         n_acd_config_set_ifindex(config, ifindex1);
 47 |         n_acd_config_set_mac(config, mac1, n_mac1);
 48 |         r = n_acd_new(&acd1, config);
 49 |         c_assert(!r);
 50 | 
 51 |         n_acd_config_set_ifindex(config, ifindex2);
 52 |         n_acd_config_set_mac(config, mac2, n_mac2);
 53 |         r = n_acd_new(&acd2, config);
 54 |         c_assert(!r);
 55 | 
 56 |         n_acd_config_free(config);
 57 | 
 58 |         {
 59 |                 NAcdProbeConfig *probe_config;
 60 | 
 61 |                 r = n_acd_probe_config_new(&probe_config);
 62 |                 c_assert(!r);
 63 |                 n_acd_probe_config_set_timeout(probe_config, 1024);
 64 | 
 65 |                 c_assert(TEST_ACD_N_PROBES <= 10 << 24);
 66 | 
 67 |                 for (size_t i = 0; i < TEST_ACD_N_PROBES; ++i) {
 68 |                         struct in_addr ip = { htobe32((10 << 24) | i) };
 69 | 
 70 |                         n_acd_probe_config_set_ip(probe_config, ip);
 71 | 
 72 |                         switch (i % 3) {
 73 |                         case 0:
 74 |                                 /*
 75 |                                  * Probe on one side, and leave the address
 76 |                                  * unset on the other. The probe must succeed.
 77 |                                  */
 78 |                                 break;
 79 |                         case 1:
 80 |                                 /*
 81 |                                  * Preconfigure the address on one side, and
 82 |                                  * probe on the other. The probe must fail.
 83 |                                  */
 84 |                                 test_add_child_ip(&ip);
 85 |                                 break;
 86 |                         case 2:
 87 |                                 /*
 88 |                                  * Probe both sides for the same address, at
 89 |                                  * most one may succeed.
 90 |                                  */
 91 | 
 92 |                                 r = n_acd_probe(acd2, &probes2[i], probe_config);
 93 |                                 c_assert(!r);
 94 | 
 95 |                                 ++n_running;
 96 |                                 break;
 97 |                         default:
 98 |                                 c_assert(0);
 99 |                                 abort();
100 |                                 break;
101 |                         }
102 | 
103 |                         r = n_acd_probe(acd1, &probes1[i], probe_config);
104 |                         c_assert(!r);
105 | 
106 |                         ++n_running;
107 |                 }
108 | 
109 |                 n_acd_probe_config_free(probe_config);
110 | 
111 |                 while (n_running > 0) {
112 |                         NAcdEvent *event;
113 |                         struct pollfd pfds[2] = {
114 |                                 { .events = POLLIN },
115 |                                 { .events = POLLIN },
116 |                         };
117 | 
118 |                         n_acd_get_fd(acd1, &pfds[0].fd);
119 |                         n_acd_get_fd(acd2, &pfds[1].fd);
120 | 
121 |                         r = poll(pfds, 2, -1);
122 |                         c_assert(r >= 0);
123 | 
124 |                         if (pfds[0].revents & POLLIN) {
125 |                                 r = n_acd_dispatch(acd1);
126 |                                 c_assert(!r || r == N_ACD_E_PREEMPTED);
127 | 
128 |                                 for (;;) {
129 |                                         r = n_acd_pop_event(acd1, &event);
130 |                                         c_assert(!r);
131 |                                         if (event) {
132 |                                                 switch (event->event) {
133 |                                                 case N_ACD_EVENT_READY:
134 |                                                         n_acd_probe_get_userdata(event->ready.probe, (void**)&state1);
135 |                                                         c_assert(state1 == TEST_ACD_STATE_UNKNOWN);
136 |                                                         state1 = TEST_ACD_STATE_READY;
137 |                                                         n_acd_probe_set_userdata(event->ready.probe, (void*)state1);
138 | 
139 |                                                         break;
140 |                                                 case N_ACD_EVENT_USED:
141 |                                                         n_acd_probe_get_userdata(event->used.probe, (void**)&state1);
142 |                                                         c_assert(state1 == TEST_ACD_STATE_UNKNOWN);
143 |                                                         state1 = TEST_ACD_STATE_USED;
144 |                                                         n_acd_probe_set_userdata(event->used.probe, (void*)state1);
145 | 
146 |                                                         break;
147 |                                                 default:
148 |                                                         c_assert(0);
149 |                                                 }
150 | 
151 |                                                 --n_running;
152 |                                         } else {
153 |                                                 break;
154 |                                         }
155 |                                 }
156 |                         }
157 | 
158 |                         if (pfds[1].revents & POLLIN) {
159 |                                 r = n_acd_dispatch(acd2);
160 |                                 c_assert(!r || r == N_ACD_E_PREEMPTED);
161 | 
162 |                                 for (;;) {
163 |                                         r = n_acd_pop_event(acd2, &event);
164 |                                         c_assert(!r);
165 |                                         if (event) {
166 |                                                 switch (event->event) {
167 |                                                 case N_ACD_EVENT_READY:
168 |                                                         n_acd_probe_get_userdata(event->ready.probe, (void**)&state2);
169 |                                                         c_assert(state2 == TEST_ACD_STATE_UNKNOWN);
170 |                                                         state2 = TEST_ACD_STATE_READY;
171 |                                                         n_acd_probe_set_userdata(event->ready.probe, (void*)state2);
172 | 
173 |                                                         break;
174 |                                                 case N_ACD_EVENT_USED:
175 |                                                         n_acd_probe_get_userdata(event->used.probe, (void**)&state2);
176 |                                                         c_assert(state2 == TEST_ACD_STATE_UNKNOWN);
177 |                                                         state2 = TEST_ACD_STATE_USED;
178 |                                                         n_acd_probe_set_userdata(event->used.probe, (void*)state2);
179 | 
180 |                                                         break;
181 |                                                 default:
182 |                                                         c_assert(0);
183 |                                                 }
184 | 
185 |                                                 --n_running;
186 |                                         } else {
187 |                                                 break;
188 |                                         }
189 |                                 }
190 |                         }
191 |                 }
192 | 
193 |                 for (size_t i = 0; i < TEST_ACD_N_PROBES; ++i) {
194 |                         struct in_addr ip = { htobe32((10 << 24) | i) };
195 | 
196 |                         switch (i % 3) {
197 |                         case 0:
198 |                                 n_acd_probe_get_userdata(probes1[i], (void **)&state1);
199 |                                 c_assert(state1 == TEST_ACD_STATE_READY);
200 | 
201 |                                 break;
202 |                         case 1:
203 |                                 test_del_child_ip(&ip);
204 | 
205 |                                 n_acd_probe_get_userdata(probes1[i], (void **)&state1);
206 |                                 c_assert(state1 == TEST_ACD_STATE_USED);
207 | 
208 |                                 break;
209 |                         case 2:
210 |                                 n_acd_probe_get_userdata(probes1[i], (void **)&state1);
211 |                                 n_acd_probe_get_userdata(probes2[i], (void **)&state2);
212 |                                 c_assert(state1 != TEST_ACD_STATE_UNKNOWN);
213 |                                 c_assert(state2 != TEST_ACD_STATE_UNKNOWN);
214 |                                 c_assert(state1 == TEST_ACD_STATE_USED || state2 == TEST_ACD_STATE_USED);
215 |                                 n_acd_probe_free(probes2[i]);
216 | 
217 |                                 break;
218 |                         }
219 |                         n_acd_probe_free(probes1[i]);
220 |                 }
221 |         }
222 | 
223 |         n_acd_unref(acd2);
224 |         n_acd_unref(acd1);
225 | }
226 | 
227 | int main(int argc, char **argv) {
228 |         struct ether_addr mac1, mac2;
229 |         int ifindex1, ifindex2;
230 | 
231 |         test_setup();
232 | 
233 |         test_veth_new(&ifindex1, &mac1, &ifindex2, &mac2);
234 |         for (unsigned int i = 0; i < 8; ++i) {
235 |                 test_veth(ifindex1, mac1.ether_addr_octet, sizeof(mac1.ether_addr_octet),
236 |                           ifindex2, mac2.ether_addr_octet, sizeof(mac2.ether_addr_octet));
237 |         }
238 | 
239 |         return 0;
240 | }
241 | 


--------------------------------------------------------------------------------
/src/test.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | /*
  4 |  * Test Helpers
  5 |  * Bunch of helpers to setup the environment for networking tests. This
  6 |  * includes net-namespace setups, veth setups, and more.
  7 |  */
  8 | 
  9 | #undef NDEBUG
 10 | #include <assert.h>
 11 | #include <c-stdaux.h>
 12 | #include <endian.h>
 13 | #include <errno.h>
 14 | #include <fcntl.h>
 15 | #include <net/ethernet.h>
 16 | #include <net/if.h>
 17 | #include <sys/socket.h>
 18 | #include <netinet/in.h>
 19 | #include <arpa/inet.h>
 20 | #include <poll.h>
 21 | #include <sched.h>
 22 | #include <stdbool.h>
 23 | #include <stdio.h>
 24 | #include <stdlib.h>
 25 | #include <string.h>
 26 | #include <sys/ioctl.h>
 27 | #include <sys/mount.h>
 28 | #include <sys/resource.h>
 29 | #include <sys/stat.h>
 30 | #include <sys/types.h>
 31 | #include <unistd.h>
 32 | #include "n-acd.h"
 33 | 
 34 | static inline void test_add_child_ip(const struct in_addr *addr) {
 35 |         char *p;
 36 |         int r;
 37 | 
 38 |         r = asprintf(&p, "ip addr add dev veth1 %s/8", inet_ntoa(*addr));
 39 |         c_assert(r >= 0);
 40 | 
 41 |         r = system(p);
 42 |         c_assert(r >= 0);
 43 | 
 44 |         free(p);
 45 | }
 46 | 
 47 | static inline void test_del_child_ip(const struct in_addr *addr) {
 48 |         char *p;
 49 |         int r;
 50 | 
 51 |         r = asprintf(&p, "ip addr del dev veth1 %s/8", inet_ntoa(*addr));
 52 |         c_assert(r >= 0);
 53 | 
 54 |         r = system(p);
 55 |         c_assert(r >= 0);
 56 | 
 57 |         free(p);
 58 | }
 59 | 
 60 | static inline void test_if_query(const char *name, int *indexp, struct ether_addr *macp) {
 61 |         struct ifreq ifr = {};
 62 |         size_t l;
 63 |         int r, s;
 64 | 
 65 |         l = strlen(name);
 66 |         c_assert(l <= IF_NAMESIZE);
 67 | 
 68 |         if (indexp) {
 69 |                 *indexp = if_nametoindex(name);
 70 |                 c_assert(*indexp > 0);
 71 |         }
 72 | 
 73 |         if (macp) {
 74 |                 s = socket(AF_INET, SOCK_DGRAM, 0);
 75 |                 c_assert(s >= 0);
 76 | 
 77 |                 strncpy(ifr.ifr_name, name, l + 1);
 78 |                 r = ioctl(s, SIOCGIFHWADDR, &ifr);
 79 |                 c_assert(r >= 0);
 80 | 
 81 |                 memcpy(macp->ether_addr_octet, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
 82 | 
 83 |                 close(s);
 84 |         }
 85 | }
 86 | 
 87 | static inline void test_veth_cmd(int ifindex, const char *cmd) {
 88 |         char *p, name[IF_NAMESIZE + 1] = {};
 89 |         int r;
 90 | 
 91 |         p = if_indextoname(ifindex, name);
 92 |         c_assert(p);
 93 | 
 94 |         r = asprintf(&p, "ip link set %s %s", name, cmd);
 95 |         c_assert(r >= 0);
 96 | 
 97 |         /* Again: Ewwww... */
 98 |         r = system(p);
 99 |         c_assert(r == 0);
100 | 
101 |         free(p);
102 | }
103 | 
104 | static inline void test_veth_new(int *parent_indexp,
105 |                                  struct ether_addr *parent_macp,
106 |                                  int *child_indexp,
107 |                                  struct ether_addr *child_macp) {
108 |         int r;
109 | 
110 |         /* Eww... but it works. */
111 |         r = system("ip link add type veth");
112 |         c_assert(r == 0);
113 |         r = system("ip link set veth0 up");
114 |         c_assert(r == 0);
115 |         r = system("ip link set veth1 up");
116 |         c_assert(r == 0);
117 | 
118 |         test_if_query("veth0", parent_indexp, parent_macp);
119 |         test_if_query("veth1", child_indexp, child_macp);
120 | }
121 | 
122 | static inline void test_loopback_up(int *indexp, struct ether_addr *macp) {
123 |         int r;
124 | 
125 |         r = system("ip link set lo up");
126 |         c_assert(r == 0);
127 | 
128 |         test_if_query("lo", indexp, macp);
129 | }
130 | 
131 | static inline void test_raise_memlock(void) {
132 |         const size_t wanted = 64 * 1024 * 1024;
133 |         struct rlimit get, set;
134 |         int r;
135 | 
136 |         r = getrlimit(RLIMIT_MEMLOCK, &get);
137 |         c_assert(!r);
138 | 
139 |         /* try raising limit to @wanted */
140 |         set.rlim_cur = wanted;
141 |         set.rlim_max = (wanted > get.rlim_max) ? wanted : get.rlim_max;
142 |         r = setrlimit(RLIMIT_MEMLOCK, &set);
143 |         if (r) {
144 |                 c_assert(errno == EPERM);
145 | 
146 |                 /* not privileged to raise limit, so maximize soft limit */
147 |                 set.rlim_cur = get.rlim_max;
148 |                 set.rlim_max = get.rlim_max;
149 |                 r = setrlimit(RLIMIT_MEMLOCK, &set);
150 |                 c_assert(!r);
151 |         }
152 | }
153 | 
154 | static inline void test_unshare_user_namespace(void) {
155 |         uid_t euid;
156 |         gid_t egid;
157 |         int r, fd;
158 | 
159 |         /*
160 |          * Enter a new user namespace as root:root.
161 |          */
162 | 
163 |         euid = geteuid();
164 |         egid = getegid();
165 | 
166 |         r = unshare(CLONE_NEWUSER);
167 |         c_assert(r >= 0);
168 | 
169 |         fd = open("/proc/self/uid_map", O_WRONLY);
170 |         c_assert(fd >= 0);
171 |         r = dprintf(fd, "0 %d 1\n", euid);
172 |         c_assert(r >= 0);
173 |         close(fd);
174 | 
175 |         fd = open("/proc/self/setgroups", O_WRONLY);
176 |         c_assert(fd >= 0);
177 |         r = dprintf(fd, "deny");
178 |         c_assert(r >= 0);
179 |         close(fd);
180 | 
181 |         fd = open("/proc/self/gid_map", O_WRONLY);
182 |         c_assert(fd >= 0);
183 |         r = dprintf(fd, "0 %d 1\n", egid);
184 |         c_assert(r >= 0);
185 |         close(fd);
186 | }
187 | 
188 | static inline void test_setup(void) {
189 |         int r;
190 | 
191 |         /*
192 |          * Move into a new network and mount namespace both associated
193 |          * with a new user namespace where the current eUID is mapped to
194 |          * 0. Then create a private instance of /run/netns. This ensures
195 |          * that any network devices or network namespaces are private to
196 |          * the test process.
197 |          */
198 | 
199 |         test_raise_memlock();
200 |         test_unshare_user_namespace();
201 | 
202 |         r = unshare(CLONE_NEWNET | CLONE_NEWNS);
203 |         c_assert(r >= 0);
204 | 
205 |         r = mount(NULL, "/", "", MS_PRIVATE | MS_REC, NULL);
206 |         c_assert(r >= 0);
207 | 
208 |         r = mount(NULL, "/run", "tmpfs", 0, NULL);
209 |         c_assert(r >= 0);
210 | 
211 |         r = mkdir("/run/netns", 0755);
212 |         c_assert(r >= 0);
213 | }
214 | 


--------------------------------------------------------------------------------
/src/util/test-timer.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Tests for timer utility library
  3 |  */
  4 | 
  5 | #undef NDEBUG
  6 | #include <c-stdaux.h>
  7 | #include <errno.h>
  8 | #include <poll.h>
  9 | #include <stdbool.h>
 10 | #include <stdio.h>
 11 | #include <stdlib.h>
 12 | #include <sys/timerfd.h>
 13 | #include "timer.h"
 14 | 
 15 | #define N_TIMEOUTS (10000)
 16 | 
 17 | static void test_api(void) {
 18 |         Timer timer = TIMER_NULL(timer);
 19 |         Timeout t1 = TIMEOUT_INIT(t1), t2 = TIMEOUT_INIT(t2), *t;
 20 |         int r;
 21 | 
 22 |         r = timer_init(&timer);
 23 |         c_assert(!r);
 24 | 
 25 |         timeout_schedule(&t1, &timer, 1);
 26 |         timeout_schedule(&t2, &timer, 2);
 27 | 
 28 |         r = timer_pop_timeout(&timer, 10, &t);
 29 |         c_assert(!r);
 30 |         c_assert(t == &t1);
 31 | 
 32 |         timeout_unschedule(&t2);
 33 | 
 34 |         r = timer_pop_timeout(&timer, 10, &t);
 35 |         c_assert(!r);
 36 |         c_assert(!t);
 37 | 
 38 |         timer_deinit(&timer);
 39 | }
 40 | 
 41 | static void test_pop(void) {
 42 |         Timer timer = TIMER_NULL(timer);
 43 |         Timeout timeouts[N_TIMEOUTS] = {};
 44 |         uint64_t times[N_TIMEOUTS] = {};
 45 |         size_t n_timeouts = 0;
 46 |         bool armed;
 47 |         Timeout *t;
 48 |         int r;
 49 | 
 50 |         r = timer_init(&timer);
 51 |         c_assert(!r);
 52 | 
 53 |         for(size_t i = 0; i < N_TIMEOUTS; ++i) {
 54 |                 timeouts[i] = (Timeout)TIMEOUT_INIT(timeouts[i]);
 55 |                 times[i] = rand() % 128 + 1;
 56 |                 timeout_schedule(&timeouts[i], &timer, times[i]);
 57 |         }
 58 | 
 59 |         armed = true;
 60 | 
 61 |         for(size_t i = 0; i <= 128; ++i) {
 62 |                 if (armed) {
 63 |                         struct pollfd pfd = {
 64 |                                 .fd = timer.fd,
 65 |                                 .events = POLLIN,
 66 |                         };
 67 |                         uint64_t count;
 68 | 
 69 |                         r = poll(&pfd, 1, -1);
 70 |                         c_assert(r == 1);
 71 | 
 72 |                         r = read(timer.fd, &count, sizeof(count));
 73 |                         c_assert(r == sizeof(count));
 74 |                         c_assert(count == 1);
 75 |                         armed = false;
 76 |                 }
 77 | 
 78 |                 for (;;) {
 79 |                         uint64_t current_time;
 80 | 
 81 |                         r = timer_pop_timeout(&timer, i, &t);
 82 |                         c_assert(!r);
 83 |                         if (!t) {
 84 |                                 timer_rearm(&timer);
 85 |                                 break;
 86 |                         }
 87 | 
 88 |                         current_time = times[t - timeouts];
 89 |                         c_assert(current_time == i);
 90 |                         ++n_timeouts;
 91 |                         armed = true;
 92 |                 }
 93 |         }
 94 | 
 95 |         c_assert(n_timeouts == N_TIMEOUTS);
 96 | 
 97 |         r = timer_pop_timeout(&timer, (uint64_t)-1, &t);
 98 |         c_assert(!r);
 99 |         c_assert(!t);
100 | 
101 |         timer_deinit(&timer);
102 | }
103 | 
104 | void test_arm(void) {
105 |         struct itimerspec spec = {
106 |                 .it_value = {
107 |                         .tv_sec = 1000,
108 |                 },
109 |         };
110 |         int fd1, fd2, r;
111 | 
112 |         fd1 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
113 |         c_assert(fd1 >= 0);
114 | 
115 |         fd2 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
116 |         c_assert(fd1 >= 0);
117 | 
118 |         r = timerfd_settime(fd1, 0, &spec, NULL);
119 |         c_assert(r >= 0);
120 | 
121 |         r = timerfd_settime(fd2, 0, &spec, NULL);
122 |         c_assert(r >= 0);
123 | 
124 |         r = timerfd_gettime(fd1, &spec);
125 |         c_assert(r >= 0);
126 |         c_assert(spec.it_value.tv_sec);
127 | 
128 |         r = timerfd_gettime(fd2, &spec);
129 |         c_assert(r >= 0);
130 |         c_assert(spec.it_value.tv_sec);
131 | 
132 |         spec = (struct itimerspec){};
133 | 
134 |         r = timerfd_settime(fd1, 0, &spec, NULL);
135 |         c_assert(r >= 0);
136 | 
137 |         r = timerfd_gettime(fd1, &spec);
138 |         c_assert(r >= 0);
139 |         c_assert(!spec.it_value.tv_sec);
140 |         c_assert(!spec.it_value.tv_nsec);
141 | 
142 |         r = timerfd_gettime(fd2, &spec);
143 |         c_assert(r >= 0);
144 |         c_assert(spec.it_value.tv_sec);
145 | 
146 |         spec = (struct itimerspec){ .it_value = { .tv_nsec = 1, }, };
147 | 
148 |         r = timerfd_settime(fd1, 0, &spec, NULL);
149 |         c_assert(r >= 0);
150 | 
151 |         r = poll(&(struct pollfd) { .fd = fd1, .events = POLLIN }, 1, -1);
152 |         c_assert(r == 1);
153 | 
154 |         r = timerfd_settime(fd2, 0, &spec, NULL);
155 |         c_assert(r >= 0);
156 | 
157 |         r = poll(&(struct pollfd) { .fd = fd2, .events = POLLIN }, 1, -1);
158 |         c_assert(r == 1);
159 | 
160 |         spec = (struct itimerspec){};
161 | 
162 |         r = timerfd_settime(fd1, 0, &spec, NULL);
163 |         c_assert(r >= 0);
164 | 
165 |         r = poll(&(struct pollfd) { .fd = fd2, .events = POLLIN }, 1, -1);
166 |         c_assert(r == 1);
167 | 
168 |         close(fd2);
169 |         close(fd1);
170 | }
171 | 
172 | int main(int argc, char **argv) {
173 |         test_arm();
174 |         test_api();
175 |         test_pop();
176 |         return 0;
177 | }
178 | 


--------------------------------------------------------------------------------
/src/util/timer.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Timer Utility Library
  3 |  */
  4 | 
  5 | #include <assert.h>
  6 | #include <c-rbtree.h>
  7 | #include <c-stdaux.h>
  8 | #include <errno.h>
  9 | #include <stdlib.h>
 10 | #include <sys/timerfd.h>
 11 | #include <time.h>
 12 | #include "timer.h"
 13 | 
 14 | int timer_init(Timer *timer) {
 15 |         clockid_t clock = CLOCK_BOOTTIME;
 16 |         int r;
 17 | 
 18 |         r = timerfd_create(clock, TFD_CLOEXEC | TFD_NONBLOCK);
 19 |         if (r < 0 && errno == EINVAL) {
 20 |                 clock = CLOCK_MONOTONIC;
 21 |                 r = timerfd_create(clock, TFD_CLOEXEC | TFD_NONBLOCK);
 22 |         }
 23 |         if (r < 0)
 24 |                 return -errno;
 25 | 
 26 |         *timer = (Timer)TIMER_NULL(*timer);
 27 |         timer->fd = r;
 28 |         timer->clock = clock;
 29 | 
 30 |         return 0;
 31 | }
 32 | 
 33 | void timer_deinit(Timer *timer) {
 34 |         c_assert(c_rbtree_is_empty(&timer->tree));
 35 | 
 36 |         if (timer->fd >= 0) {
 37 |                 close(timer->fd);
 38 |                 timer->fd = -1;
 39 |         }
 40 | }
 41 | 
 42 | void timer_now(Timer *timer, uint64_t *nowp) {
 43 |         struct timespec ts;
 44 |         int r;
 45 | 
 46 |         r = clock_gettime(timer->clock, &ts);
 47 |         c_assert(r >= 0);
 48 | 
 49 |         *nowp = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec;
 50 | }
 51 | 
 52 | void timer_rearm(Timer *timer) {
 53 |         uint64_t time;
 54 |         Timeout *timeout;
 55 |         int r;
 56 | 
 57 |         /*
 58 |          * A timeout value of 0 clears the timer, we should only set that if
 59 |          * no timeout exists in the tree.
 60 |          */
 61 | 
 62 |         timeout = c_rbnode_entry(c_rbtree_first(&timer->tree), Timeout, node);
 63 |         c_assert(!timeout || timeout->timeout);
 64 | 
 65 |         time = timeout ? timeout->timeout : 0;
 66 | 
 67 |         if (time != timer->scheduled_timeout) {
 68 |                 r = timerfd_settime(timer->fd,
 69 |                                     TFD_TIMER_ABSTIME,
 70 |                                     &(struct itimerspec){
 71 |                                             .it_value = {
 72 |                                                     .tv_sec = time / UINT64_C(1000000000),
 73 |                                                     .tv_nsec = time % UINT64_C(1000000000),
 74 |                                             },
 75 |                                     },
 76 |                                     NULL);
 77 |                 c_assert(r >= 0);
 78 | 
 79 |                 timer->scheduled_timeout = time;
 80 |         }
 81 | }
 82 | 
 83 | int timer_read(Timer *timer) {
 84 |         uint64_t v;
 85 |         int r;
 86 | 
 87 |         r = read(timer->fd, &v, sizeof(v));
 88 |         if (r < 0) {
 89 |                 if (errno == EAGAIN) {
 90 |                         /*
 91 |                          * No more pending events.
 92 |                          */
 93 |                         return 0;
 94 |                 } else {
 95 |                         /*
 96 |                          * Something failed. We use CLOCK_BOOTTIME/MONOTONIC,
 97 |                          * so ECANCELED cannot happen. Hence, there is no
 98 |                          * error that we could gracefully handle. Fail hard
 99 |                          * and let the caller deal with it.
100 |                          */
101 |                         return -errno;
102 |                 }
103 |         } else if (r != sizeof(v) || v == 0) {
104 |                 /*
105 |                  * Kernel guarantees 8-byte reads, and only to return
106 |                  * data if at least one timer triggered; fail hard if
107 |                  * it suddenly starts doing weird shit.
108 |                  */
109 |                 return -EIO;
110 |         }
111 | 
112 |         return TIMER_E_TRIGGERED;
113 | }
114 | 
115 | 
116 | int timer_pop_timeout(Timer *timer, uint64_t until, Timeout **timeoutp) {
117 |         Timeout *timeout;
118 | 
119 |         /*
120 |          * If the first timeout is scheduled before @until, then unlink
121 |          * it and return it. Otherwise, return NULL.
122 |          */
123 |         timeout = c_rbnode_entry(c_rbtree_first(&timer->tree), Timeout, node);
124 |         if (timeout && timeout->timeout <= until) {
125 |                 c_rbnode_unlink(&timeout->node);
126 |                 timeout->timeout = 0;
127 |                 *timeoutp = timeout;
128 |         } else {
129 |                 *timeoutp = NULL;
130 |         }
131 | 
132 |         return 0;
133 | }
134 | 
135 | void timeout_schedule(Timeout *timeout, Timer *timer, uint64_t time) {
136 |         c_assert(time);
137 | 
138 |         /*
139 |          * In case @timeout was already scheduled, remove it from the
140 |          * tree. If we are moving it to a new timer, rearm the old one.
141 |          */
142 |         if (timeout->timer) {
143 |                 c_rbnode_unlink(&timeout->node);
144 |                 if (timeout->timer != timer)
145 |                         timer_rearm(timeout->timer);
146 |         }
147 |         timeout->timer = timer;
148 |         timeout->timeout = time;
149 | 
150 |         /*
151 |          * Now insert it back into the tree in the correct new position.
152 |          * We allow duplicates in the tree, so this insertion is open-coded.
153 |          */
154 |         {
155 |                 Timeout *other;
156 |                 CRBNode **slot, *parent;
157 | 
158 |                 slot = &timer->tree.root;
159 |                 parent = NULL;
160 |                 while (*slot) {
161 |                         other = c_rbnode_entry(*slot, Timeout, node);
162 |                         parent = *slot;
163 |                         if (timeout->timeout < other->timeout)
164 |                                 slot = &(*slot)->left;
165 |                         else
166 |                                 slot = &(*slot)->right;
167 |                 }
168 | 
169 |                 c_rbtree_add(&timer->tree, parent, slot, &timeout->node);
170 |         }
171 | 
172 |         /*
173 |          * Rearm the timer as we updated the timeout tree.
174 |          */
175 |         timer_rearm(timer);
176 | }
177 | 
178 | void timeout_unschedule(Timeout *timeout) {
179 |         Timer *timer = timeout->timer;
180 | 
181 |         if (!timer)
182 |                 return;
183 | 
184 |         c_rbnode_unlink(&timeout->node);
185 |         timeout->timeout = 0;
186 |         timeout->timer = NULL;
187 | 
188 |         timer_rearm(timer);
189 | }
190 | 


--------------------------------------------------------------------------------
/src/util/timer.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <c-rbtree.h>
 4 | #include <c-stdaux.h>
 5 | #include <inttypes.h>
 6 | #include <stdlib.h>
 7 | #include <time.h>
 8 | #include <unistd.h>
 9 | 
10 | typedef struct Timer Timer;
11 | typedef struct Timeout Timeout;
12 | 
13 | enum {
14 |         _TIMER_E_SUCCESS,
15 | 
16 |         TIMER_E_TRIGGERED,
17 | 
18 |         _TIMER_E_N,
19 | };
20 | 
21 | struct Timer {
22 |         int fd;
23 |         clockid_t clock;
24 |         CRBTree tree;
25 |         uint64_t scheduled_timeout;
26 | };
27 | 
28 | #define TIMER_NULL(_x) {                                                        \
29 |                 .fd = -1,                                                       \
30 |                 .tree = C_RBTREE_INIT,                                          \
31 |         }
32 | 
33 | struct Timeout {
34 |         Timer *timer;
35 |         CRBNode node;
36 |         uint64_t timeout;
37 | };
38 | 
39 | #define TIMEOUT_INIT(_x) {                                                      \
40 |                 .node = C_RBNODE_INIT((_x).node),                               \
41 |         }
42 | 
43 | int timer_init(Timer *timer);
44 | void timer_deinit(Timer *timer);
45 | 
46 | void timer_now(Timer *timer, uint64_t *nowp);
47 | 
48 | int timer_pop_timeout(Timer *timer, uint64_t now, Timeout **timerp);
49 | void timer_rearm(Timer *timer);
50 | int timer_read(Timer *timer);
51 | 
52 | void timeout_schedule(Timeout *timeout, Timer *timer, uint64_t time);
53 | void timeout_unschedule(Timeout *timeout);
54 | 
55 | 


--------------------------------------------------------------------------------