├── .editorconfig
├── .github
│   └── workflows
│       └── ci.yml
├── .gitmodules
├── AUTHORS
├── NEWS.md
├── README.md
├── meson.build
├── meson_options.txt
└── src
    ├── libnacd.sym
    ├── meson.build
    ├── n-acd-bpf-fallback.c
    ├── n-acd-bpf.c
    ├── n-acd-private.h
    ├── n-acd-probe.c
    ├── n-acd.c
    ├── n-acd.h
    ├── test-api.c
    ├── test-bpf.c
    ├── test-loopback.c
    ├── test-twice.c
    ├── test-unplug.c
    ├── test-unused.c
    ├── test-veth.c
    ├── test.h
    └── util
        ├── test-timer.c
        ├── timer.c
        └── timer.h
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | end_of_line = lf
5 | insert_final_newline = true
6 | trim_trailing_whitespace = true
7 | charset = utf-8
8 |
9 | [*.{c,h}]
10 | indent_style = space
11 | indent_size = 8
12 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: Continuous Integration
2 |
3 | on:
4 | push:
5 | pull_request:
6 | schedule:
7 | - cron: '0 0 * * *'
8 |
9 | jobs:
10 | ci:
11 | name: CI with Default Configuration
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | #
16 | # Prepare CI
17 | #
18 | # We cannot use the github-action of the `ci-c-util` project, because we
19 | # need privileges in the container. Therefore, fetch the CI sources and
20 | # build the container manually.
21 | #
22 | - name: Fetch CI
23 | uses: actions/checkout@v2
24 | with:
25 | repository: c-util/automation
26 | ref: v1
27 | path: automation
28 | - name: Build CI
29 | working-directory: automation/src/ci-c-util
30 | run: docker build --tag ci-c-util:v1 .
31 |
32 | #
33 | # Run CI
34 | #
35 | # Take the CI image we built and run the CI with the default project
36 |         # configuration. We do not use valgrind, since it falls over with bpf(2)
37 | # syscalls.
38 | #
39 | - name: Fetch Sources
40 | uses: actions/checkout@v2
41 | with:
42 | path: source
43 | - name: Run through C-Util CI
44 | run: |
45 | docker run \
46 | --privileged \
47 | -v "$(pwd)/source:/github/workspace" \
48 | "ci-c-util:v1" \
49 | "--m32=1" \
50 | "--source=/github/workspace"
51 |
52 | ci-no-ebpf:
53 | name: CI without eBPF
54 | runs-on: ubuntu-latest
55 |
56 | steps:
57 | # See above in 'ci' job.
58 | - name: Fetch CI
59 | uses: actions/checkout@v2
60 | with:
61 | repository: c-util/automation
62 | ref: v1
63 | path: automation
64 | - name: Build CI
65 | working-directory: automation/src/ci-c-util
66 | run: docker build --tag ci-c-util:v1 .
67 |
68 | #
69 | # Run CI
70 | #
71 | # This again runs the CI, but this time disables eBPF. We do support the
72 |         # legacy BPF fallback, so let's make sure we test for it.
73 | #
74 | - name: Fetch Sources
75 | uses: actions/checkout@v2
76 | with:
77 | path: source
78 | - name: Run through C-Util CI
79 | run: |
80 | docker run \
81 | --privileged \
82 | -v "$(pwd)/source:/github/workspace" \
83 | "ci-c-util:v1" \
84 | "--m32=1" \
85 | "--mesonargs=-Debpf=false" \
86 | "--source=/github/workspace"
87 |
88 | ci-valgrind:
89 | name: CI through Valgrind
90 | runs-on: ubuntu-latest
91 |
92 | steps:
93 | # See above in 'ci' job.
94 | - name: Fetch CI
95 | uses: actions/checkout@v2
96 | with:
97 | repository: c-util/automation
98 | ref: v1
99 | path: automation
100 | - name: Build CI
101 | working-directory: automation/src/ci-c-util
102 | run: docker build --tag ci-c-util:v1 .
103 |
104 | #
105 | # Run CI
106 | #
107 | # This again runs the CI, but this time through valgrind. Since some
108 | # syscalls are not implemented on x86-64 32bit compat (e.g., bpf(2)), we
109 | # disable the m32 mode.
110 | #
111 | - name: Fetch Sources
112 | uses: actions/checkout@v2
113 | with:
114 | path: source
115 | - name: Run through C-Util CI
116 | run: |
117 | docker run \
118 | --privileged \
119 | -v "$(pwd)/source:/github/workspace" \
120 | "ci-c-util:v1" \
121 | "--source=/github/workspace" \
122 | "--valgrind=1"
123 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "subprojects/c-list"]
2 | path = subprojects/c-list
3 | url = https://github.com/c-util/c-list.git
4 | [submodule "subprojects/c-siphash"]
5 | path = subprojects/c-siphash
6 | url = https://github.com/c-util/c-siphash.git
7 | [submodule "subprojects/c-rbtree"]
8 | path = subprojects/c-rbtree
9 | url = https://github.com/c-util/c-rbtree.git
10 | [submodule "subprojects/c-stdaux"]
11 | path = subprojects/c-stdaux
12 | url = https://github.com/c-util/c-stdaux.git
13 |
--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | LICENSE:
2 | This project is dual-licensed under both the Apache License, Version
3 | 2.0, and the GNU Lesser General Public License, Version 2.1+.
4 |
5 | AUTHORS-ASL:
6 | Licensed under the Apache License, Version 2.0 (the "License");
7 | you may not use this file except in compliance with the License.
8 | You may obtain a copy of the License at
9 |
10 | http://www.apache.org/licenses/LICENSE-2.0
11 |
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 |
18 | AUTHORS-LGPL:
19 | This program is free software; you can redistribute it and/or modify it
20 | under the terms of the GNU Lesser General Public License as published
21 | by the Free Software Foundation; either version 2.1 of the License, or
22 | (at your option) any later version.
23 |
24 | This program is distributed in the hope that it will be useful, but
25 | WITHOUT ANY WARRANTY; without even the implied warranty of
26 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 | Lesser General Public License for more details.
28 |
29 | You should have received a copy of the GNU Lesser General Public License
30 | along with this program; If not, see <http://www.gnu.org/licenses/>.
31 |
32 | COPYRIGHT: (ordered alphabetically)
33 | Copyright (C) 2015-2019 Red Hat, Inc.
34 |
35 | AUTHORS: (ordered alphabetically)
36 | Beniamino Galvani
37 | David Rheinsberg
38 | Thomas Haller
39 | Tom Gundersen
40 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # n-acd - IPv4 Address Conflict Detection
2 |
3 | ## CHANGES WITH 2:
4 |
5 | * All public destructors now include a variant that returns `void`.
6 | This was requested for easier integration with `glib` and friends.
7 | Similar to the `cleanup` variants, these variants are denoted by a
8 | single-character function-name suffix. E.g., `n_acd_freev()`
9 |
10 | * A fallback to `CLOCK_MONOTONIC` is now provided in case
11 | `CLOCK_BOOTTIME` is not supported by the kernel. Note that this is in
12 | no way signalled through the API, so if timers should follow the
13 | `BOOTTIME` rather than monotonic clock, a kernel with this clock is
14 | required.
15 |
16 | * The `c-sundry` dependency is no longer needed.
17 |
18 | * The `transport` configuration property is now mandatory for
19 | `n_acd_new()`. It defaulted to `ETHERNET` before, by mistake.
20 |
21 | * In-source documentation for the public API is now provided.
22 |
23 | Contributions from: Beniamino Galvani, David Herrmann, David
24 | Rheinsberg, Thomas Haller, Tom Gundersen
25 |
26 | - Tübingen, 2019-03-20
27 |
28 | ## CHANGES WITH 1:
29 |
30 | * Initial release of n-acd. This project implements the IPv4 Address
31 | Conflict Detection standard as defined in RFC-5227. The state machine
32 | is implemented in a shared library and provides a stable ISO-C11 API.
33 | The implementation is linux-only and relies heavily on the API
34 | behavior of recent linux kernel releases.
35 |
36 | * Compared to the pre-releases, this release supports many parallel
37 | probes on a single n-acd context. This reduces the number of
38 | allocated network resources to O(1), regardless of the number of running
39 | parallel probes.
40 |
41 | * The n-acd project is now dual-licensed: ASL-2.0 and LGPL-2.1+
42 |
43 | Contributions from: Beniamino Galvani, David Herrmann, Thomas Haller,
44 | Tom Gundersen
45 |
46 | - Tübingen, 2018-08-08
47 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | n-acd
2 | =====
3 |
4 | IPv4 Address Conflict Detection
5 |
6 | The n-acd project implements the IPv4 Address Conflict Detection standard as
7 | defined in RFC-5227. The state machine is implemented in a shared library and
8 | provides a stable ISO-C11 API. The implementation is linux-only and relies
9 | heavily on the API behavior of recent linux kernel releases.
10 |
11 | ### Project
12 |
13 | * **Website**:
14 | * **Bug Tracker**:
15 | * **Mailing-List**:
16 |
17 | ### Requirements
18 |
19 | The requirements for this project are:
20 |
21 | * `Linux kernel >= 3.19`
22 | * `libc` (e.g., `glibc >= 2.16`)
23 |
24 | At build-time, the following software is required:
25 |
26 | * `meson >= 0.41`
27 | * `pkg-config >= 0.29`
28 |
29 | ### Build
30 |
31 | The meson build-system is used for this project. Consult upstream
32 | documentation for detailed help. In most situations the following
33 | commands are sufficient to build and install from source:
34 |
35 | ```sh
36 | mkdir build
37 | cd build
38 | meson setup ..
39 | ninja
40 | meson test
41 | ninja install
42 | ```
43 |
44 | The following configuration options are available:
45 |
46 | * `ebpf`: This boolean controls whether `ebpf` features are used to improve
47 | the packet filtering performance. If disabled, classic bpf will be
48 | used. This feature requires a rather recent kernel (>=3.19).
49 | Default is: true
50 |
51 | ### Repository:
52 |
53 | - **web**:
54 | - **https**: `https://github.com/nettools/n-acd.git`
55 | - **ssh**: `git@github.com:nettools/n-acd.git`
56 |
57 | ### License:
58 |
59 | - **Apache-2.0** OR **LGPL-2.1-or-later**
60 | - See AUTHORS file for details.
61 |
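62 | ### Example
63 |
64 | The public API is documented in-source (see `src/n-acd.c`, `src/n-acd-probe.c`
65 | and the installed `n-acd.h` header). The sketch below outlines the intended
66 | call flow: create a context for a network interface, start a probe for an
67 | address, and drive the context by polling its file descriptor and draining
68 | the event queue. It is a hedged illustration only; the exact signatures of
69 | the constructors, setters and event calls are abridged assumptions here, so
70 | treat `n-acd.h` as authoritative.
71 |
72 | ```c
73 | #include <errno.h>
74 | #include <netinet/in.h>
75 | #include <poll.h>
76 | #include <stddef.h>
77 | #include <stdint.h>
78 | #include <n-acd.h>
79 |
80 | /* Probe @ip on @ifindex and, once ready, announce and defend it once. */
81 | static int probe_one(int ifindex, const uint8_t *mac, size_t n_mac, struct in_addr ip) {
82 |         NAcdConfig *config = NULL;
83 |         NAcdProbeConfig *probe_config = NULL;
84 |         NAcd *acd = NULL;
85 |         NAcdProbe *probe = NULL;
86 |         int r, fd;
87 |
88 |         /* Collect the context parameters: interface, transport and MAC address. */
89 |         r = n_acd_config_new(&config);
90 |         if (r)
91 |                 goto out;
92 |         n_acd_config_set_ifindex(config, ifindex);
93 |         n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
94 |         n_acd_config_set_mac(config, mac, n_mac);
95 |
96 |         r = n_acd_new(&acd, config);
97 |         if (r)
98 |                 goto out;
99 |
100 |         /* Start a probe; a timeout of a few hundred ms suits modern links. */
101 |         r = n_acd_probe_config_new(&probe_config);
102 |         if (r)
103 |                 goto out;
104 |         n_acd_probe_config_set_ip(probe_config, ip);
105 |         n_acd_probe_config_set_timeout(probe_config, 200);
106 |
107 |         r = n_acd_probe(acd, &probe, probe_config);
108 |         if (r)
109 |                 goto out;
110 |
111 |         /* Drive the context: poll its fd, dispatch, then drain the event queue. */
112 |         for (;;) {
113 |                 struct pollfd pfd;
114 |                 NAcdEvent *event;
115 |
116 |                 n_acd_get_fd(acd, &fd);
117 |                 pfd = (struct pollfd){ .fd = fd, .events = POLLIN };
118 |                 poll(&pfd, 1, -1);
119 |
120 |                 r = n_acd_dispatch(acd);
121 |                 if (r)
122 |                         goto out;
123 |
124 |                 while (!(r = n_acd_pop_event(acd, &event)) && event) {
125 |                         if (event->event == N_ACD_EVENT_READY) {
126 |                                 /* Configure the address on the interface here, then
127 |                                  * announce it and keep dispatching to defend it. */
128 |                                 n_acd_probe_announce(event->ready.probe, N_ACD_DEFEND_ONCE);
129 |                         } else if (event->event == N_ACD_EVENT_USED) {
130 |                                 r = -EADDRINUSE;
131 |                                 goto out;
132 |                         }
133 |                 }
134 |                 if (r)
135 |                         goto out;
136 |         }
137 |
138 | out:
139 |         n_acd_probe_free(probe);
140 |         n_acd_probe_config_free(probe_config);
141 |         if (acd)
142 |                 n_acd_unref(acd);
143 |         n_acd_config_free(config);
144 |         return r;
145 | }
146 | ```
147 |
148 | Errors are only returned for fatal conditions; conflicts, defenses and other
149 | protocol outcomes are reported through the event queue instead.
150 |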
--------------------------------------------------------------------------------
/meson.build:
--------------------------------------------------------------------------------
1 | project(
2 | 'n-acd',
3 | 'c',
4 | version: '2',
5 | license: 'Apache',
6 | default_options: [
7 | 'c_std=c11',
8 | ],
9 | )
10 | project_description = 'IPv4 Address Conflict Detection'
11 |
12 | add_project_arguments('-D_GNU_SOURCE', language: 'c')
13 | mod_pkgconfig = import('pkgconfig')
14 |
15 | sub_clist = subproject('c-list')
16 | sub_crbtree = subproject('c-rbtree')
17 | sub_csiphash = subproject('c-siphash')
18 | sub_cstdaux = subproject('c-stdaux')
19 |
20 | dep_clist = sub_clist.get_variable('libclist_dep')
21 | dep_crbtree = sub_crbtree.get_variable('libcrbtree_dep')
22 | dep_csiphash = sub_csiphash.get_variable('libcsiphash_dep')
23 | dep_cstdaux = sub_cstdaux.get_variable('libcstdaux_dep')
24 |
25 | use_ebpf = get_option('ebpf')
26 |
27 | subdir('src')
28 |
--------------------------------------------------------------------------------
/meson_options.txt:
--------------------------------------------------------------------------------
1 | option('ebpf', type: 'boolean', value: true, description: 'Enable eBPF packet filtering')
2 |
--------------------------------------------------------------------------------
/src/libnacd.sym:
--------------------------------------------------------------------------------
1 | LIBNACD_2 {
2 | global:
3 | n_acd_config_new;
4 | n_acd_config_free;
5 | n_acd_config_set_ifindex;
6 | n_acd_config_set_transport;
7 | n_acd_config_set_mac;
8 |
9 | n_acd_probe_config_new;
10 | n_acd_probe_config_free;
11 | n_acd_probe_config_set_ip;
12 | n_acd_probe_config_set_timeout;
13 |
14 | n_acd_new;
15 | n_acd_ref;
16 | n_acd_unref;
17 | n_acd_get_fd;
18 | n_acd_dispatch;
19 | n_acd_pop_event;
20 | n_acd_probe;
21 |
22 | n_acd_probe_free;
23 | n_acd_probe_set_userdata;
24 | n_acd_probe_get_userdata;
25 | n_acd_probe_announce;
26 | local:
27 | *;
28 | };
29 |
--------------------------------------------------------------------------------
/src/meson.build:
--------------------------------------------------------------------------------
1 | #
2 | # target: libnacd.so
3 | #
4 |
5 | libnacd_symfile = join_paths(meson.current_source_dir(), 'libnacd.sym')
6 |
7 | libnacd_deps = [
8 | dep_clist,
9 | dep_crbtree,
10 | dep_csiphash,
11 | dep_cstdaux,
12 | ]
13 |
14 | libnacd_sources = [
15 | 'n-acd.c',
16 | 'n-acd-probe.c',
17 | 'util/timer.c',
18 | ]
19 |
20 | if use_ebpf
21 | libnacd_sources += [
22 | 'n-acd-bpf.c',
23 | ]
24 | else
25 | libnacd_sources += [
26 | 'n-acd-bpf-fallback.c',
27 | ]
28 | endif
29 |
30 | libnacd_private = static_library(
31 | 'nacd-private',
32 | libnacd_sources,
33 | c_args: [
34 | '-fvisibility=hidden',
35 | '-fno-common'
36 | ],
37 | dependencies: libnacd_deps,
38 | pic: true,
39 | )
40 |
41 | libnacd_shared = shared_library(
42 | 'nacd',
43 | objects: libnacd_private.extract_all_objects(),
44 | dependencies: libnacd_deps,
45 | install: not meson.is_subproject(),
46 | soversion: 0,
47 | link_depends: libnacd_symfile,
48 | link_args: [
49 | '-Wl,--no-undefined',
50 | '-Wl,--version-script=@0@'.format(libnacd_symfile)
51 | ],
52 | )
53 |
54 | libnacd_dep = declare_dependency(
55 | include_directories: include_directories('.'),
56 | link_with: libnacd_private,
57 | dependencies: libnacd_deps,
58 | version: meson.project_version(),
59 | )
60 |
61 | if not meson.is_subproject()
62 | install_headers('n-acd.h')
63 |
64 | mod_pkgconfig.generate(
65 | libraries: libnacd_shared,
66 | version: meson.project_version(),
67 | name: 'libnacd',
68 | filebase: 'libnacd',
69 | description: project_description,
70 | )
71 | endif
72 |
73 | #
74 | # target: test-*
75 | #
76 |
77 | test_api = executable('test-api', ['test-api.c'], link_with: libnacd_shared)
78 | test('API Symbol Visibility', test_api)
79 |
80 | if use_ebpf
81 | test_bpf = executable('test-bpf', ['test-bpf.c'], dependencies: libnacd_dep)
82 | test('eBPF socket filtering', test_bpf)
83 | endif
84 |
85 | test_loopback = executable('test-loopback', ['test-loopback.c'], dependencies: libnacd_dep)
86 | test('Echo Suppression via Loopback', test_loopback)
87 |
88 | test_timer = executable('test-timer', ['util/test-timer.c'], dependencies: libnacd_dep)
89 | test('Timer helper', test_timer)
90 |
91 | #test_unplug = executable('test-unplug', ['test-unplug.c'], dependencies: libnacd_dep)
92 | #test('Async Interface Hotplug', test_unplug)
93 |
94 | test_veth = executable('test-veth', ['test-veth.c'], dependencies: libnacd_dep)
95 | test('Parallel ACD instances', test_veth)
96 |
--------------------------------------------------------------------------------
/src/n-acd-bpf-fallback.c:
--------------------------------------------------------------------------------
1 | /*
2 | * A noop implementation of eBPF filter for IPv4 Address Conflict Detection
3 | *
4 | * These are a collection of dummy functions that have no effect, but allow
5 | * n-acd to compile without eBPF support.
6 | *
7 | * See n-acd-bpf.c for documentation.
8 | */
9 |
10 | #include <netinet/if_ether.h>
11 | #include <netinet/in.h>
12 | #include "n-acd-private.h"
13 |
14 | int n_acd_bpf_map_create(int *mapfdp, size_t max_entries) {
15 | *mapfdp = -1;
16 | return 0;
17 | }
18 |
19 | int n_acd_bpf_map_add(int mapfd, struct in_addr *addrp) {
20 | return 0;
21 | }
22 |
23 | int n_acd_bpf_map_remove(int mapfd, struct in_addr *addrp) {
24 | return 0;
25 | }
26 |
27 | int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *macp) {
28 | *progfdp = -1;
29 | return 0;
30 | }
31 |
--------------------------------------------------------------------------------
/src/n-acd-bpf.c:
--------------------------------------------------------------------------------
1 | /*
2 | * eBPF filter for IPv4 Address Conflict Detection
3 | *
4 | * An eBPF map and an eBPF program are provided. The map contains all the
5 | * addresses address conflict detection is performed on, and the program
6 | * filters out all packets except exactly the packets relevant to the ACD
7 | * protocol on the addresses currently in the map.
8 | *
9 | * Note that userspace still has to filter the incoming packets, as filters
10 | * are applied when packets are queued on the socket, not when userspace
11 | * receives them. It is therefore possible to receive packets about addresses
12 | * that have already been removed.
13 | */
14 |
15 | #include <endian.h>
16 | #include <errno.h>
17 | #include <linux/bpf.h>
18 | #include <net/ethernet.h>
19 | #include <net/if_arp.h>
20 | #include <netinet/if_ether.h>
21 | #include <netinet/in.h>
22 | #include <stddef.h>
23 | #include <string.h>
24 | #include <sys/syscall.h>
25 | #include <unistd.h>
26 | #include "n-acd-private.h"
27 |
28 | #define BPF_LD_ABS(SIZE, IMM) \
29 | ((struct bpf_insn) { \
30 | .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
31 | .dst_reg = 0, \
32 | .src_reg = 0, \
33 | .off = 0, \
34 | .imm = IMM, \
35 | })
36 |
37 | #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
38 | ((struct bpf_insn) { \
39 | .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
40 | .dst_reg = DST, \
41 | .src_reg = SRC, \
42 | .off = OFF, \
43 | .imm = 0, \
44 | })
45 |
46 | #define BPF_LD_MAP_FD(DST, MAP_FD) \
47 | ((struct bpf_insn) { \
48 | .code = BPF_LD | BPF_DW | BPF_IMM, \
49 | .dst_reg = DST, \
50 | .src_reg = BPF_PSEUDO_MAP_FD, \
51 | .off = 0, \
52 | .imm = (__u32) (MAP_FD), \
53 | }), \
54 | ((struct bpf_insn) { \
55 | .code = 0, /* zero is reserved opcode */ \
56 | .dst_reg = 0, \
57 | .src_reg = 0, \
58 | .off = 0, \
59 | .imm = ((__u64) (MAP_FD)) >> 32, \
60 | })
61 |
62 | #define BPF_ALU_REG(OP, DST, SRC) \
63 | ((struct bpf_insn) { \
64 | .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
65 | .dst_reg = DST, \
66 | .src_reg = SRC, \
67 | .off = 0, \
68 | .imm = 0, \
69 | })
70 |
71 | #define BPF_ALU_IMM(OP, DST, IMM) \
72 | ((struct bpf_insn) { \
73 | .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
74 | .dst_reg = DST, \
75 | .src_reg = 0, \
76 | .off = 0, \
77 | .imm = IMM, \
78 | })
79 |
80 | #define BPF_MOV_REG(DST, SRC) \
81 | ((struct bpf_insn) { \
82 | .code = BPF_ALU64 | BPF_MOV | BPF_X, \
83 | .dst_reg = DST, \
84 | .src_reg = SRC, \
85 | .off = 0, \
86 | .imm = 0, \
87 | })
88 |
89 | #define BPF_MOV_IMM(DST, IMM) \
90 | ((struct bpf_insn) { \
91 | .code = BPF_ALU64 | BPF_MOV | BPF_K, \
92 | .dst_reg = DST, \
93 | .src_reg = 0, \
94 | .off = 0, \
95 | .imm = IMM, \
96 | })
97 |
98 | #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
99 | ((struct bpf_insn) { \
100 | .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
101 | .dst_reg = DST, \
102 | .src_reg = SRC, \
103 | .off = OFF, \
104 | .imm = 0, \
105 | })
106 |
107 | #define BPF_JMP_REG(OP, DST, SRC, OFF) \
108 | ((struct bpf_insn) { \
109 | .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
110 | .dst_reg = DST, \
111 | .src_reg = SRC, \
112 | .off = OFF, \
113 | .imm = 0, \
114 | })
115 |
116 | #define BPF_JMP_IMM(OP, DST, IMM, OFF) \
117 | ((struct bpf_insn) { \
118 | .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
119 | .dst_reg = DST, \
120 | .src_reg = 0, \
121 | .off = OFF, \
122 | .imm = IMM, \
123 | })
124 |
125 | #define BPF_EMIT_CALL(FUNC) \
126 | ((struct bpf_insn) { \
127 | .code = BPF_JMP | BPF_CALL, \
128 | .dst_reg = 0, \
129 | .src_reg = 0, \
130 | .off = 0, \
131 | .imm = FUNC, \
132 | })
133 |
134 | #define BPF_EXIT_INSN() \
135 | ((struct bpf_insn) { \
136 | .code = BPF_JMP | BPF_EXIT, \
137 | .dst_reg = 0, \
138 | .src_reg = 0, \
139 | .off = 0, \
140 | .imm = 0, \
141 | })
142 |
143 | static int n_acd_syscall_bpf(int cmd, union bpf_attr *attr, unsigned int size) {
144 | return (int)syscall(__NR_bpf, cmd, attr, size);
145 | }
146 |
147 | int n_acd_bpf_map_create(int *mapfdp, size_t max_entries) {
148 | union bpf_attr attr;
149 | int mapfd;
150 |
151 | memset(&attr, 0, sizeof(attr));
152 | attr = (union bpf_attr){
153 | .map_type = BPF_MAP_TYPE_HASH,
154 | .key_size = sizeof(uint32_t),
155 | .value_size = sizeof(uint8_t), /* values are never used, but must be set */
156 | .max_entries = max_entries,
157 | };
158 |
159 | mapfd = n_acd_syscall_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
160 | if (mapfd < 0)
161 | return -errno;
162 |
163 | *mapfdp = mapfd;
164 | return 0;
165 | }
166 |
167 | int n_acd_bpf_map_add(int mapfd, struct in_addr *addrp) {
168 | union bpf_attr attr;
169 | uint32_t addr = be32toh(addrp->s_addr);
170 | uint8_t _dummy = 0;
171 | int r;
172 |
173 | memset(&attr, 0, sizeof(attr));
174 | attr = (union bpf_attr){
175 | .map_fd = mapfd,
176 | .key = (uint64_t)(unsigned long)&addr,
177 | .value = (uint64_t)(unsigned long)&_dummy,
178 | .flags = BPF_NOEXIST,
179 | };
180 |
181 | r = n_acd_syscall_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
182 | if (r < 0)
183 | return -errno;
184 |
185 | return 0;
186 | }
187 |
188 | int n_acd_bpf_map_remove(int mapfd, struct in_addr *addrp) {
189 | uint32_t addr = be32toh(addrp->s_addr);
190 | union bpf_attr attr;
191 | int r;
192 |
193 | memset(&attr, 0, sizeof(attr));
194 | attr = (union bpf_attr){
195 | .map_fd = mapfd,
196 | .key = (uint64_t)(unsigned long)&addr,
197 | };
198 |
199 | r = n_acd_syscall_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
200 | if (r < 0)
201 | return -errno;
202 |
203 | return 0;
204 | }
205 |
206 | int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *macp) {
207 | const union {
208 | uint8_t u8[6];
209 | uint16_t u16[3];
210 | uint32_t u32[1];
211 | } mac = {
212 | .u8 = {
213 | macp->ether_addr_octet[0],
214 | macp->ether_addr_octet[1],
215 | macp->ether_addr_octet[2],
216 | macp->ether_addr_octet[3],
217 | macp->ether_addr_octet[4],
218 | macp->ether_addr_octet[5],
219 | },
220 | };
221 | struct bpf_insn prog[] = {
222 | /* for using BPF_LD_ABS r6 must point to the skb, currently in r1 */
223 | BPF_MOV_REG(6, 1), /* r6 = r1 */
224 |
225 | /* drop the packet if it is too short */
226 | BPF_LDX_MEM(BPF_W, 0, 6, offsetof(struct __sk_buff, len)), /* r0 = skb->len */
227 | BPF_JMP_IMM(BPF_JGE, 0, sizeof(struct ether_arp), 2), /* if (r0 >= sizeof(ether_arp)) skip 2 */
228 | BPF_MOV_IMM(0, 0), /* r0 = 0 */
229 | BPF_EXIT_INSN(), /* return */
230 |
231 | /* drop the packet if the header is not as expected */
232 | BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_hrd)), /* r0 = header type */
233 | BPF_JMP_IMM(BPF_JEQ, 0, ARPHRD_ETHER, 2), /* if (r0 == ethernet) skip 2 */
234 | BPF_MOV_IMM(0, 0), /* r0 = 0 */
235 | BPF_EXIT_INSN(), /* return */
236 |
237 | BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_pro)), /* r0 = protocol */
238 | BPF_JMP_IMM(BPF_JEQ, 0, ETHERTYPE_IP, 2), /* if (r0 == IP) skip 2 */
239 | BPF_MOV_IMM(0, 0), /* r0 = 0 */
240 | BPF_EXIT_INSN(), /* return */
241 |
242 | BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_hln)), /* r0 = hw addr length */
243 | BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct ether_addr), 2), /* if (r0 == sizeof(ether_addr)) skip 2 */
244 | BPF_MOV_IMM(0, 0), /* r0 = 0 */
245 | BPF_EXIT_INSN(), /* return */
246 |
247 | BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_pln)), /* r0 = protocol addr length */
248 | BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct in_addr), 2), /* if (r0 == sizeof(in_addr)) skip 2 */
249 | BPF_MOV_IMM(0, 0), /* r0 = 0 */
250 | BPF_EXIT_INSN(), /* return */
251 |
252 | /* drop packets from our own mac address */
253 | BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_sha)), /* r0 = first four bytes of packet mac address */
254 | BPF_JMP_IMM(BPF_JNE, 0, be32toh(mac.u32[0]), 4), /* if (r0 != first four bytes of our mac address) skip 4 */
255 | BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_sha) + 4), /* r0 = last two bytes of packet mac address */
256 | BPF_JMP_IMM(BPF_JNE, 0, be16toh(mac.u16[2]), 2), /* if (r0 != last two bytes of our mac address) skip 2 */
257 | BPF_MOV_IMM(0, 0), /* r0 = 0 */
258 | BPF_EXIT_INSN(), /* return */
259 |
260 | /*
261 | * We listen for two kinds of packets:
262 | * Conflicts)
263 | * These are requests or replies with the sender address not set to INADDR_ANY. The
264 | * conflicted address is the sender address, remember this in r7.
265 | * Probes)
266 | * These are requests with the sender address set to INADDR_ANY. The probed address
267 | * is the target address, remember this in r7.
268 | * Any other packets are dropped.
269 | */
270 | BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_spa)), /* r0 = sender ip address */
271 | BPF_JMP_IMM(BPF_JEQ, 0, 0, 7), /* if (r0 == 0) skip 7 */
272 | BPF_MOV_REG(7, 0), /* r7 = r0 */
273 | BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)), /* r0 = operation */
274 | BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 3), /* if (r0 == request) skip 3 */
275 | BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REPLY, 2), /* if (r0 == reply) skip 2 */
276 | BPF_MOV_IMM(0, 0), /* r0 = 0 */
277 | BPF_EXIT_INSN(), /* return */
278 | BPF_JMP_IMM(BPF_JA, 0, 0, 6), /* skip 6 */
279 | BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_tpa)), /* r0 = target ip address */
280 | BPF_MOV_REG(7, 0), /* r7 = r0 */
281 | BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)), /* r0 = operation */
282 | BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 2), /* if (r0 == request) skip 2 */
283 | BPF_MOV_IMM(0, 0), /* r0 = 0 */
284 | BPF_EXIT_INSN(), /* return */
285 |
286 | /* check if the probe or conflict is for an address we are monitoring */
287 | BPF_STX_MEM(BPF_W, 10, 7, -4), /* *(uint32_t*)fp - 4 = r7 */
288 | BPF_MOV_REG(2, 10), /* r2 = fp */
289 | BPF_ALU_IMM(BPF_ADD, 2, -4), /* r2 -= 4 */
290 | BPF_LD_MAP_FD(1, mapfd), /* r1 = mapfd */
291 | BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), /* r0 = map_lookup_elem(r1, r2) */
292 | BPF_JMP_IMM(BPF_JNE, 0, 0, 2), /* if (r0 != NULL) skip 2 */
293 | BPF_MOV_IMM(0, 0), /* r0 = 0 */
294 | BPF_EXIT_INSN(), /* return */
295 |
296 |                 /* return exactly the packet length */
297 | BPF_MOV_IMM(0, sizeof(struct ether_arp)), /* r0 = sizeof(struct ether_arp) */
298 | BPF_EXIT_INSN(), /* return */
299 | };
300 | union bpf_attr attr;
301 | int progfd;
302 |
303 | memset(&attr, 0, sizeof(attr));
304 | attr = (union bpf_attr){
305 | .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
306 | .insns = (uint64_t)(unsigned long)prog,
307 | .insn_cnt = sizeof(prog) / sizeof(*prog),
308 | .license = (uint64_t)(unsigned long)"ASL",
309 | };
310 |
311 | progfd = n_acd_syscall_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
312 | if (progfd < 0)
313 | return -errno;
314 |
315 | *progfdp = progfd;
316 | return 0;
317 | }
318 |
--------------------------------------------------------------------------------
/src/n-acd-private.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <c-list.h>
4 | #include <c-rbtree.h>
5 | #include <c-stdaux.h>
6 | #include <inttypes.h>
7 | #include <netinet/if_ether.h>
8 | #include <netinet/in.h>
9 | #include <stdbool.h>
10 | #include <stddef.h>
11 | #include <stdlib.h>
12 | #include "util/timer.h"
13 | #include "n-acd.h"
14 |
15 | typedef struct NAcdEventNode NAcdEventNode;
16 |
17 | /* This augments the error-codes with internal ones that are never exposed. */
18 | enum {
19 | _N_ACD_INTERNAL = _N_ACD_E_N,
20 |
21 | N_ACD_E_DROPPED,
22 | };
23 |
24 | enum {
25 | N_ACD_PROBE_STATE_PROBING,
26 | N_ACD_PROBE_STATE_CONFIGURING,
27 | N_ACD_PROBE_STATE_ANNOUNCING,
28 | N_ACD_PROBE_STATE_FAILED,
29 | };
30 |
31 | struct NAcdConfig {
32 | int ifindex;
33 | unsigned int transport;
34 | uint8_t mac[ETH_ALEN];
35 | size_t n_mac;
36 | };
37 |
38 | #define N_ACD_CONFIG_NULL(_x) { \
39 | .transport = _N_ACD_TRANSPORT_N, \
40 | }
41 |
42 | struct NAcdProbeConfig {
43 | struct in_addr ip;
44 | uint64_t timeout_msecs;
45 | };
46 |
47 | #define N_ACD_PROBE_CONFIG_NULL(_x) { \
48 | .timeout_msecs = N_ACD_TIMEOUT_RFC5227, \
49 | }
50 |
51 | struct NAcdEventNode {
52 | CList acd_link;
53 | CList probe_link;
54 | NAcdEvent event;
55 | uint8_t sender[ETH_ALEN];
56 | bool is_public : 1;
57 | };
58 |
59 | #define N_ACD_EVENT_NODE_NULL(_x) { \
60 | .acd_link = C_LIST_INIT((_x).acd_link), \
61 | .probe_link = C_LIST_INIT((_x).probe_link), \
62 | }
63 |
64 | struct NAcd {
65 | unsigned long n_refs;
66 | unsigned int seed;
67 | int fd_epoll;
68 | int fd_socket;
69 | CRBTree ip_tree;
70 | CList event_list;
71 | Timer timer;
72 |
73 | /* BPF map */
74 | int fd_bpf_map;
75 | size_t n_bpf_map;
76 | size_t max_bpf_map;
77 |
78 | /* configuration */
79 | int ifindex;
80 | uint8_t mac[ETH_ALEN];
81 |
82 | /* flags */
83 | bool preempted : 1;
84 | };
85 |
86 | #define N_ACD_NULL(_x) { \
87 | .n_refs = 1, \
88 | .fd_epoll = -1, \
89 | .fd_socket = -1, \
90 | .ip_tree = C_RBTREE_INIT, \
91 | .event_list = C_LIST_INIT((_x).event_list), \
92 | .timer = TIMER_NULL((_x).timer), \
93 | .fd_bpf_map = -1, \
94 | }
95 |
96 | struct NAcdProbe {
97 | NAcd *acd;
98 | CRBNode ip_node;
99 | CList event_list;
100 | Timeout timeout;
101 |
102 | /* configuration */
103 | struct in_addr ip;
104 | uint64_t timeout_multiplier;
105 | void *userdata;
106 |
107 | /* state */
108 | unsigned int state;
109 | unsigned int n_iteration;
110 | unsigned int defend;
111 | uint64_t last_defend;
112 | };
113 |
114 | #define N_ACD_PROBE_NULL(_x) { \
115 | .ip_node = C_RBNODE_INIT((_x).ip_node), \
116 | .event_list = C_LIST_INIT((_x).event_list), \
117 | .timeout = TIMEOUT_INIT((_x).timeout), \
118 | .state = N_ACD_PROBE_STATE_PROBING, \
119 | .defend = N_ACD_DEFEND_NEVER, \
120 | }
121 |
122 | /* events */
123 |
124 | int n_acd_event_node_new(NAcdEventNode **nodep);
125 | NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node);
126 |
127 | /* contexts */
128 |
129 | void n_acd_remember(NAcd *acd, uint64_t now, bool success);
130 | int n_acd_raise(NAcd *acd, NAcdEventNode **nodep, unsigned int event);
131 | int n_acd_send(NAcd *acd, const struct in_addr *tpa, const struct in_addr *spa);
132 | int n_acd_ensure_bpf_map_space(NAcd *acd);
133 |
134 | /* probes */
135 |
136 | int n_acd_probe_new(NAcdProbe **probep, NAcd *acd, NAcdProbeConfig *config);
137 | int n_acd_probe_raise(NAcdProbe *probe, NAcdEventNode **nodep, unsigned int event);
138 | int n_acd_probe_handle_timeout(NAcdProbe *probe);
139 | int n_acd_probe_handle_packet(NAcdProbe *probe, struct ether_arp *packet, bool hard_conflict);
140 |
141 | /* eBPF */
142 |
143 | int n_acd_bpf_map_create(int *mapfdp, size_t max_elements);
144 | int n_acd_bpf_map_add(int mapfd, struct in_addr *addr);
145 | int n_acd_bpf_map_remove(int mapfd, struct in_addr *addr);
146 |
147 | int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *mac);
148 |
149 | /* inline helpers */
150 |
151 | static inline void n_acd_event_node_freep(NAcdEventNode **node) {
152 | if (*node)
153 | n_acd_event_node_free(*node);
154 | }
155 |
--------------------------------------------------------------------------------
/src/n-acd-probe.c:
--------------------------------------------------------------------------------
1 | /*
2 | * IPv4 Address Conflict Detection
3 | *
4 | * This file implements the probe object. A probe is basically the
5 | * state-machine of a single ACD run. It takes an address to probe for, checks
6 | * for conflicts and then defends it once configured.
7 | */
8 |
9 | #include <c-list.h>
10 | #include <c-rbtree.h>
11 | #include <c-stdaux.h>
12 | #include <errno.h>
13 | #include <inttypes.h>
14 | #include <limits.h>
15 | #include <netinet/if_ether.h>
16 | #include <netinet/in.h>
17 | #include <stdbool.h>
18 | #include <stddef.h>
19 | #include <stdio.h>
20 | #include <stdlib.h>
21 | #include <string.h>
22 | #include <unistd.h>
23 | #include "n-acd.h"
24 | #include "n-acd-private.h"
25 |
26 | /*
27 | * These parameters and timing intervals are specified in RFC-5227. The
28 | * original values are:
29 | *
30 | * PROBE_NUM 3
31 | * PROBE_WAIT 1s
32 | * PROBE_MIN 1s
33 | * PROBE_MAX 3s
34 | * ANNOUNCE_NUM 3
35 | * ANNOUNCE_WAIT 2s
36 | * ANNOUNCE_INTERVAL 2s
37 | * MAX_CONFLICTS 10
38 | * RATE_LIMIT_INTERVAL 60s
39 | * DEFEND_INTERVAL 10s
40 | *
41 | * If we assume a best-case and worst-case scenario for non-conflicted runs, we
42 | * end up with a runtime between 4s and 9s to finish the probe. Then it still
43 | * takes a fixed 4s to finish the announcements.
44 | *
45 | * RFC 5227 section 1.1:
46 | * [...] (Note that the values listed here are fixed constants; they are
47 | * not intended to be modifiable by implementers, operators, or end users.
48 | * These constants are given symbolic names here to facilitate the writing
49 | * of future standards that may want to reference this document with
50 | * different values for these named constants; however, at the present time
51 | * no such future standards exist.) [...]
52 | *
53 | * Unfortunately, no-one ever stepped up to write a "future standard" to revise
54 | * the timings. A 9s timeout for successful link setups is not acceptable today.
55 | * Hence, we will just go forward and ignore the proposed values. On both
56 | * wired and wireless local links round-trip latencies of below 3ms are common.
57 | * We require the caller to set a timeout multiplier, where 1 corresponds to a
58 | * total probe time between 0.5 ms and 1.0 ms. On modern networks a multiplier
59 | * of about 100 should be a reasonable default. To comply with the RFC select a
60 | * multiplier of 9000.
61 | */
62 | #define N_ACD_RFC_PROBE_NUM (3)
63 | #define N_ACD_RFC_PROBE_WAIT_NSEC (UINT64_C(111111)) /* 1/9 ms */
64 | #define N_ACD_RFC_PROBE_MIN_NSEC (UINT64_C(111111)) /* 1/9 ms */
65 | #define N_ACD_RFC_PROBE_MAX_NSEC (UINT64_C(333333)) /* 3/9 ms */
66 | #define N_ACD_RFC_ANNOUNCE_NUM (3)
67 | #define N_ACD_RFC_ANNOUNCE_WAIT_NSEC (UINT64_C(222222)) /* 2/9 ms */
68 | #define N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC (UINT64_C(222222)) /* 2/9 ms */
69 | #define N_ACD_RFC_MAX_CONFLICTS (10)
70 | #define N_ACD_RFC_RATE_LIMIT_INTERVAL_NSEC (UINT64_C(60000000000)) /* 60s */
71 | #define N_ACD_RFC_DEFEND_INTERVAL_NSEC (UINT64_C(10000000000)) /* 10s */
72 |
73 | /**
74 | * n_acd_probe_config_new() - create probe configuration
75 | * @configp: output argument for new probe configuration
76 | *
77 | * This creates a new probe configuration. It will be returned in @configp to
78 | * the caller, which upon return fully owns the object.
79 | *
80 | * A probe configuration collects parameters for probes. It never validates the
81 | * input, but this is left to the consumer of the configuration to do.
82 | *
83 | * Return: 0 on success, negative error code on failure.
84 | */
85 | _c_public_ int n_acd_probe_config_new(NAcdProbeConfig **configp) {
86 | _c_cleanup_(n_acd_probe_config_freep) NAcdProbeConfig *config = NULL;
87 |
88 | config = malloc(sizeof(*config));
89 | if (!config)
90 | return -ENOMEM;
91 |
92 | *config = (NAcdProbeConfig)N_ACD_PROBE_CONFIG_NULL(*config);
93 |
94 | *configp = config;
95 | config = NULL;
96 | return 0;
97 | }
98 |
99 | /**
100 | * n_acd_probe_config_free() - destroy probe configuration
101 | * @config: configuration to operate on, or NULL
102 | *
103 | * This destroys the probe configuration and all associated objects. If @config
104 | * is NULL, this is a no-op.
105 | *
106 | * Return: NULL is returned.
107 | */
108 | _c_public_ NAcdProbeConfig *n_acd_probe_config_free(NAcdProbeConfig *config) {
109 | if (!config)
110 | return NULL;
111 |
112 | free(config);
113 |
114 | return NULL;
115 | }
116 |
117 | /**
118 | * n_acd_probe_config_set_ip() - set ip property
119 | * @config: configuration to operate on
120 | * @ip: ip to set
121 | *
122 | * This sets the IP property to the value `ip`. The address is copied into the
123 | * configuration object. No validation is performed.
124 | *
125 | * The IP property selects the IP address that a probe checks for. It is the
126 | * caller's responsibility to guarantee the address is valid and can be used.
127 | */
128 | _c_public_ void n_acd_probe_config_set_ip(NAcdProbeConfig *config, struct in_addr ip) {
129 | config->ip = ip;
130 | }
131 |
132 | /**
133 | * n_acd_probe_config_set_timeout() - set timeout property
134 | * @config: configuration to operate on
135 | * @msecs: timeout to set, in milliseconds
136 | *
137 | * This sets the timeout to use for a conflict detection probe. The
138 | * specification default is provided as `N_ACD_TIMEOUT_RFC5227` and corresponds
139 | * to 9 seconds.
140 | *
141 | * If set to 0, conflict detection is skipped and the address is immediately
142 | * advertised and defended.
143 | *
144 | * Depending on the transport used, the API user should select a suitable
145 | * timeout. Since `ACD` only operates on the link layer, timeouts in the
146 | * hundreds of milliseconds range should be more than enough for any modern
147 | * network. Note that increasing this value directly affects the time it takes
148 | * to connect to a network, since an address should not be used until conflict
149 | * detection finishes.
150 | *
151 | * Using the specification default is **discouraged**. It is way too slow and
152 | * not appropriate for modern networks.
153 | *
154 | * Default value is `N_ACD_TIMEOUT_RFC5227`.
155 | */
156 | _c_public_ void n_acd_probe_config_set_timeout(NAcdProbeConfig *config, uint64_t msecs) {
157 | config->timeout_msecs = msecs;
158 | }
159 |
160 | static void n_acd_probe_schedule(NAcdProbe *probe, uint64_t n_timeout, unsigned int n_jitter) {
161 | uint64_t n_time;
162 |
163 | timer_now(&probe->acd->timer, &n_time);
164 | n_time += n_timeout;
165 |
166 | /*
167 | * ACD specifies jitter values to reduce packet storms on the local
168 | * link. This call accepts the maximum relative jitter value in
169 | * nanoseconds as @n_jitter. We then use rand_r(3p) to get a
170 | * pseudo-random jitter on top of the real timeout given as @n_timeout.
171 | */
172 | if (n_jitter) {
173 | uint64_t random;
174 |
175 | random = ((uint64_t)rand_r(&probe->acd->seed) << 32) | (uint64_t)rand_r(&probe->acd->seed);
176 | n_time += random % n_jitter;
177 | }
178 |
179 | timeout_schedule(&probe->timeout, &probe->acd->timer, n_time);
180 | }
181 |
182 | static void n_acd_probe_unschedule(NAcdProbe *probe) {
183 | timeout_unschedule(&probe->timeout);
184 | }
185 |
186 | static bool n_acd_probe_is_unique(NAcdProbe *probe) {
187 | NAcdProbe *sibling;
188 |
189 | if (!c_rbnode_is_linked(&probe->ip_node))
190 | return false;
191 |
192 | sibling = c_rbnode_entry(c_rbnode_next(&probe->ip_node), NAcdProbe, ip_node);
193 | if (sibling && sibling->ip.s_addr == probe->ip.s_addr)
194 | return false;
195 |
196 | sibling = c_rbnode_entry(c_rbnode_prev(&probe->ip_node), NAcdProbe, ip_node);
197 | if (sibling && sibling->ip.s_addr == probe->ip.s_addr)
198 | return false;
199 |
200 | return true;
201 | }
202 |
203 | static int n_acd_probe_link(NAcdProbe *probe) {
204 | int r;
205 |
206 | /*
207 | * Make sure the kernel bpf map has space for at least one more
208 | * entry.
209 | */
210 | r = n_acd_ensure_bpf_map_space(probe->acd);
211 | if (r)
212 | return r;
213 |
214 | /*
215 | * Link entry into context, indexed by its IP. Note that we allow
216 | * duplicates just fine. It is up to you to decide whether to avoid
217 | * duplicates, if you don't want them. Duplicates on the same context
218 | * do not conflict with each other, though.
219 | */
220 | {
221 | CRBNode **slot, *parent;
222 | NAcdProbe *other;
223 |
224 | slot = &probe->acd->ip_tree.root;
225 | parent = NULL;
226 | while (*slot) {
227 | other = c_rbnode_entry(*slot, NAcdProbe, ip_node);
228 | parent = *slot;
229 | if (probe->ip.s_addr < other->ip.s_addr)
230 | slot = &(*slot)->left;
231 | else
232 | slot = &(*slot)->right;
233 | }
234 |
235 | c_rbtree_add(&probe->acd->ip_tree, parent, slot, &probe->ip_node);
236 | }
237 |
238 | /*
239 | * Add the ip address to the map, if it is not already there.
240 | */
241 | if (n_acd_probe_is_unique(probe)) {
242 | r = n_acd_bpf_map_add(probe->acd->fd_bpf_map, &probe->ip);
243 | if (r) {
244 | /*
245 | * Make sure the IP address is linked in userspace iff
246 | * it is linked in the kernel.
247 | */
248 | c_rbnode_unlink(&probe->ip_node);
249 | return r;
250 | }
251 | ++probe->acd->n_bpf_map;
252 | }
253 |
254 | return 0;
255 | }
256 |
257 | static void n_acd_probe_unlink(NAcdProbe *probe) {
258 | int r;
259 |
260 | /*
261 | * If this is the only probe for a given IP, remove the IP from the
262 | * kernel BPF map.
263 | */
264 | if (n_acd_probe_is_unique(probe)) {
265 | r = n_acd_bpf_map_remove(probe->acd->fd_bpf_map, &probe->ip);
266 | c_assert(r >= 0);
267 | --probe->acd->n_bpf_map;
268 | }
269 | c_rbnode_unlink(&probe->ip_node);
270 | }
271 |
272 | int n_acd_probe_new(NAcdProbe **probep, NAcd *acd, NAcdProbeConfig *config) {
273 | _c_cleanup_(n_acd_probe_freep) NAcdProbe *probe = NULL;
274 | int r;
275 |
276 | if (!config->ip.s_addr)
277 | return N_ACD_E_INVALID_ARGUMENT;
278 |
279 | probe = malloc(sizeof(*probe));
280 | if (!probe)
281 | return -ENOMEM;
282 |
283 | *probe = (NAcdProbe)N_ACD_PROBE_NULL(*probe);
284 | probe->acd = n_acd_ref(acd);
285 | probe->ip = config->ip;
286 |
287 | /*
288 | * We use the provided timeout-length as multiplier for all our
289 | * timeouts. The provided timeout defines the maximum length of an
290 | * entire probe-interval until the first announcement. Given the
291 | * spec-provided parameters, this ends up as:
292 | *
293 | * PROBE_WAIT + PROBE_MAX + PROBE_MAX + ANNOUNCE_WAIT
294 | * = 1s + 3s + 3s + 2s
295 | * = 9s
296 | *
297 | * Hence, the default value for this timeout is 9000ms, which just
298 | * ends up matching the spec-provided values.
299 | *
300 | * What we now semantically do is divide this timeout by 1ns/1000000.
301 | * This first turns it into nanoseconds, then strips the unit by
302 | * turning it into a multiplier. However, rather than performing the
303 |          * division here, we multiply all our timeouts by 1000000 statically
304 | * at compile time. Therefore, we can use the user-provided timeout as
305 | * unmodified multiplier. No conversion necessary.
306 | */
307 | probe->timeout_multiplier = config->timeout_msecs;
308 |
309 | r = n_acd_probe_link(probe);
310 | if (r)
311 | return r;
312 |
313 | /*
314 | * Now that everything is set up, we have to send the first probe. This
315 | * is done after ~PROBE_WAIT seconds, hence we schedule our timer.
316 | * In case no timeout-multiplier is set, we pretend we already sent all
317 | * probes successfully and schedule the timer so we proceed with the
318 | * announcements. We must schedule a fake timer there, since we are not
319 | * allowed to advance the state machine outside of n_acd_dispatch().
320 | */
321 | if (probe->timeout_multiplier) {
322 | probe->n_iteration = 0;
323 | n_acd_probe_schedule(probe,
324 | 0,
325 | probe->timeout_multiplier * N_ACD_RFC_PROBE_WAIT_NSEC);
326 | } else {
327 | probe->n_iteration = N_ACD_RFC_PROBE_NUM;
328 | n_acd_probe_schedule(probe, 0, 0);
329 | }
330 |
331 | *probep = probe;
332 | probe = NULL;
333 | return 0;
334 | }
335 |
336 | /**
337 | * n_acd_probe_free() - destroy a probe
338 | * @probe: probe to operate on, or NULL
339 | *
340 | * This destroys the probe specified by @probe. All operations are immediately
341 | * ceded and all associated objects are released.
342 | *
343 | * If @probe is NULL, this is a no-op.
344 | *
345 | * This function will flush all events associated with @probe from the event
346 | * queue. That is, no events will be returned for this @probe anymore.
347 | *
348 | * Return: NULL is returned.
349 | */
350 | _c_public_ NAcdProbe *n_acd_probe_free(NAcdProbe *probe) {
351 | NAcdEventNode *node, *t_node;
352 |
353 | if (!probe)
354 | return NULL;
355 |
356 | c_list_for_each_entry_safe(node, t_node, &probe->event_list, probe_link)
357 | n_acd_event_node_free(node);
358 |
359 | n_acd_probe_unschedule(probe);
360 | n_acd_probe_unlink(probe);
361 | probe->acd = n_acd_unref(probe->acd);
362 | free(probe);
363 |
364 | return NULL;
365 | }
366 |
367 | int n_acd_probe_raise(NAcdProbe *probe, NAcdEventNode **nodep, unsigned int event) {
368 | _c_cleanup_(n_acd_event_node_freep) NAcdEventNode *node = NULL;
369 | int r;
370 |
371 | r = n_acd_raise(probe->acd, &node, event);
372 | if (r)
373 | return r;
374 |
375 | switch (event) {
376 | case N_ACD_EVENT_READY:
377 | node->event.ready.probe = probe;
378 | break;
379 | case N_ACD_EVENT_USED:
380 | node->event.used.probe = probe;
381 | break;
382 | case N_ACD_EVENT_DEFENDED:
383 | node->event.defended.probe = probe;
384 | break;
385 | case N_ACD_EVENT_CONFLICT:
386 | node->event.conflict.probe = probe;
387 | break;
388 | default:
389 | c_assert(0);
390 | return -ENOTRECOVERABLE;
391 | }
392 |
393 | c_list_link_tail(&probe->event_list, &node->probe_link);
394 |
395 | if (nodep)
396 | *nodep = node;
397 | node = NULL;
398 | return 0;
399 | }
400 |
401 | int n_acd_probe_handle_timeout(NAcdProbe *probe) {
402 | int r;
403 |
404 | switch (probe->state) {
405 | case N_ACD_PROBE_STATE_PROBING:
406 | /*
407 | * We are still PROBING. We send 3 probes with a random timeout
408 | * scheduled between each. If, after a fixed timeout, we did
409 | * not receive any conflict we consider the probing successful.
410 | */
411 | if (probe->n_iteration < N_ACD_RFC_PROBE_NUM) {
412 | /*
413 | * We have not sent all 3 probes, yet. A timer fired,
414 | * so we are ready to send the next probe. If this is
415 | * the third probe, schedule a timer for ANNOUNCE_WAIT
416 | * to give other peers a chance to answer. If this is
417 | * not the third probe, wait between PROBE_MIN and
418 | * PROBE_MAX for the next probe.
419 | */
420 |
421 | r = n_acd_send(probe->acd, &probe->ip, NULL);
422 | if (r) {
423 | if (r != N_ACD_E_DROPPED)
424 | return r;
425 |
426 | /*
427 | * Packet was dropped, and we know about it. It
428 | * never reached the network. Reasons are
429 | * manifold, and n_acd_send() raises events if
430 | * necessary.
431 | * From a probe-perspective, we simply pretend
432 | * we never sent the probe and schedule a
433 | * timeout for the next probe, effectively
434 | * doubling a single probe-interval.
435 | */
436 | } else {
437 | /* Successfully sent, so advance counter. */
438 | ++probe->n_iteration;
439 | }
440 |
441 | if (probe->n_iteration < N_ACD_RFC_PROBE_NUM)
442 | n_acd_probe_schedule(probe,
443 | probe->timeout_multiplier * N_ACD_RFC_PROBE_MIN_NSEC,
444 | probe->timeout_multiplier * (N_ACD_RFC_PROBE_MAX_NSEC - N_ACD_RFC_PROBE_MIN_NSEC));
445 | else
446 | n_acd_probe_schedule(probe,
447 | probe->timeout_multiplier * N_ACD_RFC_ANNOUNCE_WAIT_NSEC,
448 | 0);
449 | } else {
450 | /*
451 | * All 3 probes succeeded and we waited enough to
452 | * consider this address usable by now. Do not announce
453 | * the address, yet. We must first give the caller a
454 | * chance to configure the address (so they can answer
455 | * ARP requests), before announcing it.
456 | */
457 | r = n_acd_probe_raise(probe, NULL, N_ACD_EVENT_READY);
458 | if (r)
459 | return r;
460 |
461 | probe->state = N_ACD_PROBE_STATE_CONFIGURING;
462 | }
463 |
464 | break;
465 |
466 | case N_ACD_PROBE_STATE_ANNOUNCING:
467 | /*
468 | * We are ANNOUNCING, meaning the caller configured the address
469 | * on the interface and is actively using it. We send 3
470 | * announcements out, in a short interval, and then just
471 | * perform passive conflict detection.
472 | * Note that once all 3 announcements are sent, we no longer
473 | * schedule a timer, so this part should not trigger, anymore.
474 | */
475 |
476 | r = n_acd_send(probe->acd, &probe->ip, &probe->ip);
477 | if (r) {
478 | if (r != N_ACD_E_DROPPED)
479 | return r;
480 |
481 | /*
482 | * See above in STATE_PROBING for details. We know the
483 | * packet was never sent, so we simply try again after
484 | * extending the timer.
485 | */
486 | } else {
487 | /* Successfully sent, so advance counter. */
488 | ++probe->n_iteration;
489 | }
490 |
491 | if (probe->n_iteration < N_ACD_RFC_ANNOUNCE_NUM) {
492 | /*
493 | * Announcements are always scheduled according to the
494 | * time-intervals specified in the spec. We always use
495 | * the RFC5227-mandated multiplier.
496 | * If you reconsider this, note that timeout_multiplier
497 | * might be 0 here.
498 | */
499 | n_acd_probe_schedule(probe,
500 | N_ACD_TIMEOUT_RFC5227 * N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC,
501 | 0);
502 | }
503 |
504 | break;
505 |
506 | case N_ACD_PROBE_STATE_CONFIGURING:
507 | case N_ACD_PROBE_STATE_FAILED:
508 | default:
509 | /*
510 | * There are no timeouts in these states. If we trigger one,
511 | * something is fishy.
512 | */
513 | c_assert(0);
514 | return -ENOTRECOVERABLE;
515 | }
516 |
517 | return 0;
518 | }
519 |
520 | int n_acd_probe_handle_packet(NAcdProbe *probe, struct ether_arp *packet, bool hard_conflict) {
521 | NAcdEventNode *node;
522 | uint64_t now;
523 | int r;
524 |
525 | timer_now(&probe->acd->timer, &now);
526 |
527 | switch (probe->state) {
528 | case N_ACD_PROBE_STATE_PROBING:
529 | /*
530 |                  * Regardless of whether this is a hard or soft conflict, we must
531 | * treat this as a probe failure. That is, notify the caller of
532 | * the conflict and wait for further instructions. We do not
533 | * react to this, until the caller tells us what to do, but we
534 | * do stop sending further probes.
535 | */
536 | r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_USED);
537 | if (r)
538 | return r;
539 |
540 | node->event.used.sender = node->sender;
541 | node->event.used.n_sender = ETH_ALEN;
542 | memcpy(node->sender, packet->arp_sha, ETH_ALEN);
543 |
544 | n_acd_probe_unschedule(probe);
545 | n_acd_probe_unlink(probe);
546 | probe->state = N_ACD_PROBE_STATE_FAILED;
547 |
548 | break;
549 |
550 | case N_ACD_PROBE_STATE_CONFIGURING:
551 | /*
552 | * We are waiting for the caller to configure the interface and
553 | * start ANNOUNCING. In this state, we cannot defend the
554 | * address as that would indicate that it is ready to be used,
555 | * and we cannot signal CONFLICT or USED as the caller may
556 | * already have started to use the address (and may have
557 | * configured the engine to always defend it, which means they
558 | * should be able to rely on never losing it after READY).
559 | * Simply drop the event, and rely on the anticipated ANNOUNCE
560 | * to trigger it again.
561 | */
562 |
563 | break;
564 |
565 | case N_ACD_PROBE_STATE_ANNOUNCING: {
566 | /*
567 | * We were already instructed to announce the address, which
568 | * means the address is configured and in use. Hence, the
569 | * caller is responsible to serve regular ARP queries. Meaning,
570 | * we can ignore any soft conflicts (other peers doing ACD).
571 | *
572 | * But if we see a hard-conflict, we either defend the address
573 | * according to the caller's instructions, or we report the
574 | * conflict and bail out.
575 | */
576 | bool conflict = false, rate_limited = false;
577 |
578 | if (!hard_conflict)
579 | break;
580 |
581 | rate_limited = now < probe->last_defend + N_ACD_RFC_DEFEND_INTERVAL_NSEC;
582 |
583 | switch (probe->defend) {
584 | case N_ACD_DEFEND_NEVER:
585 | conflict = true;
586 | break;
587 | case N_ACD_DEFEND_ONCE:
588 | if (rate_limited) {
589 | conflict = true;
590 | break;
591 | }
592 |
593 | /* fallthrough */
594 | case N_ACD_DEFEND_ALWAYS:
595 | if (!rate_limited) {
596 | r = n_acd_send(probe->acd, &probe->ip, &probe->ip);
597 | if (r) {
598 | if (r != N_ACD_E_DROPPED)
599 | return r;
600 |
601 | if (probe->defend == N_ACD_DEFEND_ONCE) {
602 | conflict = true;
603 | break;
604 | }
605 | }
606 |
607 | if (r != N_ACD_E_DROPPED)
608 | probe->last_defend = now;
609 | }
610 |
611 | r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_DEFENDED);
612 | if (r)
613 | return r;
614 |
615 | node->event.defended.sender = node->sender;
616 | node->event.defended.n_sender = ETH_ALEN;
617 | memcpy(node->sender, packet->arp_sha, ETH_ALEN);
618 |
619 | break;
620 | }
621 |
622 | if (conflict) {
623 | r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_CONFLICT);
624 | if (r)
625 | return r;
626 |
627 | node->event.conflict.sender = node->sender;
628 | node->event.conflict.n_sender = ETH_ALEN;
629 | memcpy(node->sender, packet->arp_sha, ETH_ALEN);
630 |
631 | n_acd_probe_unschedule(probe);
632 | n_acd_probe_unlink(probe);
633 | probe->state = N_ACD_PROBE_STATE_FAILED;
634 | }
635 |
636 | break;
637 | }
638 |
639 | case N_ACD_PROBE_STATE_FAILED:
640 | default:
641 | /*
642 | * We are not listening for packets in these states. If we receive one,
643 | * something is fishy.
644 | */
645 | c_assert(0);
646 | return -ENOTRECOVERABLE;
647 | }
648 |
649 | return 0;
650 | }
651 |
652 | /**
653 | * n_acd_probe_set_userdata() - set userdata
654 | * @probe: probe to operate on
655 | * @userdata: userdata pointer
656 | *
657 | * This can be used to set a caller-controlled user-data pointer on @probe. The
658 | * value of the pointer is never inspected or used by `n-acd` and is fully
659 | * under control of the caller.
660 | *
661 | * The default value is NULL.
662 | */
663 | _c_public_ void n_acd_probe_set_userdata(NAcdProbe *probe, void *userdata) {
664 | probe->userdata = userdata;
665 | }
666 |
667 | /**
668 | * n_acd_probe_get_userdata() - get userdata
669 | * @probe: probe to operate on
670 | *
671 | * This queries the userdata pointer that was previously set through
672 | * n_acd_probe_set_userdata().
673 | *
674 | * The default value is NULL.
675 | *
676 | * Return: The stored userdata pointer is returned in @userdatap.
677 | */
678 | _c_public_ void n_acd_probe_get_userdata(NAcdProbe *probe, void **userdatap) {
679 | *userdatap = probe->userdata;
680 | }
681 |
682 | /**
683 | * n_acd_probe_announce() - announce the configured IP address
684 | * @probe: probe to operate on
685 | * @defend: defense policy
686 | *
687 | * Announce the IP address on the local link, and start defending it according
688 | * to the given policy, which must be one of N_ACD_DEFEND_ONCE,
689 | * N_ACD_DEFEND_NEVER, or N_ACD_DEFEND_ALWAYS.
690 | *
691 | * This must be called in response to an N_ACD_EVENT_READY event, and only
692 | * after the given address has been configured on the given network interface.
693 | *
694 | * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT in case the defense policy
695 | * is invalid, negative error code on failure.
696 | */
697 | _c_public_ int n_acd_probe_announce(NAcdProbe *probe, unsigned int defend) {
698 | if (defend >= _N_ACD_DEFEND_N)
699 | return N_ACD_E_INVALID_ARGUMENT;
700 |
701 | probe->state = N_ACD_PROBE_STATE_ANNOUNCING;
702 | probe->defend = defend;
703 | probe->n_iteration = 0;
704 |
705 | /*
706 | * We must schedule a fake-timeout, since we are not allowed to
707 | * advance the state-machine outside of n_acd_dispatch().
708 | */
709 | n_acd_probe_schedule(probe, 0, 0);
710 |
711 | return 0;
712 | }
713 |
--------------------------------------------------------------------------------
/src/n-acd.c:
--------------------------------------------------------------------------------
1 | /*
2 | * IPv4 Address Conflict Detection
3 | *
4 | * This file contains the main context initialization and management functions,
5 | * as well as a bunch of utilities used throughout the n-acd modules.
6 | */
7 |
8 | /**
9 | * DOC: IPv4 Address Conflict Detection
10 | *
11 | * The `n-acd` project implements the IPv4 Address Conflict Detection protocol
12 | * as defined in RFC-5227. The protocol originates in the IPv4 Link Local
13 | * Address selection but was later generalized, resulting in `ACD`. The
14 | * idea is to use `ARP` to query a link for an address to see whether it
15 | * already exists on the network, as well as defending an address that is in
16 | * use on a network interface. Furthermore, `ACD` provides passive diagnostics
17 | * for administrators, as it will detect address conflicts automatically, which
18 | * then can be logged or shown to a user.
19 | *
20 | * The main context object of `n-acd` is the `NAcd` structure. It is a passive
21 | * ref-counted context object which drives `ACD` probes running on it. A
22 | * context is specific to a linux network device and transport. If multiple
23 | * network devices are used, then separate `NAcd` contexts must be deployed.
24 | *
25 | * The `NAcdProbe` object drives a single `ACD` state-machine. A probe is
26 | * created on an `NAcd` context by providing an address to probe for. The probe
27 | * will then raise notifications whether the address conflict detection found
28 | * something, or whether the address is ready to be used. Optionally, the probe
29 | * will then enter into passive mode and defend the address as long as it is
30 | * kept active.
31 | *
32 | * Note that the `n-acd` project only implements the networking protocol. It
33 | * never queries or modifies network interfaces. It completely relies on the
34 | * API user to react to notifications and update network interfaces
35 | * accordingly. `n-acd` uses an event-mechanism on every context object. All
36 | * events raised by any probe or operation on a given context are queued on
37 | * that context object. The event-queue can then be drained by the
38 | * API user. All events are properly asynchronous and designed in a way that no
39 | * synchronous reaction to any event is required. That is, the events are
40 | * carefully designed to allow forwarding via IPC (or even networks) to a
41 | * controller that handles them and specifies how to react. Furthermore, none
42 | * of the function calls of `n-acd` require synchronous error handling.
43 | * Instead, functions only ever return values on fatal errors. Everything else
44 | * is queued as events, thus guaranteeing that synchronous handling of return
45 | * values is not required. Exceptions are functions that do not affect internal
46 | * state or do not have an associated context object.
47 | */
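
/*
 * Illustrative sketch (not part of the library): a minimal setup of a context
 * and a single probe using the public API declared in `n-acd.h`. Error
 * handling is shortened, and `ifindex` and `mac` are assumed to describe the
 * network device the caller wants to run ACD on:
 *
 *         NAcdConfig *config = NULL;
 *         NAcdProbeConfig *probe_config = NULL;
 *         NAcd *acd = NULL;
 *         NAcdProbe *probe = NULL;
 *         struct in_addr ip = { htobe32((192 << 24) | (168 << 16) | 1) };
 *
 *         n_acd_config_new(&config);
 *         n_acd_config_set_ifindex(config, ifindex);
 *         n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
 *         n_acd_config_set_mac(config, mac, ETH_ALEN);
 *         n_acd_new(&acd, config);
 *         n_acd_config_free(config);
 *
 *         n_acd_probe_config_new(&probe_config);
 *         n_acd_probe_config_set_ip(probe_config, ip);
 *         n_acd_probe_config_set_timeout(probe_config, N_ACD_TIMEOUT_RFC5227);
 *         n_acd_probe(acd, &probe, probe_config);
 *         n_acd_probe_config_free(probe_config);
 *
 *         ... poll the context fd, dispatch, and drain events (see below) ...
 *
 *         n_acd_probe_free(probe);
 *         n_acd_unref(acd);
 */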
48 |
49 | #include
50 | #include
51 | #include
52 | #include
53 | #include
54 | #include
55 | #include
56 | #include
57 | #include
58 | #include
59 | #include
60 | #include
61 | #include
62 | #include
63 | #include
64 | #include
65 | #include
66 | #include
67 | #include
68 | #include "n-acd.h"
69 | #include "n-acd-private.h"
70 |
71 | enum {
72 | N_ACD_EPOLL_TIMER,
73 | N_ACD_EPOLL_SOCKET,
74 | };
75 |
76 | static int n_acd_get_random(unsigned int *random) {
77 | uint8_t hash_seed[] = {
78 | 0x3a, 0x0c, 0xa6, 0xdd, 0x44, 0xef, 0x5f, 0x7a,
79 | 0x5e, 0xd7, 0x25, 0x37, 0xbf, 0x4e, 0x80, 0xa1,
80 | };
81 | CSipHash hash = C_SIPHASH_NULL;
82 | struct timespec ts;
83 | const uint8_t *p;
84 | int r;
85 |
86 | /*
87 | * We need random jitter for all timeouts when handling ARP probes. Use
88 | * AT_RANDOM to get a seed for rand_r(3p), if available (should always
89 | * be available on linux). See the time-out scheduler for details.
90 | * Additionally, we include the current time in the seed. This avoids
91 | * using the same jitter in case you run multiple ACD engines in the
92 | * same process. Lastly, the seed is hashed with SipHash24 to avoid
93 | * exposing the value of AT_RANDOM on the network.
94 | */
95 | c_siphash_init(&hash, hash_seed);
96 |
97 | p = (const uint8_t *)getauxval(AT_RANDOM);
98 | if (p)
99 | c_siphash_append(&hash, p, 16);
100 |
101 | r = clock_gettime(CLOCK_MONOTONIC, &ts);
102 | if (r < 0)
103 | return -c_errno();
104 |
105 | c_siphash_append(&hash, (const uint8_t *)&ts.tv_sec, sizeof(ts.tv_sec));
106 | c_siphash_append(&hash, (const uint8_t *)&ts.tv_nsec, sizeof(ts.tv_nsec));
107 |
108 | *random = c_siphash_finalize(&hash);
109 | return 0;
110 | }
111 |
112 | static int n_acd_socket_new(int *fdp, int fd_bpf_prog, NAcdConfig *config) {
113 | const struct sockaddr_ll address = {
114 | .sll_family = AF_PACKET,
115 | .sll_protocol = htobe16(ETH_P_ARP),
116 | .sll_ifindex = config->ifindex,
117 | .sll_halen = ETH_ALEN,
118 | .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
119 | };
120 | int r, s = -1;
121 |
122 | s = socket(PF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
123 | if (s < 0) {
124 | r = -c_errno();
125 | goto error;
126 | }
127 |
128 | if (fd_bpf_prog >= 0) {
129 | r = setsockopt(s, SOL_SOCKET, SO_ATTACH_BPF, &fd_bpf_prog, sizeof(fd_bpf_prog));
130 | if (r < 0) {
131 | r = -c_errno();
132 | goto error;
133 | }
134 | }
135 |
136 | r = bind(s, (struct sockaddr *)&address, sizeof(address));
137 | if (r < 0) {
138 | r = -c_errno();
139 | goto error;
140 | }
141 |
142 | *fdp = s;
143 | s = -1;
144 | return 0;
145 |
146 | error:
147 | if (s >= 0)
148 | close(s);
149 | return r;
150 | }
151 |
152 | /**
153 | * n_acd_config_new() - create configuration object
154 | * @configp: output argument for new configuration
155 | *
156 | * This creates a new configuration object and provides it to the caller. The
157 | * object is fully owned by the caller upon function return.
158 | *
159 | * A configuration object is a passive structure that is used to collect
160 | * information that is then passed to a constructor or other function. A
161 | * configuration object never validates its data; it is up to the consumer of a
162 | * configuration to do that.
163 | *
164 | * Return: 0 on success, negative error code on failure.
165 | */
166 | _c_public_ int n_acd_config_new(NAcdConfig **configp) {
167 | _c_cleanup_(n_acd_config_freep) NAcdConfig *config = NULL;
168 |
169 | config = malloc(sizeof(*config));
170 | if (!config)
171 | return -ENOMEM;
172 |
173 | *config = (NAcdConfig)N_ACD_CONFIG_NULL(*config);
174 |
175 | *configp = config;
176 | config = NULL;
177 | return 0;
178 | }
179 |
180 | /**
181 | * n_acd_config_free() - destroy configuration object
182 | * @config: configuration to operate on, or NULL
183 | *
184 | * This destroys the configuration object @config. If @config is NULL, this is
185 | * a no-op.
186 | *
187 | * Return: NULL is returned.
188 | */
189 | _c_public_ NAcdConfig *n_acd_config_free(NAcdConfig *config) {
190 | if (!config)
191 | return NULL;
192 |
193 | free(config);
194 |
195 | return NULL;
196 | }
197 |
198 | /**
199 | * n_acd_config_set_ifindex() - set ifindex property
200 | * @config: configuration to operate on
201 | * @ifindex: ifindex to set
202 | *
203 | * This sets the @ifindex property of the configuration object. Any previous
204 | * value is overwritten.
205 | *
206 | * A valid ifindex is a 32bit integer greater than 0. Any other value is
207 | * treated as unspecified.
208 | *
209 | * The ifindex corresponds to the interface index provided by the linux kernel.
210 | * It specifies the network device to be used.
211 | */
212 | _c_public_ void n_acd_config_set_ifindex(NAcdConfig *config, int ifindex) {
213 | config->ifindex = ifindex;
214 | }
215 |
216 | /**
217 | * n_acd_config_set_transport() - set transport property
218 | * @config: configuration to operate on
219 | * @transport: transport to set
220 | *
221 | * This specifies the transport to use. A transport must be one of the
222 | * `N_ACD_TRANSPORT_*` identifiers. It selects which transport protocol `n-acd`
223 | * will run on.
224 | */
225 | _c_public_ void n_acd_config_set_transport(NAcdConfig *config, unsigned int transport) {
226 | config->transport = transport;
227 | }
228 |
229 | /**
230 | * n_acd_config_set_mac() - set mac property
231 | * @config: configuration to operate on
232 | * @mac: mac to set
233 | *
234 | * This specifies the hardware address (also referred to as `MAC Address`) to
235 | * use. Any hardware address can be specified. It is the caller's
236 | * responsibility to make sure the address can actually be used.
237 | *
238 | * The address in @mac is copied into @config. It does not have to be retained
239 | * by the caller.
240 | */
241 | _c_public_ void n_acd_config_set_mac(NAcdConfig *config, const uint8_t *mac, size_t n_mac) {
242 | /*
243 | * We truncate the address at the maximum we support. We still remember
244 | * the original length, so any consumer of this configuration can then
245 | * complain about an unsupported address length. This allows us to
246 | * avoid a memory allocation here and having to return `int`.
247 | */
248 | config->n_mac = n_mac;
249 | memcpy(config->mac, mac, n_mac > ETH_ALEN ? ETH_ALEN : n_mac);
250 | }
251 |
252 | int n_acd_event_node_new(NAcdEventNode **nodep) {
253 | NAcdEventNode *node;
254 |
255 | node = malloc(sizeof(*node));
256 | if (!node)
257 | return -ENOMEM;
258 |
259 | *node = (NAcdEventNode)N_ACD_EVENT_NODE_NULL(*node);
260 |
261 | *nodep = node;
262 | return 0;
263 | }
264 |
265 | NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node) {
266 | if (!node)
267 | return NULL;
268 |
269 | c_list_unlink(&node->probe_link);
270 | c_list_unlink(&node->acd_link);
271 | free(node);
272 |
273 | return NULL;
274 | }
275 |
276 | int n_acd_ensure_bpf_map_space(NAcd *acd) {
277 | NAcdProbe *probe;
278 | _c_cleanup_(c_closep) int fd_map = -1, fd_prog = -1;
279 | size_t max_map;
280 | int r;
281 |
282 | if (acd->n_bpf_map < acd->max_bpf_map)
283 | return 0;
284 |
285 | max_map = 2 * acd->max_bpf_map;
286 |
287 | r = n_acd_bpf_map_create(&fd_map, max_map);
288 | if (r)
289 | return r;
290 |
291 | c_rbtree_for_each_entry(probe, &acd->ip_tree, ip_node) {
292 | r = n_acd_bpf_map_add(fd_map, &probe->ip);
293 | if (r)
294 | return r;
295 | }
296 |
297 | r = n_acd_bpf_compile(&fd_prog, fd_map, (struct ether_addr*) acd->mac);
298 | if (r)
299 | return r;
300 |
301 | if (fd_prog >= 0) {
302 | r = setsockopt(acd->fd_socket, SOL_SOCKET, SO_ATTACH_BPF, &fd_prog, sizeof(fd_prog));
303 | if (r)
304 | return -c_errno();
305 | }
306 |
307 | if (acd->fd_bpf_map >= 0)
308 | close(acd->fd_bpf_map);
309 | acd->fd_bpf_map = fd_map;
310 | fd_map = -1;
311 | acd->max_bpf_map = max_map;
312 | return 0;
313 | }
314 |
315 | /**
316 | * n_acd_new() - create a new ACD context
317 | * @acdp: output argument for new context object
318 | * @config: configuration parameters
319 | *
320 | * Create a new ACD context and return it in @acdp. The configuration @config
321 | * must be initialized by the caller and must specify a valid network
322 | * interface, transport mechanism, as well as hardware address compatible with
323 | * the selected transport. The configuration is copied into the context. The
324 | * @config object thus does not have to be retained by the caller.
325 | *
326 | * Return: 0 on success, negative error code on failure.
327 | */
328 | _c_public_ int n_acd_new(NAcd **acdp, NAcdConfig *config) {
329 | _c_cleanup_(n_acd_unrefp) NAcd *acd = NULL;
330 | _c_cleanup_(c_closep) int fd_bpf_prog = -1;
331 | struct epoll_event eevent;
332 | int r;
333 |
334 | if (config->ifindex <= 0 ||
335 | config->transport != N_ACD_TRANSPORT_ETHERNET ||
336 | config->n_mac != ETH_ALEN ||
337 | !memcmp(config->mac, (uint8_t[ETH_ALEN]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, ETH_ALEN))
338 | return N_ACD_E_INVALID_ARGUMENT;
339 |
340 | acd = malloc(sizeof(*acd));
341 | if (!acd)
342 | return -ENOMEM;
343 |
344 | *acd = (NAcd)N_ACD_NULL(*acd);
345 | acd->ifindex = config->ifindex;
346 | memcpy(acd->mac, config->mac, ETH_ALEN);
347 |
348 | r = n_acd_get_random(&acd->seed);
349 | if (r)
350 | return r;
351 |
352 | acd->fd_epoll = epoll_create1(EPOLL_CLOEXEC);
353 | if (acd->fd_epoll < 0)
354 | return -c_errno();
355 |
356 | r = timer_init(&acd->timer);
357 | if (r < 0)
358 | return r;
359 |
360 | acd->max_bpf_map = 8;
361 |
362 | r = n_acd_bpf_map_create(&acd->fd_bpf_map, acd->max_bpf_map);
363 | if (r)
364 | return r;
365 |
366 | r = n_acd_bpf_compile(&fd_bpf_prog, acd->fd_bpf_map, (struct ether_addr*) acd->mac);
367 | if (r)
368 | return r;
369 |
370 | r = n_acd_socket_new(&acd->fd_socket, fd_bpf_prog, config);
371 | if (r)
372 | return r;
373 |
374 | eevent = (struct epoll_event){
375 | .events = EPOLLIN,
376 | .data.u32 = N_ACD_EPOLL_TIMER,
377 | };
378 | r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->timer.fd, &eevent);
379 | if (r < 0)
380 | return -c_errno();
381 |
382 | eevent = (struct epoll_event){
383 | .events = EPOLLIN,
384 | .data.u32 = N_ACD_EPOLL_SOCKET,
385 | };
386 | r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->fd_socket, &eevent);
387 | if (r < 0)
388 | return -c_errno();
389 |
390 | *acdp = acd;
391 | acd = NULL;
392 | return 0;
393 | }
394 |
395 | static void n_acd_free_internal(NAcd *acd) {
396 | NAcdEventNode *node, *t_node;
397 |
398 | if (!acd)
399 | return;
400 |
401 | c_list_for_each_entry_safe(node, t_node, &acd->event_list, acd_link)
402 | n_acd_event_node_free(node);
403 |
404 | c_assert(c_rbtree_is_empty(&acd->ip_tree));
405 |
406 | if (acd->fd_socket >= 0) {
407 | c_assert(acd->fd_epoll >= 0);
408 | epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->fd_socket, NULL);
409 | close(acd->fd_socket);
410 | acd->fd_socket = -1;
411 | }
412 |
413 | if (acd->fd_bpf_map >= 0) {
414 | close(acd->fd_bpf_map);
415 | acd->fd_bpf_map = -1;
416 | }
417 |
418 | if (acd->timer.fd >= 0) {
419 | c_assert(acd->fd_epoll >= 0);
420 | epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->timer.fd, NULL);
421 | timer_deinit(&acd->timer);
422 | }
423 |
424 | if (acd->fd_epoll >= 0) {
425 | close(acd->fd_epoll);
426 | acd->fd_epoll = -1;
427 | }
428 |
429 | free(acd);
430 | }
431 |
432 | /**
433 | * n_acd_ref() - acquire reference
434 | * @acd: context to operate on, or NULL
435 | *
436 | * This acquires a single reference to the context specified as @acd. If @acd
437 | * is NULL, this is a no-op.
438 | *
439 | * Return: @acd is returned.
440 | */
441 | _c_public_ NAcd *n_acd_ref(NAcd *acd) {
442 | if (acd)
443 | ++acd->n_refs;
444 | return acd;
445 | }
446 |
447 | /**
448 | * n_acd_unref() - release reference
449 | * @acd: context to operate on, or NULL
450 | *
451 | * This releases a single reference to the context @acd. If this is the last
452 | * reference, the context is torn down and deallocated.
453 | *
454 | * Return: NULL is returned.
455 | */
456 | _c_public_ NAcd *n_acd_unref(NAcd *acd) {
457 | if (acd && !--acd->n_refs)
458 | n_acd_free_internal(acd);
459 | return NULL;
460 | }
461 |
462 | int n_acd_raise(NAcd *acd, NAcdEventNode **nodep, unsigned int event) {
463 | NAcdEventNode *node;
464 | int r;
465 |
466 | r = n_acd_event_node_new(&node);
467 | if (r)
468 | return r;
469 |
470 | node->event.event = event;
471 | c_list_link_tail(&acd->event_list, &node->acd_link);
472 |
473 | if (nodep)
474 | *nodep = node;
475 | return 0;
476 | }
477 |
478 | int n_acd_send(NAcd *acd, const struct in_addr *tpa, const struct in_addr *spa) {
479 | struct sockaddr_ll address = {
480 | .sll_family = AF_PACKET,
481 | .sll_protocol = htobe16(ETH_P_ARP),
482 | .sll_ifindex = acd->ifindex,
483 | .sll_halen = ETH_ALEN,
484 | .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
485 | };
486 | struct ether_arp arp = {
487 | .ea_hdr = {
488 | .ar_hrd = htobe16(ARPHRD_ETHER),
489 | .ar_pro = htobe16(ETHERTYPE_IP),
490 | .ar_hln = sizeof(acd->mac),
491 | .ar_pln = sizeof(uint32_t),
492 | .ar_op = htobe16(ARPOP_REQUEST),
493 | },
494 | };
495 | ssize_t l;
496 | int r;
497 |
498 | memcpy(arp.arp_sha, acd->mac, sizeof(acd->mac));
499 | memcpy(arp.arp_tpa, &tpa->s_addr, sizeof(uint32_t));
500 |
501 | if (spa)
502 | memcpy(arp.arp_spa, &spa->s_addr, sizeof(spa->s_addr));
503 |
504 | l = sendto(acd->fd_socket,
505 | &arp,
506 | sizeof(arp),
507 | MSG_NOSIGNAL,
508 | (struct sockaddr *)&address,
509 | sizeof(address));
510 | if (l < 0) {
511 | if (errno == EAGAIN || errno == ENOBUFS) {
512 | /*
513 | * We never maintain outgoing queues. We rely on the
514 | * network device to do that for us. In case the queues
515 | * are full, or the kernel refuses to queue the packet
516 | * for other reasons, we must tell our caller that the
517 | * packet was dropped.
518 | */
519 | return N_ACD_E_DROPPED;
520 | } else if (errno == ENETDOWN || errno == ENXIO) {
521 | /*
522 | * These errors happen if the network device went down
523 | * or was actually removed. We always propagate this as
524 | * event, so the user can react accordingly (similarly
525 | * to the recvmmsg(2) handler). In case the user does
526 | * not immediately react, we also tell our caller that
527 | * the packet was dropped, so we don't erroneously
528 | * treat this as success.
529 | */
530 |
531 | r = n_acd_raise(acd, NULL, N_ACD_EVENT_DOWN);
532 | if (r)
533 | return r;
534 |
535 | return N_ACD_E_DROPPED;
536 | }
537 |
538 | /*
539 | * Random network error. We treat this as fatal and propagate
540 | * the error, so it is noticed and can be investigated.
541 | */
542 | return -c_errno();
543 | } else if (l != (ssize_t)sizeof(arp)) {
544 | /*
545 | * Ugh, the kernel modified the packet. This is unexpected. We
546 | * consider the packet lost.
547 | */
548 | return N_ACD_E_DROPPED;
549 | }
550 |
551 | return 0;
552 | }
553 |
554 | /**
555 | * n_acd_get_fd() - get pollable file descriptor
556 | * @acd: context object to operate on
557 | * @fdp: output argument for file descriptor
558 | *
559 | * This returns the backing file-descriptor of the context object @acd. The
560 | * file-descriptor is owned by @acd and valid as long as @acd is. The
561 | * file-descriptor never changes, so it can be cached by the caller as long as
562 | * they hold a reference to @acd.
563 | *
564 | * The file-descriptor is internal to the @acd context and should not be
565 | * modified by the caller. It is only exposed to allow the caller to poll on
566 | * it. Whenever the file-descriptor polls readable, n_acd_dispatch() should be
567 | * called.
568 | *
569 | * Currently, the file-descriptor is an epoll-fd.
570 | */
571 | _c_public_ void n_acd_get_fd(NAcd *acd, int *fdp) {
572 | *fdp = acd->fd_epoll;
573 | }
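
/*
 * Illustrative sketch (not part of the library): integrating the context fd
 * into a simple poll(2) loop. The fd is only polled for readability; whenever
 * it is readable, n_acd_dispatch() is called and the event queue is drained.
 * The `acd` context is assumed to exist already:
 *
 *         struct pollfd pfd = { .events = POLLIN };
 *         int r;
 *
 *         n_acd_get_fd(acd, &pfd.fd);
 *
 *         for (;;) {
 *                 r = poll(&pfd, 1, -1);
 *                 if (r < 0)
 *                         return -errno;
 *
 *                 r = n_acd_dispatch(acd);
 *                 if (r < 0)
 *                         return r;
 *
 *                 ... drain events via n_acd_pop_event() ...
 *         }
 */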
574 |
575 | static int n_acd_handle_timeout(NAcd *acd) {
576 | NAcdProbe *probe;
577 | uint64_t now;
578 | int r;
579 |
580 | /*
581 | * Read the current time once, and handle all timeouts that triggered
582 | * before the current time. Rereading the current time in each loop
583 | * might risk creating a live-lock, and the fact that we read the
584 | * time after reading the timer guarantees that the timeout which
585 | * woke us up is handled.
586 | *
587 | * When there are no more timeouts to handle at the given time, we
588 | * rearm the timer to potentially wake us up again in the future.
589 | */
590 | timer_now(&acd->timer, &now);
591 |
592 | for (;;) {
593 | Timeout *timeout;
594 |
595 | r = timer_pop_timeout(&acd->timer, now, &timeout);
596 | if (r < 0) {
597 | return r;
598 | } else if (!timeout) {
599 | /*
600 | * There are no more timeouts pending before @now. Rearm
601 | * the timer to fire again at the next timeout.
602 | */
603 | timer_rearm(&acd->timer);
604 | break;
605 | }
606 |
607 | probe = (void *)timeout - offsetof(NAcdProbe, timeout);
608 | r = n_acd_probe_handle_timeout(probe);
609 | if (r)
610 | return r;
611 | }
612 |
613 | return 0;
614 | }
615 |
616 | static int n_acd_handle_packet(NAcd *acd, struct ether_arp *packet) {
617 | bool hard_conflict;
618 | NAcdProbe *probe;
619 | uint32_t addr;
620 | CRBNode *node;
621 | int r;
622 |
623 | /*
624 | * We are interested in 2 kinds of ARP messages:
625 | *
626 | * 1) Someone who is *NOT* us sends *ANY* ARP message with our IP
627 | * address as sender. This is never good, because it implies an
628 | * address conflict.
629 | * We call this a hard-conflict.
630 | *
631 | * 2) Someone who is *NOT* us sends an ARP REQUEST without any sender
632 | * IP, but our IP as target. This implies someone else performs an
633 | * ARP Probe with our address. This also implies a conflict, but
634 | * one that can be resolved by responding to the probe.
635 | * We call this a soft-conflict.
636 | *
637 | * We are never interested in any other ARP message. The kernel already
638 | * deals with everything else, hence, we can silently ignore those.
639 | *
640 | * Now, we simply check whether a sender-address is set. This allows us
641 | * to distinguish both cases. We then check further conditions, so we
642 | * can bail out early if neither is the case.
643 | *
644 | * Lastly, we perform a lookup in our probe-set to check whether the
645 | * address actually matches, so we can let these probes dispatch the
646 | * message. Note that we allow duplicate probes, so we need to dispatch
647 | * each matching probe, not just one.
648 | */
649 |
650 | if (memcmp(packet->arp_spa, (uint8_t[4]){ }, sizeof(packet->arp_spa))) {
651 | memcpy(&addr, packet->arp_spa, sizeof(addr));
652 | hard_conflict = true;
653 | } else if (packet->ea_hdr.ar_op == htobe16(ARPOP_REQUEST)) {
654 | memcpy(&addr, packet->arp_tpa, sizeof(addr));
655 | hard_conflict = false;
656 | } else {
657 | /*
658 | * The BPF filter will not let through any other packet.
659 | */
660 | return -EIO;
661 | }
662 |
663 | /* Find top-most node that matches @addr. */
664 | node = acd->ip_tree.root;
665 | while (node) {
666 | probe = c_rbnode_entry(node, NAcdProbe, ip_node);
667 | if (addr < probe->ip.s_addr)
668 | node = node->left;
669 | else if (addr > probe->ip.s_addr)
670 | node = node->right;
671 | else
672 | break;
673 | }
674 |
675 | /*
676 | * If the address is unknown, we drop the packet. This might happen if
677 | * the kernel queued the packet and passed the BPF filter, but we
678 | * modified the set before dequeuing the message.
679 | */
680 | if (!node)
681 | return 0;
682 |
683 | /* Forward to left-most child that still matches @addr. */
684 | while (node->left && addr == c_rbnode_entry(node->left,
685 | NAcdProbe,
686 | ip_node)->ip.s_addr)
687 | node = node->left;
688 |
689 | /* Iterate all matching entries in-order. */
690 | do {
691 | probe = c_rbnode_entry(node, NAcdProbe, ip_node);
692 |
693 | r = n_acd_probe_handle_packet(probe, packet, hard_conflict);
694 | if (r)
695 | return r;
696 |
697 | node = c_rbnode_next(node);
698 | } while (node && addr == c_rbnode_entry(node,
699 | NAcdProbe,
700 | ip_node)->ip.s_addr);
701 |
702 | return 0;
703 | }
704 |
705 | static int n_acd_dispatch_timer(NAcd *acd, struct epoll_event *event) {
706 | int r;
707 |
708 | if (event->events & (EPOLLHUP | EPOLLERR)) {
709 | /*
710 | * There is no way to handle either gracefully. If we ignored
711 | * them, we would busy-loop, so let's rather forward the error
712 | * to the caller.
713 | */
714 | return -EIO;
715 | }
716 |
717 | if (event->events & EPOLLIN) {
718 | r = timer_read(&acd->timer);
719 | if (r <= 0)
720 | return r;
721 |
722 | c_assert(r == TIMER_E_TRIGGERED);
723 |
724 | /*
725 | * A timer triggered, so handle all timeouts pending at this
726 | * point in time. There can only be a finite number of pending
727 | * timeouts; any new ones will be in the future, so they are not
728 | * handled now, but are guaranteed to wake us up again when they trigger.
729 | */
730 | r = n_acd_handle_timeout(acd);
731 | if (r)
732 | return r;
733 | }
734 |
735 | return 0;
736 | }
737 |
738 | static bool n_acd_packet_is_valid(NAcd *acd, void *packet, size_t n_packet) {
739 | struct ether_arp *arp;
740 |
741 | /*
742 | * When the eBPF filter is attached, it ensures that this function always
743 | * returns true. Keeping this check allows the eBPF filter to be an optional
744 | * optimization, which is necessary on older kernels where eBPF is unavailable.
745 | *
746 | * See comments in n-acd-bpf.c for details.
747 | */
748 |
749 | if (n_packet != sizeof(*arp))
750 | return false;
751 |
752 | arp = packet;
753 |
754 | if (arp->arp_hrd != htobe16(ARPHRD_ETHER))
755 | return false;
756 |
757 | if (arp->arp_pro != htobe16(ETHERTYPE_IP))
758 | return false;
759 |
760 | if (arp->arp_hln != sizeof(struct ether_addr))
761 | return false;
762 |
763 | if (arp->arp_pln != sizeof(struct in_addr))
764 | return false;
765 |
766 | if (!memcmp(arp->arp_sha, acd->mac, sizeof(struct ether_addr)))
767 | return false;
768 |
769 | if (memcmp(arp->arp_spa, &((struct in_addr) { INADDR_ANY }), sizeof(struct in_addr))) {
770 | if (arp->arp_op != htobe16(ARPOP_REQUEST) && arp->arp_op != htobe16(ARPOP_REPLY))
771 | return false;
772 | } else if (arp->arp_op != htobe16(ARPOP_REQUEST)) {
773 | return false;
774 | }
775 |
776 | return true;
777 | }
778 |
779 | static int n_acd_dispatch_socket(NAcd *acd, struct epoll_event *event) {
780 | const size_t n_batch = 8;
781 | struct mmsghdr msgs[n_batch];
782 | struct iovec iovecs[n_batch];
783 | struct ether_arp data[n_batch];
784 | size_t i;
785 | int r, n;
786 |
787 | for (i = 0; i < n_batch; ++i) {
788 | iovecs[i].iov_base = data + i;
789 | iovecs[i].iov_len = sizeof(data[i]);
790 | msgs[i].msg_hdr = (struct msghdr){
791 | .msg_iov = iovecs + i,
792 | .msg_iovlen = 1,
793 | };
794 | }
795 |
796 | /*
797 | * We always directly call into recvmmsg(2), regardless which EPOLL*
798 | * event is signalled. On sockets, the recv(2)-family of syscalls does
799 | * a suitable job of handling all possible scenarios and telling us
800 | * about it. Hence, let's take the easy route and always ask the kernel
801 | * about the current state.
802 | */
803 | n = recvmmsg(acd->fd_socket, msgs, n_batch, 0, NULL);
804 | if (n < 0) {
805 | if (errno == ENETDOWN) {
806 | /*
807 | * We get ENETDOWN if the network-device goes down or
808 | * is removed. This error is temporary and only queued
809 | * once. Subsequent reads will simply return EAGAIN
810 | * until the device is up again and has data queued.
811 | * Usually, the caller should tear down all probes when
812 | * an interface goes down, but we leave it up to the
813 | * caller to decide what to do. We propagate the code
814 | * and continue.
815 | */
816 | return n_acd_raise(acd, NULL, N_ACD_EVENT_DOWN);
817 | } else if (errno == EAGAIN) {
818 | /*
819 | * There is no more data queued and we did not get
820 | * preempted. Everything is good to go.
821 | * As a safety-net against busy-looping, we do check
822 | * for HUP/ERR. Neither should be set, since they imply
823 | * error-dequeue behavior on all socket calls. Lets
824 | * fail hard if we trigger it, so we can investigate.
825 | */
826 | if (event->events & (EPOLLHUP | EPOLLERR))
827 | return -EIO;
828 |
829 | return 0;
830 | } else {
831 | /*
832 | * Something went wrong. Propagate the error-code, so
833 | * this can be investigated.
834 | */
835 | return -c_errno();
836 | }
837 | } else if (n >= (ssize_t)n_batch) {
838 | /*
839 | * If all buffers were filled with data, we cannot be sure that
840 | * there is nothing left to read. But to avoid starvation, we
841 | * cannot loop on this condition. Instead, we mark the context
842 | * as preempted so the caller can call us again.
843 | * Note that in level-triggered event-loops this condition can
844 | * be neglected, but in edge-triggered event-loops it is
845 | * crucial to forward this information.
846 | *
847 | * On the other hand, there are several conditions where the
848 | * kernel might return fewer messages than requested even though
849 | * we were still preempted. However, all of those cases require the
850 | * preemption to have triggered a wakeup *after* we entered
851 | * recvmmsg(). Hence, even if we did not recognize the
852 | * preemption, an edge must have triggered and as such we will
853 | * handle the event on the next turn.
854 | */
855 | acd->preempted = true;
856 | }
857 |
858 | for (i = 0; (ssize_t)i < n; ++i) {
859 | if (!n_acd_packet_is_valid(acd, data + i, msgs[i].msg_len))
860 | continue;
861 | /*
862 | * Handle the packet. Bail out if something went wrong. Note
863 | * that these must be fatal errors, since we discard all other
864 | * packets that follow.
865 | */
866 | r = n_acd_handle_packet(acd, data + i);
867 | if (r)
868 | return r;
869 | }
870 |
871 | return 0;
872 | }
873 |
874 | /**
875 | * n_acd_dispatch() - dispatch context
876 | * @acd: context object to operate on
877 | *
878 | * This dispatches the internal state-machine of all probes and operations
879 | * running on the context @acd.
880 | *
881 | * Any outside effect or event triggered by this dispatcher will be queued on
882 | * the event-queue of @acd. Whenever the dispatcher returns, the caller is
883 | * required to drain the event-queue via n_acd_pop_event() until it is empty.
884 | *
885 | * This function dispatches as many events as possible up to a static limit to
886 | * prevent stalling execution. If the static limit is reached, this function
887 | * will return with N_ACD_E_PREEMPTED, otherwise 0 is returned. In most cases
888 | * preemption can be ignored, because level-triggered event notification
889 | * handles it automatically. However, in case of edge-triggered event
890 | * mechanisms, the caller must make sure to call the dispatcher again.
891 | *
892 | * Return: 0 on success, N_ACD_E_PREEMPTED on preemption, negative error code
893 | * on failure.
894 | */
895 | _c_public_ int n_acd_dispatch(NAcd *acd) {
896 | struct epoll_event events[2];
897 | int n, i, r = 0;
898 |
899 | n = epoll_wait(acd->fd_epoll, events, sizeof(events) / sizeof(*events), 0);
900 | if (n < 0) {
901 | /* Linux never returns EINTR if `timeout == 0'. */
902 | return -c_errno();
903 | }
904 |
905 | acd->preempted = false;
906 |
907 | for (i = 0; i < n; ++i) {
908 | switch (events[i].data.u32) {
909 | case N_ACD_EPOLL_TIMER:
910 | r = n_acd_dispatch_timer(acd, events + i);
911 | break;
912 | case N_ACD_EPOLL_SOCKET:
913 | r = n_acd_dispatch_socket(acd, events + i);
914 | break;
915 | default:
916 | c_assert(0);
917 | r = 0;
918 | break;
919 | }
920 |
921 | if (r)
922 | return r;
923 | }
924 |
925 | return acd->preempted ? N_ACD_E_PREEMPTED : 0;
926 | }
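
/*
 * Illustrative sketch (not part of the library): with an edge-triggered event
 * loop the caller must keep dispatching while N_ACD_E_PREEMPTED is returned,
 * draining the event queue after each call. The `handle_events()` helper is an
 * assumption made for this example only:
 *
 *         int r;
 *
 *         do {
 *                 r = n_acd_dispatch(acd);
 *                 if (r < 0)
 *                         return r;
 *
 *                 handle_events(acd);
 *         } while (r == N_ACD_E_PREEMPTED);
 */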
927 |
928 | /**
929 | * n_acd_pop_event() - get the next pending event
930 | * @acd: context object to operate on
931 | * @eventp: output argument for the event
932 | *
933 | * Returns a pointer to the next pending event. The event is still owned by
934 | * the context, and is only valid until the next call to n_acd_pop_event()
935 | * or until the owning object is freed (either the ACD context or the indicated
936 | * probe object).
937 | *
938 | * An event either originates on the ACD context, or one of the configured
939 | * probes. If the event-type has a 'probe' pointer, it originated on the
940 | * indicated probe (which is *never* NULL), otherwise it originated on the
941 | * context.
942 | *
943 | * Users must call this function repeatedly until either an error is returned,
944 | * or the event-pointer is NULL. Wakeups on the epoll-fd are only guaranteed
945 | * for each batch of events. Hence, it is the caller's responsibility to drain
946 | * the event-queue somehow after each call to n_acd_dispatch(). Note that
947 | * events can only be added by n_acd_dispatch(), hence, you cannot live-lock
948 | * when draining the event queue.
949 | *
950 | * The possible events are:
951 | * * N_ACD_EVENT_READY: A configured IP address was probed successfully
952 | * and is ready to be used. Once configured on the
953 | * interface, the caller must call n_acd_probe_announce()
954 | * to announce and start defending the address.
955 | * * N_ACD_EVENT_USED: Someone is already using the IP address being
956 | * probed. The probe is put into stopped state and
957 | * should be freed by the caller.
958 | * * N_ACD_EVENT_DEFENDED: A conflict was detected for an announced IP
959 | * address, and the engine attempted to defend it.
960 | * This is purely informational, and no action is
961 | * required by the caller.
962 | * * N_ACD_EVENT_CONFLICT: A conflict was detected for an announced IP
963 | * address, and the probe was not able to defend
964 | * it (according to the configured policy). The
965 | * probe halted, the caller must stop using
966 | * the address immediately, and should free the probe.
967 | * * N_ACD_EVENT_DOWN: The specified network interface was put down. The
968 | * user is recommended to free *ALL* probes and
969 | * recreate them as soon as the interface is up again.
970 | * Note that this event is purely informational. The
971 | * probes will continue running, but all packets will
972 | * be blackholed, and no network packets are received,
973 | * until the network is back up again. Hence, from an
974 | * operational perspective, the legitimacy of the ACD
975 | * probes is lost and the user should re-probe all
976 | * addresses.
977 | *
978 | * Return: 0 on success, negative error code on failure. The popped event is
979 | * returned in @eventp. If no event is pending, NULL is placed in
980 | * @eventp and 0 is returned. If an error is returned, @eventp is left
981 | * untouched.
982 | */
983 | _c_public_ int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp) {
984 | NAcdEventNode *node, *t_node;
985 |
986 | c_list_for_each_entry_safe(node, t_node, &acd->event_list, acd_link) {
987 | if (node->is_public) {
988 | n_acd_event_node_free(node);
989 | continue;
990 | }
991 |
992 | node->is_public = true;
993 | *eventp = &node->event;
994 | return 0;
995 | }
996 |
997 | *eventp = NULL;
998 | return 0;
999 | }
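
/*
 * Illustrative sketch (not part of the library): draining the event queue
 * until n_acd_pop_event() returns a NULL event, as required after every call
 * to n_acd_dispatch(). The `handle_ready()` and `handle_used()` helpers are
 * assumptions made for this example only:
 *
 *         NAcdEvent *event;
 *         int r;
 *
 *         for (;;) {
 *                 r = n_acd_pop_event(acd, &event);
 *                 if (r)
 *                         return r;
 *                 if (!event)
 *                         break;
 *
 *                 switch (event->event) {
 *                 case N_ACD_EVENT_READY:
 *                         handle_ready(event->ready.probe);
 *                         break;
 *                 case N_ACD_EVENT_USED:
 *                         handle_used(event->used.probe);
 *                         break;
 *                 default:
 *                         break;
 *                 }
 *         }
 */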
1000 |
1001 | /**
1002 | * n_acd_probe() - start new probe
1003 | * @acd: context object to operate on
1004 | * @probep: output argument for new probe
1005 | * @config: probe configuration
1006 | *
1007 | * This creates a new probe on the context @acd and returns the probe in
1008 | * @probep. The configuration @config must provide valid probe parameters. At
1009 | * least a valid IP address must be provided through the configuration.
1010 | *
1011 | * This function does not reject duplicate probes for the same address. It is
1012 | * the caller's decision whether duplicates are allowed or not. But note that
1013 | * duplicate probes on the same context will not conflict with each other. That is,
1014 | * running a probe for the same address twice on the same context will not
1015 | * cause them to consider each other a duplicate.
1016 | *
1017 | * Probes are rather lightweight objects. They do not create any
1018 | * file-descriptors or other kernel objects. Probes always re-use the
1019 | * infrastructure provided by the context object @acd. This allows running many
1020 | * probes simultaneously without exhausting resources.
1021 | *
1022 | * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT on invalid configuration
1023 | * parameters, negative error code on failure.
1024 | */
1025 | _c_public_ int n_acd_probe(NAcd *acd, NAcdProbe **probep, NAcdProbeConfig *config) {
1026 | return n_acd_probe_new(probep, acd, config);
1027 | }
1028 |
--------------------------------------------------------------------------------
/src/n-acd.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | /*
4 | * IPv4 Address Conflict Detection
5 | *
6 | * This is the public header of the n-acd library, implementing IPv4 Address
7 | * Conflict Detection as described in RFC-5227. This header defines the public
8 | * API and all entry points of n-acd.
9 | */
10 |
11 | #ifdef __cplusplus
12 | extern "C" {
13 | #endif
14 |
15 | #include
16 | #include
17 | #include
18 | #include
19 |
20 | typedef struct NAcd NAcd;
21 | typedef struct NAcdConfig NAcdConfig;
22 | typedef struct NAcdEvent NAcdEvent;
23 | typedef struct NAcdProbe NAcdProbe;
24 | typedef struct NAcdProbeConfig NAcdProbeConfig;
25 |
26 | #define N_ACD_TIMEOUT_RFC5227 (UINT64_C(9000))
27 |
28 | enum {
29 | _N_ACD_E_SUCCESS,
30 |
31 | N_ACD_E_PREEMPTED,
32 | N_ACD_E_INVALID_ARGUMENT,
33 |
34 | _N_ACD_E_N,
35 | };
36 |
37 | enum {
38 | N_ACD_TRANSPORT_ETHERNET,
39 | _N_ACD_TRANSPORT_N,
40 | };
41 |
42 | enum {
43 | N_ACD_EVENT_READY,
44 | N_ACD_EVENT_USED,
45 | N_ACD_EVENT_DEFENDED,
46 | N_ACD_EVENT_CONFLICT,
47 | N_ACD_EVENT_DOWN,
48 | _N_ACD_EVENT_N,
49 | };
50 |
51 | enum {
52 | N_ACD_DEFEND_NEVER,
53 | N_ACD_DEFEND_ONCE,
54 | N_ACD_DEFEND_ALWAYS,
55 | _N_ACD_DEFEND_N,
56 | };
57 |
58 | struct NAcdEvent {
59 | unsigned int event;
60 | union {
61 | struct {
62 | NAcdProbe *probe;
63 | } ready;
64 | struct {
65 | } down;
66 | struct {
67 | NAcdProbe *probe;
68 | uint8_t *sender;
69 | size_t n_sender;
70 | } used, defended, conflict;
71 | };
72 | };
73 |
74 | /* configs */
75 |
76 | int n_acd_config_new(NAcdConfig **configp);
77 | NAcdConfig *n_acd_config_free(NAcdConfig *config);
78 |
79 | void n_acd_config_set_ifindex(NAcdConfig *config, int ifindex);
80 | void n_acd_config_set_transport(NAcdConfig *config, unsigned int transport);
81 | void n_acd_config_set_mac(NAcdConfig *config, const uint8_t *mac, size_t n_mac);
82 |
83 | int n_acd_probe_config_new(NAcdProbeConfig **configp);
84 | NAcdProbeConfig *n_acd_probe_config_free(NAcdProbeConfig *config);
85 |
86 | void n_acd_probe_config_set_ip(NAcdProbeConfig *config, struct in_addr ip);
87 | void n_acd_probe_config_set_timeout(NAcdProbeConfig *config, uint64_t msecs);
88 |
89 | /* contexts */
90 |
91 | int n_acd_new(NAcd **acdp, NAcdConfig *config);
92 | NAcd *n_acd_ref(NAcd *acd);
93 | NAcd *n_acd_unref(NAcd *acd);
94 |
95 | void n_acd_get_fd(NAcd *acd, int *fdp);
96 | int n_acd_dispatch(NAcd *acd);
97 | int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp);
98 |
99 | int n_acd_probe(NAcd *acd, NAcdProbe **probep, NAcdProbeConfig *config);
100 |
101 | /* probes */
102 |
103 | NAcdProbe *n_acd_probe_free(NAcdProbe *probe);
104 |
105 | void n_acd_probe_set_userdata(NAcdProbe *probe, void *userdata);
106 | void n_acd_probe_get_userdata(NAcdProbe *probe, void **userdatap);
107 |
108 | int n_acd_probe_announce(NAcdProbe *probe, unsigned int defend);
109 |
110 | /* inline helpers */
111 |
112 | static inline void n_acd_config_freep(NAcdConfig **config) {
113 | if (*config)
114 | n_acd_config_free(*config);
115 | }
116 |
117 | static inline void n_acd_config_freev(NAcdConfig *config) {
118 | n_acd_config_free(config);
119 | }
120 |
121 | static inline void n_acd_probe_config_freep(NAcdProbeConfig **config) {
122 | if (*config)
123 | n_acd_probe_config_free(*config);
124 | }
125 |
126 | static inline void n_acd_probe_config_freev(NAcdProbeConfig *config) {
127 | n_acd_probe_config_free(config);
128 | }
129 |
130 | static inline void n_acd_unrefp(NAcd **acd) {
131 | if (*acd)
132 | n_acd_unref(*acd);
133 | }
134 |
135 | static inline void n_acd_unrefv(NAcd *acd) {
136 | n_acd_unref(acd);
137 | }
138 |
139 | static inline void n_acd_probe_freep(NAcdProbe **probe) {
140 | if (*probe)
141 | n_acd_probe_free(*probe);
142 | }
143 |
144 | static inline void n_acd_probe_freev(NAcdProbe *probe) {
145 | n_acd_probe_free(probe);
146 | }
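
/*
 * Illustrative sketch (not part of the header): the `*_freep` helpers above
 * are shaped for use with the GCC/Clang cleanup attribute, giving callers
 * scope-bound cleanup. The `_cleanup_` macro below is an assumption made for
 * this example only:
 *
 *         #define _cleanup_(_f) __attribute__((cleanup(_f)))
 *
 *         int example(void) {
 *                 _cleanup_(n_acd_probe_config_freep) NAcdProbeConfig *config = NULL;
 *                 int r;
 *
 *                 r = n_acd_probe_config_new(&config);
 *                 if (r)
 *                         return r;
 *
 *                 n_acd_probe_config_set_timeout(config, N_ACD_TIMEOUT_RFC5227);
 *                 return 0;
 *         }
 */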
147 |
148 | #ifdef __cplusplus
149 | }
150 | #endif
151 |
--------------------------------------------------------------------------------
/src/test-api.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Tests for n-acd API
3 | * This verifies the visibility and availability of the public API.
4 | */
5 |
6 | #undef NDEBUG
7 | #include
8 | #include
9 | #include "n-acd.h"
10 |
11 | static void test_api_constants(void) {
12 | assert(1 + N_ACD_TIMEOUT_RFC5227);
13 |
14 | assert(1 + _N_ACD_E_SUCCESS);
15 | assert(1 + N_ACD_E_PREEMPTED);
16 | assert(1 + N_ACD_E_INVALID_ARGUMENT);
17 | assert(1 + _N_ACD_E_N);
18 |
19 | assert(1 + N_ACD_TRANSPORT_ETHERNET);
20 | assert(1 + _N_ACD_TRANSPORT_N);
21 |
22 | assert(1 + N_ACD_EVENT_READY);
23 | assert(1 + N_ACD_EVENT_USED);
24 | assert(1 + N_ACD_EVENT_DEFENDED);
25 | assert(1 + N_ACD_EVENT_CONFLICT);
26 | assert(1 + N_ACD_EVENT_DOWN);
27 | assert(1 + _N_ACD_EVENT_N);
28 |
29 | assert(1 + N_ACD_DEFEND_NEVER);
30 | assert(1 + N_ACD_DEFEND_ONCE);
31 | assert(1 + N_ACD_DEFEND_ALWAYS);
32 | assert(1 + _N_ACD_DEFEND_N);
33 | }
34 |
35 | static void test_api_types(void) {
36 | assert(sizeof(NAcdEvent*));
37 | assert(sizeof(NAcdConfig*));
38 | assert(sizeof(NAcdProbeConfig*));
39 | assert(sizeof(NAcd*));
40 | assert(sizeof(NAcdProbe*));
41 | }
42 |
43 | static void test_api_functions(void) {
44 | void *fns[] = {
45 | (void *)n_acd_config_new,
46 | (void *)n_acd_config_free,
47 | (void *)n_acd_config_set_ifindex,
48 | (void *)n_acd_config_set_transport,
49 | (void *)n_acd_config_set_mac,
50 | (void *)n_acd_probe_config_new,
51 | (void *)n_acd_probe_config_free,
52 | (void *)n_acd_probe_config_set_ip,
53 | (void *)n_acd_probe_config_set_timeout,
54 |
55 | (void *)n_acd_new,
56 | (void *)n_acd_ref,
57 | (void *)n_acd_unref,
58 | (void *)n_acd_get_fd,
59 | (void *)n_acd_dispatch,
60 | (void *)n_acd_pop_event,
61 | (void *)n_acd_probe,
62 |
63 | (void *)n_acd_probe_free,
64 | (void *)n_acd_probe_set_userdata,
65 | (void *)n_acd_probe_get_userdata,
66 | (void *)n_acd_probe_announce,
67 |
68 | (void *)n_acd_config_freep,
69 | (void *)n_acd_config_freev,
70 | (void *)n_acd_probe_config_freep,
71 | (void *)n_acd_probe_config_freev,
72 | (void *)n_acd_unrefp,
73 | (void *)n_acd_unrefv,
74 | (void *)n_acd_probe_freep,
75 | (void *)n_acd_probe_freev,
76 | };
77 | size_t i;
78 |
79 | for (i = 0; i < sizeof(fns) / sizeof(*fns); ++i)
80 | assert(!!fns[i]);
81 | }
82 |
83 | int main(int argc, char **argv) {
84 | test_api_constants();
85 | test_api_types();
86 | test_api_functions();
87 | return 0;
88 | }
89 |
--------------------------------------------------------------------------------
/src/test-bpf.c:
--------------------------------------------------------------------------------
1 | /*
2 | * eBPF socket filter tests
3 | */
4 |
5 | #undef NDEBUG
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include "n-acd.h"
19 | #include "n-acd-private.h"
20 | #include "test.h"
21 |
22 | #define ETHER_ARP_PACKET_INIT(_op, _mac, _sip, _tip) { \
23 | .ea_hdr = { \
24 | .ar_hrd = htobe16(ARPHRD_ETHER), \
25 | .ar_pro = htobe16(ETHERTYPE_IP), \
26 | .ar_hln = 6, \
27 | .ar_pln = 4, \
28 | .ar_op = htobe16(_op), \
29 | }, \
30 | .arp_sha[0] = (_mac)->ether_addr_octet[0], \
31 | .arp_sha[1] = (_mac)->ether_addr_octet[1], \
32 | .arp_sha[2] = (_mac)->ether_addr_octet[2], \
33 | .arp_sha[3] = (_mac)->ether_addr_octet[3], \
34 | .arp_sha[4] = (_mac)->ether_addr_octet[4], \
35 | .arp_sha[5] = (_mac)->ether_addr_octet[5], \
36 | .arp_spa[0] = (be32toh((_sip)->s_addr) >> 24) & 0xff, \
37 | .arp_spa[1] = (be32toh((_sip)->s_addr) >> 16) & 0xff, \
38 | .arp_spa[2] = (be32toh((_sip)->s_addr) >> 8) & 0xff, \
39 | .arp_spa[3] = be32toh((_sip)->s_addr) & 0xff, \
40 | .arp_tpa[0] = (be32toh((_tip)->s_addr) >> 24) & 0xff, \
41 | .arp_tpa[1] = (be32toh((_tip)->s_addr) >> 16) & 0xff, \
42 | .arp_tpa[2] = (be32toh((_tip)->s_addr) >> 8) & 0xff, \
43 | .arp_tpa[3] = be32toh((_tip)->s_addr) & 0xff, \
44 | }
45 |
46 | static void test_map(void) {
47 | int r, mapfd = -1;
48 | struct in_addr addr = { 1 };
49 |
50 | r = n_acd_bpf_map_create(&mapfd, 8);
51 | c_assert(r >= 0);
52 | c_assert(mapfd >= 0);
53 |
54 | r = n_acd_bpf_map_remove(mapfd, &addr);
55 | c_assert(r == -ENOENT);
56 |
57 | r = n_acd_bpf_map_add(mapfd, &addr);
58 | c_assert(r >= 0);
59 |
60 | r = n_acd_bpf_map_add(mapfd, &addr);
61 | c_assert(r == -EEXIST);
62 |
63 | r = n_acd_bpf_map_remove(mapfd, &addr);
64 | c_assert(r >= 0);
65 |
66 | r = n_acd_bpf_map_remove(mapfd, &addr);
67 | c_assert(r == -ENOENT);
68 |
69 | close(mapfd);
70 | }
71 |
72 | static void verify_success(struct ether_arp *packet, int out_fd, int in_fd) {
73 | uint8_t buf[sizeof(struct ether_arp)];
74 | int r;
75 |
76 | r = send(out_fd, packet, sizeof(struct ether_arp), 0);
77 | c_assert(r == sizeof(struct ether_arp));
78 |
79 | r = recv(in_fd, buf, sizeof(buf), 0);
80 | c_assert(r == sizeof(struct ether_arp));
81 | }
82 |
83 | static void verify_failure(struct ether_arp *packet, int out_fd, int in_fd) {
84 | uint8_t buf[sizeof(struct ether_arp)];
85 | int r;
86 |
87 | r = send(out_fd, packet, sizeof(struct ether_arp), 0);
88 | c_assert(r == sizeof(struct ether_arp));
89 |
90 | r = recv(in_fd, buf, sizeof(buf), 0);
91 | c_assert(r < 0);
92 | c_assert(errno == EAGAIN);
93 | }
94 |
95 | static void test_filter(void) {
96 | uint8_t buf[sizeof(struct ether_arp) + 1] = {};
97 | struct ether_addr mac1 = { { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 } };
98 | struct ether_addr mac2 = { { 0x01, 0x02, 0x03, 0x04, 0x05, 0x07 } };
99 | struct in_addr ip0 = { 0 };
100 | struct in_addr ip1 = { 1 };
101 | struct in_addr ip2 = { 2 };
102 | struct ether_arp *packet = (struct ether_arp *)buf;
103 | int r, mapfd = -1, progfd = -1, pair[2];
104 |
105 | r = n_acd_bpf_map_create(&mapfd, 1);
106 | c_assert(r >= 0);
107 |
108 | r = n_acd_bpf_compile(&progfd, mapfd, &mac1);
109 | c_assert(r >= 0);
110 | c_assert(progfd >= 0);
111 |
112 | r = socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, pair);
113 | c_assert(r >= 0);
114 |
115 | r = setsockopt(pair[1], SOL_SOCKET, SO_ATTACH_BPF, &progfd,
116 | sizeof(progfd));
117 | c_assert(r >= 0);
118 |
119 | r = n_acd_bpf_map_add(mapfd, &ip1);
120 | c_assert(r >= 0);
121 |
122 | /* valid */
123 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
124 | verify_success(packet, pair[0], pair[1]);
125 |
126 | /* valid: reply instead of request */
127 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REPLY, &mac2, &ip1, &ip2);
128 | verify_success(packet, pair[0], pair[1]);
129 |
130 | /* valid: to us instead of from us */
131 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip0, &ip1);
132 | verify_success(packet, pair[0], pair[1]);
133 |
134 | /* invalid header type */
135 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
136 | packet->arp_hrd += 1;
137 | verify_failure(packet, pair[0], pair[1]);
138 |
139 | /* invalid protocol */
140 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
141 | packet->arp_pro += 1;
142 | verify_failure(packet, pair[0], pair[1]);
143 |
144 | /* invalid hw addr length */
145 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
146 | packet->arp_hln += 1;
147 | verify_failure(packet, pair[0], pair[1]);
148 |
149 | /* invalid protocol addr length */
150 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
151 | packet->arp_pln += 1;
152 | verify_failure(packet, pair[0], pair[1]);
153 |
154 | /* invalid operation */
155 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_NAK, &mac2, &ip1, &ip2);
156 | packet->arp_hln += 1;
157 | verify_failure(packet, pair[0], pair[1]);
158 |
159 | /* own mac */
160 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac1, &ip1, &ip2);
161 | verify_failure(packet, pair[0], pair[1]);
162 |
163 | /* not to, nor from us, with source */
164 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip2, &ip2);
165 | verify_failure(packet, pair[0], pair[1]);
166 |
167 | /* not to, nor from us, without source */
168 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip0, &ip2);
169 | verify_failure(packet, pair[0], pair[1]);
170 |
171 | /* to us instead of from us, but reply */
172 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REPLY, &mac2, &ip0, &ip1);
173 | verify_failure(packet, pair[0], pair[1]);
174 |
175 | /* long */
176 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
177 | r = send(pair[0], buf, sizeof(struct ether_arp) + 1, 0);
178 | c_assert(r == sizeof(struct ether_arp) + 1);
179 |
180 | r = recv(pair[1], buf, sizeof(buf), 0);
181 | c_assert(r == sizeof(struct ether_arp));
182 |
183 | /* short */
184 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
185 | r = send(pair[0], buf, sizeof(struct ether_arp) - 1, 0);
186 | c_assert(r == sizeof(struct ether_arp) - 1);
187 |
188 | r = recv(pair[1], buf, sizeof(buf), 0);
189 | c_assert(r < 0);
190 | c_assert(errno == EAGAIN);
191 |
192 | /*
193 | * Send one packet before and one packet after modifying the map,
194 | * verify that the modification applies at the time of send(), not recv().
195 | */
196 | *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
197 | r = send(pair[0], buf, sizeof(struct ether_arp), 0);
198 | c_assert(r == sizeof(struct ether_arp));
199 |
200 | r = n_acd_bpf_map_remove(mapfd, &ip1);
201 | c_assert(r >= 0);
202 |
203 | r = send(pair[0], buf, sizeof(struct ether_arp), 0);
204 | c_assert(r == sizeof(struct ether_arp));
205 |
206 | r = recv(pair[1], buf, sizeof(buf), 0);
207 | c_assert(r == sizeof(struct ether_arp));
208 |
209 | r = recv(pair[1], buf, sizeof(buf), 0);
210 | c_assert(r < 0);
211 | c_assert(errno == EAGAIN);
212 |
213 | close(pair[0]);
214 | close(pair[1]);
215 | close(progfd);
216 | close(mapfd);
217 | }
218 |
219 | int main(int argc, char **argv) {
220 | test_setup();
221 |
222 | test_map();
223 | test_filter();
224 |
225 | return 0;
226 | }
227 |
--------------------------------------------------------------------------------
/src/test-loopback.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Test on loopback device
3 | * This runs the ACD engine on the loopback device, effectively testing that the
4 | * BPF filter of ACD discards its own packets. This might happen on
5 | * non-spanning-tree networks, or on networks that echo packets.
6 | */
7 |
8 | #undef NDEBUG
9 | #include
10 | #include
11 | #include "test.h"
12 |
13 | static void test_loopback(int ifindex, uint8_t *mac, size_t n_mac) {
14 | NAcdConfig *config;
15 | NAcd *acd;
16 | struct pollfd pfds;
17 | int r, fd;
18 |
19 | r = n_acd_config_new(&config);
20 | c_assert(!r);
21 |
22 | n_acd_config_set_ifindex(config, ifindex);
23 | n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
24 | n_acd_config_set_mac(config, mac, n_mac);
25 |
26 | r = n_acd_new(&acd, config);
27 | c_assert(!r);
28 |
29 | n_acd_config_free(config);
30 |
31 | {
32 | NAcdProbeConfig *probe_config;
33 | NAcdProbe *probe;
34 | struct in_addr ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) };
35 |
36 | r = n_acd_probe_config_new(&probe_config);
37 | c_assert(!r);
38 |
39 | n_acd_probe_config_set_ip(probe_config, ip);
40 | n_acd_probe_config_set_timeout(probe_config, 100);
41 |
42 | r = n_acd_probe(acd, &probe, probe_config);
43 | c_assert(!r);
44 |
45 | n_acd_probe_config_free(probe_config);
46 |
47 | n_acd_get_fd(acd, &fd);
48 |
49 | for (;;) {
50 | NAcdEvent *event;
51 | pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
52 | r = poll(&pfds, 1, -1);
53 | c_assert(r >= 0);
54 |
55 | r = n_acd_dispatch(acd);
56 | c_assert(!r);
57 |
58 | r = n_acd_pop_event(acd, &event);
59 | c_assert(!r);
60 | if (event) {
61 | c_assert(event->event == N_ACD_EVENT_READY);
62 | break;
63 | }
64 | }
65 |
66 | n_acd_probe_free(probe);
67 | }
68 |
69 | n_acd_unref(acd);
70 | }
71 |
72 | int main(int argc, char **argv) {
73 | struct ether_addr mac;
74 | int ifindex;
75 |
76 | test_setup();
77 |
78 | test_loopback_up(&ifindex, &mac);
79 | test_loopback(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet));
80 |
81 | return 0;
82 | }
83 |
--------------------------------------------------------------------------------
/src/test-twice.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Test with unused address twice in parallel
3 | * This runs the ACD engine with an unused address on a veth pair, but it runs
4 | * it on both ends. We expect the PROBE to fail on at least one of the devices.
5 | */
6 |
7 | #undef NDEBUG
8 | #include
9 | #include
10 | #include "test.h"
11 |
12 | static void test_unused(int ifindex1, uint8_t *mac1, size_t n_mac1, int ifindex2, uint8_t *mac2, size_t n_mac2) {
13 | NAcdConfig config1 = {
14 | .ifindex = ifindex1,
15 | .transport = N_ACD_TRANSPORT_ETHERNET,
16 | .mac = mac1,
17 | .n_mac = n_mac1,
18 | .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) },
19 | .timeout_msec = 100,
20 | };
21 | NAcdConfig config2 = {
22 | .ifindex = ifindex2,
23 | .transport = N_ACD_TRANSPORT_ETHERNET,
24 | .mac = mac2,
25 | .n_mac = n_mac2,
26 | .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) },
27 | .timeout_msec = 100,
28 | };
29 | struct pollfd pfds[2];
30 | NAcd *acd1, *acd2;
31 | int r, fd1, fd2, state1, state2;
32 |
33 | r = n_acd_new(&acd1);
34 | c_assert(!r);
35 | r = n_acd_new(&acd2);
36 | c_assert(!r);
37 |
38 | n_acd_get_fd(acd1, &fd1);
39 | n_acd_get_fd(acd2, &fd2);
40 |
41 | r = n_acd_start(acd1, &config1);
42 | c_assert(!r);
43 | r = n_acd_start(acd2, &config2);
44 | c_assert(!r);
45 |
46 | for (state1 = state2 = -1; state1 == -1 || state2 == -1; ) {
47 | NAcdEvent *event;
48 | pfds[0] = (struct pollfd){ .fd = fd1, .events = (state1 == -1) ? POLLIN : 0 };
49 | pfds[1] = (struct pollfd){ .fd = fd2, .events = (state2 == -1) ? POLLIN : 0 };
50 |
51 | r = poll(pfds, sizeof(pfds) / sizeof(*pfds), -1);
52 | c_assert(r >= 0);
53 |
54 | if (state1 == -1) {
55 | r = n_acd_dispatch(acd1);
56 | c_assert(!r);
57 |
58 | r = n_acd_pop_event(acd1, &event);
59 | if (!r) {
60 | c_assert(event->event == N_ACD_EVENT_READY || event->event == N_ACD_EVENT_USED);
61 | state1 = !!(event->event == N_ACD_EVENT_READY);
62 | } else {
63 | c_assert(r == N_ACD_E_DONE);
64 | }
65 | }
66 |
67 | if (state2 == -1) {
68 | r = n_acd_dispatch(acd2);
69 | c_assert(!r);
70 |
71 | r = n_acd_pop_event(acd2, &event);
72 | if (!r) {
73 | c_assert(event->event == N_ACD_EVENT_READY || event->event == N_ACD_EVENT_USED);
74 | state2 = !!(event->event == N_ACD_EVENT_READY);
75 | } else {
76 | c_assert(r == N_ACD_E_DONE);
77 | }
78 | }
79 | }
80 |
81 | n_acd_free(acd1);
82 | n_acd_free(acd2);
83 |
84 | c_assert(!state1 || !state2);
85 | }
86 |
87 | int main(int argc, char **argv) {
88 | struct ether_addr mac1, mac2;
89 | int ifindex1, ifindex2;
90 |
91 | test_setup();
92 |
93 | test_veth_new(&ifindex1, &mac1, &ifindex2, &mac2);
94 | test_unused(ifindex1, mac1.ether_addr_octet, sizeof(mac2.ether_addr_octet), ifindex2, mac2.ether_addr_octet, sizeof(mac2.ether_addr_octet));
95 |
96 | return 0;
97 | }
98 |
--------------------------------------------------------------------------------
/src/test-unplug.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Unplug device during test run
3 | * Run the ACD engine with an address that is not used by anyone else on the
4 | * link, but DOWN or UNPLUG the device while running.
5 | */
6 |
7 | #undef NDEBUG
8 | #include
9 | #include
10 | #include "test.h"
11 |
12 | static void test_unplug_down(int ifindex, uint8_t *mac, size_t n_mac, unsigned int run) {
13 | NAcdConfig config = {
14 | .ifindex = ifindex,
15 | .transport = N_ACD_TRANSPORT_ETHERNET,
16 | .mac = mac,
17 | .n_mac = n_mac,
18 | .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) },
19 | .timeout_msec = 100,
20 | };
21 | struct pollfd pfds;
22 | NAcd *acd;
23 | int r, fd;
24 |
25 | if (!run--)
26 | test_veth_cmd(ifindex, "down");
27 |
28 | r = n_acd_new(&acd);
29 | c_assert(!r);
30 |
31 | if (!run--)
32 | test_veth_cmd(ifindex, "down");
33 |
34 | n_acd_get_fd(acd, &fd);
35 | r = n_acd_start(acd, &config);
36 | c_assert(!r);
37 |
38 | if (!run--)
39 | test_veth_cmd(ifindex, "down");
40 |
41 | for (;;) {
42 | NAcdEvent *event;
43 | pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
44 | r = poll(&pfds, 1, -1);
45 | c_assert(r >= 0);
46 |
47 | if (!run--)
48 | test_veth_cmd(ifindex, "down");
49 |
50 | r = n_acd_dispatch(acd);
51 | c_assert(!r);
52 |
53 | r = n_acd_pop_event(acd, &event);
54 | if (!r) {
55 | if (event->event == N_ACD_EVENT_DOWN) {
56 | break;
57 | } else {
58 | c_assert(event->event == N_ACD_EVENT_READY);
59 | test_veth_cmd(ifindex, "down");
60 | }
61 | } else {
62 | c_assert(r == N_ACD_E_DONE);
63 | }
64 | }
65 |
66 | n_acd_free(acd);
67 | }
68 |
69 | int main(int argc, char **argv) {
70 | struct ether_addr mac;
71 | unsigned int i;
72 | int ifindex;
73 |
74 | test_setup();
75 |
76 | test_veth_new(&ifindex, &mac, NULL, NULL);
77 |
78 | for (i = 0; i < 5; ++i) {
79 | test_unplug_down(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet), i);
80 | test_veth_cmd(ifindex, "up");
81 | }
82 |
83 | return 0;
84 | }
85 |
--------------------------------------------------------------------------------
/src/test-unused.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Test with unused address
3 | * Run the ACD engine with an address that is not used by anyone else on the
4 | * link. This should just pass through, with a short, random timeout.
5 | */
6 |
7 | #undef NDEBUG
8 | #include
9 | #include
10 | #include "test.h"
11 |
12 | static void test_unused(int ifindex, const uint8_t *mac, size_t n_mac) {
13 | NAcdConfig config = {
14 | .ifindex = ifindex,
15 | .transport = N_ACD_TRANSPORT_ETHERNET,
16 | .mac = mac,
17 | .n_mac = n_mac,
18 | .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) },
19 | .timeout_msec = 100,
20 | };
21 | struct pollfd pfds;
22 | NAcd *acd;
23 | int r, fd;
24 |
25 | r = n_acd_new(&acd);
26 | c_assert(!r);
27 |
28 | n_acd_get_fd(acd, &fd);
29 | r = n_acd_start(acd, &config);
30 | c_assert(!r);
31 |
32 | for (;;) {
33 | NAcdEvent *event;
34 | pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
35 | r = poll(&pfds, 1, -1);
36 | c_assert(r >= 0);
37 |
38 | r = n_acd_dispatch(acd);
39 | c_assert(!r);
40 |
41 | r = n_acd_pop_event(acd, &event);
42 | if (!r) {
43 | c_assert(event->event == N_ACD_EVENT_READY);
44 | break;
45 | } else {
46 | c_assert(r == N_ACD_E_DONE);
47 | }
48 | }
49 |
50 | n_acd_free(acd);
51 | }
52 |
53 | int main(int argc, char **argv) {
54 | struct ether_addr mac;
55 | int ifindex;
56 |
57 | test_setup();
58 |
59 | test_veth_new(&ifindex, &mac, NULL, NULL);
60 | test_unused(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet));
61 |
62 | return 0;
63 | }
64 |
--------------------------------------------------------------------------------
/src/test-veth.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Test on a veth link
3 | *
4 | * This essentially mimics a real network with two peers.
5 | *
6 | * Run one ACD context on each end of the tunnel. On one end probe for N
7 | * addresses; on the other end pre-configure N/3 of the same addresses and probe
8 | * for another N/3 of the addresses.
9 | *
10 | * Verify that in the case of simultaneous probes of the same address at most one
11 | * succeeds, that probing for a configured address always fails, and that
12 | * probing for a non-existent address always succeeds.
13 | *
14 | * Make sure to keep N fairly high as the protocol is probabilistic, and we also
15 | * want to verify that resizing the internal maps works correctly.
16 | */
17 |
18 | #undef NDEBUG
19 | #include <endian.h>
20 | #include <poll.h>
21 | #include "test.h"
22 |
23 | #define TEST_ACD_N_PROBES (9)
24 |
25 | typedef enum {
26 | TEST_ACD_STATE_UNKNOWN,
27 | TEST_ACD_STATE_USED,
28 | TEST_ACD_STATE_READY,
29 | } TestAcdState;
30 |
31 | static void test_veth(int ifindex1, uint8_t *mac1, size_t n_mac1,
32 | int ifindex2, uint8_t *mac2, size_t n_mac2) {
33 | NAcdConfig *config;
34 | NAcd *acd1, *acd2;
35 | NAcdProbe *probes1[TEST_ACD_N_PROBES];
36 | NAcdProbe *probes2[TEST_ACD_N_PROBES];
37 | unsigned long state1, state2;
38 | size_t n_running = 0;
39 | int r;
40 |
41 | r = n_acd_config_new(&config);
42 | c_assert(!r);
43 |
44 | n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
45 |
46 | n_acd_config_set_ifindex(config, ifindex1);
47 | n_acd_config_set_mac(config, mac1, n_mac1);
48 | r = n_acd_new(&acd1, config);
49 | c_assert(!r);
50 |
51 | n_acd_config_set_ifindex(config, ifindex2);
52 | n_acd_config_set_mac(config, mac2, n_mac2);
53 | r = n_acd_new(&acd2, config);
54 | c_assert(!r);
55 |
56 | n_acd_config_free(config);
57 |
58 | {
59 | NAcdProbeConfig *probe_config;
60 |
61 | r = n_acd_probe_config_new(&probe_config);
62 | c_assert(!r);
63 | n_acd_probe_config_set_timeout(probe_config, 1024);
64 |
65 | c_assert(TEST_ACD_N_PROBES <= (1 << 24));
66 |
67 | for (size_t i = 0; i < TEST_ACD_N_PROBES; ++i) {
68 | struct in_addr ip = { htobe32((10 << 24) | i) };
69 |
70 | n_acd_probe_config_set_ip(probe_config, ip);
71 |
72 | switch (i % 3) {
73 | case 0:
74 | /*
75 | * Probe on one side, and leave the address
76 | * unset on the other. The probe must succeed.
77 | */
78 | break;
79 | case 1:
80 | /*
81 | * Preconfigure the address on one side, and
82 | * probe on the other. The probe must fail.
83 | */
84 | test_add_child_ip(&ip);
85 | break;
86 | case 2:
87 | /*
88 | * Probe both sides for the same address, at
89 | * most one may succeed.
90 | */
91 |
92 | r = n_acd_probe(acd2, &probes2[i], probe_config);
93 | c_assert(!r);
94 |
95 | ++n_running;
96 | break;
97 | default:
98 | c_assert(0);
99 | abort();
100 | break;
101 | }
102 |
103 | r = n_acd_probe(acd1, &probes1[i], probe_config);
104 | c_assert(!r);
105 |
106 | ++n_running;
107 | }
108 |
109 | n_acd_probe_config_free(probe_config);
110 |
111 | while (n_running > 0) {
112 | NAcdEvent *event;
113 | struct pollfd pfds[2] = {
114 | { .events = POLLIN },
115 | { .events = POLLIN },
116 | };
117 |
118 | n_acd_get_fd(acd1, &pfds[0].fd);
119 | n_acd_get_fd(acd2, &pfds[1].fd);
120 |
121 | r = poll(pfds, 2, -1);
122 | c_assert(r >= 0);
123 |
124 | if (pfds[0].revents & POLLIN) {
125 | r = n_acd_dispatch(acd1);
126 | c_assert(!r || r == N_ACD_E_PREEMPTED);
127 |
128 | for (;;) {
129 | r = n_acd_pop_event(acd1, &event);
130 | c_assert(!r);
131 | if (event) {
132 | switch (event->event) {
133 | case N_ACD_EVENT_READY:
134 | n_acd_probe_get_userdata(event->ready.probe, (void**)&state1);
135 | c_assert(state1 == TEST_ACD_STATE_UNKNOWN);
136 | state1 = TEST_ACD_STATE_READY;
137 | n_acd_probe_set_userdata(event->ready.probe, (void*)state1);
138 |
139 | break;
140 | case N_ACD_EVENT_USED:
141 | n_acd_probe_get_userdata(event->used.probe, (void**)&state1);
142 | c_assert(state1 == TEST_ACD_STATE_UNKNOWN);
143 | state1 = TEST_ACD_STATE_USED;
144 | n_acd_probe_set_userdata(event->used.probe, (void*)state1);
145 |
146 | break;
147 | default:
148 | c_assert(0);
149 | }
150 |
151 | --n_running;
152 | } else {
153 | break;
154 | }
155 | }
156 | }
157 |
158 | if (pfds[1].revents & POLLIN) {
159 | r = n_acd_dispatch(acd2);
160 | c_assert(!r || r == N_ACD_E_PREEMPTED);
161 |
162 | for (;;) {
163 | r = n_acd_pop_event(acd2, &event);
164 | c_assert(!r);
165 | if (event) {
166 | switch (event->event) {
167 | case N_ACD_EVENT_READY:
168 | n_acd_probe_get_userdata(event->ready.probe, (void**)&state2);
169 | c_assert(state2 == TEST_ACD_STATE_UNKNOWN);
170 | state2 = TEST_ACD_STATE_READY;
171 | n_acd_probe_set_userdata(event->ready.probe, (void*)state2);
172 |
173 | break;
174 | case N_ACD_EVENT_USED:
175 | n_acd_probe_get_userdata(event->used.probe, (void**)&state2);
176 | c_assert(state2 == TEST_ACD_STATE_UNKNOWN);
177 | state2 = TEST_ACD_STATE_USED;
178 | n_acd_probe_set_userdata(event->used.probe, (void*)state2);
179 |
180 | break;
181 | default:
182 | c_assert(0);
183 | }
184 |
185 | --n_running;
186 | } else {
187 | break;
188 | }
189 | }
190 | }
191 | }
192 |
193 | for (size_t i = 0; i < TEST_ACD_N_PROBES; ++i) {
194 | struct in_addr ip = { htobe32((10 << 24) | i) };
195 |
196 | switch (i % 3) {
197 | case 0:
198 | n_acd_probe_get_userdata(probes1[i], (void **)&state1);
199 | c_assert(state1 == TEST_ACD_STATE_READY);
200 |
201 | break;
202 | case 1:
203 | test_del_child_ip(&ip);
204 |
205 | n_acd_probe_get_userdata(probes1[i], (void **)&state1);
206 | c_assert(state1 == TEST_ACD_STATE_USED);
207 |
208 | break;
209 | case 2:
210 | n_acd_probe_get_userdata(probes1[i], (void **)&state1);
211 | n_acd_probe_get_userdata(probes2[i], (void **)&state2);
212 | c_assert(state1 != TEST_ACD_STATE_UNKNOWN);
213 | c_assert(state2 != TEST_ACD_STATE_UNKNOWN);
214 | c_assert(state1 == TEST_ACD_STATE_USED || state2 == TEST_ACD_STATE_USED);
215 | n_acd_probe_free(probes2[i]);
216 |
217 | break;
218 | }
219 | n_acd_probe_free(probes1[i]);
220 | }
221 | }
222 |
223 | n_acd_unref(acd2);
224 | n_acd_unref(acd1);
225 | }
226 |
227 | int main(int argc, char **argv) {
228 | struct ether_addr mac1, mac2;
229 | int ifindex1, ifindex2;
230 |
231 | test_setup();
232 |
233 | test_veth_new(&ifindex1, &mac1, &ifindex2, &mac2);
234 | for (unsigned int i = 0; i < 8; ++i) {
235 | test_veth(ifindex1, mac1.ether_addr_octet, sizeof(mac1.ether_addr_octet),
236 | ifindex2, mac2.ether_addr_octet, sizeof(mac2.ether_addr_octet));
237 | }
238 |
239 | return 0;
240 | }
241 |
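For reference, the probe lifecycle exercised above can be distilled into a single-probe skeleton. This is a minimal sketch, not part of the test suite: it only uses calls that already appear in test-veth.c (n_acd_config_new, n_acd_probe, the poll/dispatch/pop loop), while the helper name example_probe_once, its parameters, and out_event are invented for illustration, and error handling is collapsed into asserts.

/* Minimal sketch: probe a single address with the API used above. The
 * ifindex/mac pair is assumed to come from the caller, e.g. test_veth_new(). */
static int example_probe_once(int ifindex, const uint8_t *mac, size_t n_mac,
                              struct in_addr ip, unsigned int *out_event) {
        NAcdConfig *config;
        NAcdProbeConfig *probe_config;
        NAcd *acd;
        NAcdProbe *probe;
        NAcdEvent *event;
        int r;

        /* Create the ACD context for the given link. */
        r = n_acd_config_new(&config);
        c_assert(!r);
        n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
        n_acd_config_set_ifindex(config, ifindex);
        n_acd_config_set_mac(config, mac, n_mac);
        r = n_acd_new(&acd, config);
        c_assert(!r);
        n_acd_config_free(config);

        /* Start a single probe for @ip. */
        r = n_acd_probe_config_new(&probe_config);
        c_assert(!r);
        n_acd_probe_config_set_ip(probe_config, ip);
        n_acd_probe_config_set_timeout(probe_config, 1024);
        r = n_acd_probe(acd, &probe, probe_config);
        c_assert(!r);
        n_acd_probe_config_free(probe_config);

        /* Poll the context until the probe settles on an event. */
        for (;;) {
                struct pollfd pfd = { .events = POLLIN };

                n_acd_get_fd(acd, &pfd.fd);
                r = poll(&pfd, 1, -1);
                c_assert(r >= 0);

                r = n_acd_dispatch(acd);
                c_assert(!r || r == N_ACD_E_PREEMPTED);

                r = n_acd_pop_event(acd, &event);
                c_assert(!r);
                if (event) {
                        *out_event = event->event;
                        break;
                }
        }

        n_acd_probe_free(probe);
        n_acd_unref(acd);
        return 0;
}

A caller would typically check whether *out_event ends up as N_ACD_EVENT_READY or N_ACD_EVENT_USED, mirroring the bookkeeping the test above does via probe userdata.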
--------------------------------------------------------------------------------
/src/test.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | /*
4 | * Test Helpers
5 | * Bunch of helpers to set up the environment for networking tests. This
6 | * includes net-namespace setups, veth setups, and more.
7 | */
8 |
9 | #undef NDEBUG
10 | #include <arpa/inet.h>
11 | #include <c-stdaux.h>
12 | #include <errno.h>
13 | #include <fcntl.h>
14 | #include <net/ethernet.h>
15 | #include <net/if.h>
16 | #include <netinet/if_ether.h>
17 | #include <netinet/in.h>
18 | #include <poll.h>
19 | #include <sched.h>
20 | #include <stdbool.h>
21 | #include <stdio.h>
22 | #include <stdlib.h>
23 | #include <string.h>
24 | #include <sys/ioctl.h>
25 | #include <sys/mount.h>
26 | #include <sys/resource.h>
27 | #include <sys/socket.h>
28 | #include <sys/stat.h>
29 | #include <sys/types.h>
30 | #include <sys/wait.h>
31 | #include <unistd.h>
32 | #include "n-acd.h"
33 |
34 | static inline void test_add_child_ip(const struct in_addr *addr) {
35 | char *p;
36 | int r;
37 |
38 | r = asprintf(&p, "ip addr add dev veth1 %s/8", inet_ntoa(*addr));
39 | c_assert(r >= 0);
40 |
41 | r = system(p);
42 | c_assert(r >= 0);
43 |
44 | free(p);
45 | }
46 |
47 | static inline void test_del_child_ip(const struct in_addr *addr) {
48 | char *p;
49 | int r;
50 |
51 | r = asprintf(&p, "ip addr del dev veth1 %s/8", inet_ntoa(*addr));
52 | c_assert(r >= 0);
53 |
54 | r = system(p);
55 | c_assert(r >= 0);
56 |
57 | free(p);
58 | }
59 |
60 | static inline void test_if_query(const char *name, int *indexp, struct ether_addr *macp) {
61 | struct ifreq ifr = {};
62 | size_t l;
63 | int r, s;
64 |
65 | l = strlen(name);
66 | c_assert(l < IF_NAMESIZE);
67 |
68 | if (indexp) {
69 | *indexp = if_nametoindex(name);
70 | c_assert(*indexp > 0);
71 | }
72 |
73 | if (macp) {
74 | s = socket(AF_INET, SOCK_DGRAM, 0);
75 | c_assert(s >= 0);
76 |
77 | strncpy(ifr.ifr_name, name, l + 1);
78 | r = ioctl(s, SIOCGIFHWADDR, &ifr);
79 | c_assert(r >= 0);
80 |
81 | memcpy(macp->ether_addr_octet, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
82 |
83 | close(s);
84 | }
85 | }
86 |
87 | static inline void test_veth_cmd(int ifindex, const char *cmd) {
88 | char *p, name[IF_NAMESIZE + 1] = {};
89 | int r;
90 |
91 | p = if_indextoname(ifindex, name);
92 | c_assert(p);
93 |
94 | r = asprintf(&p, "ip link set %s %s", name, cmd);
95 | c_assert(r >= 0);
96 |
97 | /* Again: Ewwww... */
98 | r = system(p);
99 | c_assert(r == 0);
100 |
101 | free(p);
102 | }
103 |
104 | static inline void test_veth_new(int *parent_indexp,
105 | struct ether_addr *parent_macp,
106 | int *child_indexp,
107 | struct ether_addr *child_macp) {
108 | int r;
109 |
110 | /* Eww... but it works. */
111 | r = system("ip link add type veth");
112 | c_assert(r == 0);
113 | r = system("ip link set veth0 up");
114 | c_assert(r == 0);
115 | r = system("ip link set veth1 up");
116 | c_assert(r == 0);
117 |
118 | test_if_query("veth0", parent_indexp, parent_macp);
119 | test_if_query("veth1", child_indexp, child_macp);
120 | }
121 |
122 | static inline void test_loopback_up(int *indexp, struct ether_addr *macp) {
123 | int r;
124 |
125 | r = system("ip link set lo up");
126 | c_assert(r == 0);
127 |
128 | test_if_query("lo", indexp, macp);
129 | }
130 |
131 | static inline void test_raise_memlock(void) {
132 | const size_t wanted = 64 * 1024 * 1024;
133 | struct rlimit get, set;
134 | int r;
135 |
136 | r = getrlimit(RLIMIT_MEMLOCK, &get);
137 | c_assert(!r);
138 |
139 | /* try raising limit to @wanted */
140 | set.rlim_cur = wanted;
141 | set.rlim_max = (wanted > get.rlim_max) ? wanted : get.rlim_max;
142 | r = setrlimit(RLIMIT_MEMLOCK, &set);
143 | if (r) {
144 | c_assert(errno == EPERM);
145 |
146 | /* not privileged to raise limit, so maximize soft limit */
147 | set.rlim_cur = get.rlim_max;
148 | set.rlim_max = get.rlim_max;
149 | r = setrlimit(RLIMIT_MEMLOCK, &set);
150 | c_assert(!r);
151 | }
152 | }
153 |
154 | static inline void test_unshare_user_namespace(void) {
155 | uid_t euid;
156 | gid_t egid;
157 | int r, fd;
158 |
159 | /*
160 | * Enter a new user namespace as root:root.
161 | */
162 |
163 | euid = geteuid();
164 | egid = getegid();
165 |
166 | r = unshare(CLONE_NEWUSER);
167 | c_assert(r >= 0);
168 |
169 | fd = open("/proc/self/uid_map", O_WRONLY);
170 | c_assert(fd >= 0);
171 | r = dprintf(fd, "0 %d 1\n", euid);
172 | c_assert(r >= 0);
173 | close(fd);
174 |
175 | fd = open("/proc/self/setgroups", O_WRONLY);
176 | c_assert(fd >= 0);
177 | r = dprintf(fd, "deny");
178 | c_assert(r >= 0);
179 | close(fd);
180 |
181 | fd = open("/proc/self/gid_map", O_WRONLY);
182 | c_assert(fd >= 0);
183 | r = dprintf(fd, "0 %d 1\n", egid);
184 | c_assert(r >= 0);
185 | close(fd);
186 | }
187 |
188 | static inline void test_setup(void) {
189 | int r;
190 |
191 | /*
192 | * Move into a new network and mount namespace both associated
193 | * with a new user namespace where the current eUID is mapped to
194 | * 0. Then create a private instance of /run/netns. This ensures
195 | * that any network devices or network namespaces are private to
196 | * the test process.
197 | */
198 |
199 | test_raise_memlock();
200 | test_unshare_user_namespace();
201 |
202 | r = unshare(CLONE_NEWNET | CLONE_NEWNS);
203 | c_assert(r >= 0);
204 |
205 | r = mount(NULL, "/", "", MS_PRIVATE | MS_REC, NULL);
206 | c_assert(r >= 0);
207 |
208 | r = mount(NULL, "/run", "tmpfs", 0, NULL);
209 | c_assert(r >= 0);
210 |
211 | r = mkdir("/run/netns", 0755);
212 | c_assert(r >= 0);
213 | }
214 |
--------------------------------------------------------------------------------
/src/util/test-timer.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Tests for timer utility library
3 | */
4 |
5 | #undef NDEBUG
6 | #include <c-stdaux.h>
7 | #include <poll.h>
8 | #include <stdint.h>
9 | #include <stdlib.h>
10 | #include <sys/timerfd.h>
11 | #include <time.h>
12 | #include <unistd.h>
13 | #include "timer.h"
14 |
15 | #define N_TIMEOUTS (10000)
16 |
17 | static void test_api(void) {
18 | Timer timer = TIMER_NULL(timer);
19 | Timeout t1 = TIMEOUT_INIT(t1), t2 = TIMEOUT_INIT(t2), *t;
20 | int r;
21 |
22 | r = timer_init(&timer);
23 | c_assert(!r);
24 |
25 | timeout_schedule(&t1, &timer, 1);
26 | timeout_schedule(&t2, &timer, 2);
27 |
28 | r = timer_pop_timeout(&timer, 10, &t);
29 | c_assert(!r);
30 | c_assert(t == &t1);
31 |
32 | timeout_unschedule(&t2);
33 |
34 | r = timer_pop_timeout(&timer, 10, &t);
35 | c_assert(!r);
36 | c_assert(!t);
37 |
38 | timer_deinit(&timer);
39 | }
40 |
41 | static void test_pop(void) {
42 | Timer timer = TIMER_NULL(timer);
43 | Timeout timeouts[N_TIMEOUTS] = {};
44 | uint64_t times[N_TIMEOUTS] = {};
45 | size_t n_timeouts = 0;
46 | bool armed;
47 | Timeout *t;
48 | int r;
49 |
50 | r = timer_init(&timer);
51 | c_assert(!r);
52 |
53 | for(size_t i = 0; i < N_TIMEOUTS; ++i) {
54 | timeouts[i] = (Timeout)TIMEOUT_INIT(timeouts[i]);
55 | times[i] = rand() % 128 + 1;
56 | timeout_schedule(&timeouts[i], &timer, times[i]);
57 | }
58 |
59 | armed = true;
60 |
61 | for(size_t i = 0; i <= 128; ++i) {
62 | if (armed) {
63 | struct pollfd pfd = {
64 | .fd = timer.fd,
65 | .events = POLLIN,
66 | };
67 | uint64_t count;
68 |
69 | r = poll(&pfd, 1, -1);
70 | c_assert(r == 1);
71 |
72 | r = read(timer.fd, &count, sizeof(count));
73 | c_assert(r == sizeof(count));
74 | c_assert(count == 1);
75 | armed = false;
76 | }
77 |
78 | for (;;) {
79 | uint64_t current_time;
80 |
81 | r = timer_pop_timeout(&timer, i, &t);
82 | c_assert(!r);
83 | if (!t) {
84 | timer_rearm(&timer);
85 | break;
86 | }
87 |
88 | current_time = times[t - timeouts];
89 | c_assert(current_time == i);
90 | ++n_timeouts;
91 | armed = true;
92 | }
93 | }
94 |
95 | c_assert(n_timeouts == N_TIMEOUTS);
96 |
97 | r = timer_pop_timeout(&timer, (uint64_t)-1, &t);
98 | c_assert(!r);
99 | c_assert(!t);
100 |
101 | timer_deinit(&timer);
102 | }
103 |
104 | static void test_arm(void) {
105 | struct itimerspec spec = {
106 | .it_value = {
107 | .tv_sec = 1000,
108 | },
109 | };
110 | int fd1, fd2, r;
111 |
112 | fd1 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
113 | c_assert(fd1 >= 0);
114 |
115 | fd2 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
116 | c_assert(fd2 >= 0);
117 |
118 | r = timerfd_settime(fd1, 0, &spec, NULL);
119 | c_assert(r >= 0);
120 |
121 | r = timerfd_settime(fd2, 0, &spec, NULL);
122 | c_assert(r >= 0);
123 |
124 | r = timerfd_gettime(fd1, &spec);
125 | c_assert(r >= 0);
126 | c_assert(spec.it_value.tv_sec);
127 |
128 | r = timerfd_gettime(fd2, &spec);
129 | c_assert(r >= 0);
130 | c_assert(spec.it_value.tv_sec);
131 |
132 | spec = (struct itimerspec){};
133 |
134 | r = timerfd_settime(fd1, 0, &spec, NULL);
135 | c_assert(r >= 0);
136 |
137 | r = timerfd_gettime(fd1, &spec);
138 | c_assert(r >= 0);
139 | c_assert(!spec.it_value.tv_sec);
140 | c_assert(!spec.it_value.tv_nsec);
141 |
142 | r = timerfd_gettime(fd2, &spec);
143 | c_assert(r >= 0);
144 | c_assert(spec.it_value.tv_sec);
145 |
146 | spec = (struct itimerspec){ .it_value = { .tv_nsec = 1, }, };
147 |
148 | r = timerfd_settime(fd1, 0, &spec, NULL);
149 | c_assert(r >= 0);
150 |
151 | r = poll(&(struct pollfd) { .fd = fd1, .events = POLLIN }, 1, -1);
152 | c_assert(r == 1);
153 |
154 | r = timerfd_settime(fd2, 0, &spec, NULL);
155 | c_assert(r >= 0);
156 |
157 | r = poll(&(struct pollfd) { .fd = fd2, .events = POLLIN }, 1, -1);
158 | c_assert(r == 1);
159 |
160 | spec = (struct itimerspec){};
161 |
162 | r = timerfd_settime(fd1, 0, &spec, NULL);
163 | c_assert(r >= 0);
164 |
165 | r = poll(&(struct pollfd) { .fd = fd2, .events = POLLIN }, 1, -1);
166 | c_assert(r == 1);
167 |
168 | close(fd2);
169 | close(fd1);
170 | }
171 |
172 | int main(int argc, char **argv) {
173 | test_arm();
174 | test_api();
175 | test_pop();
176 | return 0;
177 | }
178 |
--------------------------------------------------------------------------------
/src/util/timer.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Timer Utility Library
3 | */
4 |
5 | #include <c-rbtree.h>
6 | #include <c-stdaux.h>
7 | #include <errno.h>
8 | #include <stdlib.h>
9 | #include <sys/timerfd.h>
10 | #include <time.h>
11 | #include <unistd.h>
12 | #include "timer.h"
13 |
14 | int timer_init(Timer *timer) {
15 | clockid_t clock = CLOCK_BOOTTIME;
16 | int r;
17 |
18 | r = timerfd_create(clock, TFD_CLOEXEC | TFD_NONBLOCK);
19 | if (r < 0 && errno == EINVAL) {
20 | clock = CLOCK_MONOTONIC;
21 | r = timerfd_create(clock, TFD_CLOEXEC | TFD_NONBLOCK);
22 | }
23 | if (r < 0)
24 | return -errno;
25 |
26 | *timer = (Timer)TIMER_NULL(*timer);
27 | timer->fd = r;
28 | timer->clock = clock;
29 |
30 | return 0;
31 | }
32 |
33 | void timer_deinit(Timer *timer) {
34 | c_assert(c_rbtree_is_empty(&timer->tree));
35 |
36 | if (timer->fd >= 0) {
37 | close(timer->fd);
38 | timer->fd = -1;
39 | }
40 | }
41 |
42 | void timer_now(Timer *timer, uint64_t *nowp) {
43 | struct timespec ts;
44 | int r;
45 |
46 | r = clock_gettime(timer->clock, &ts);
47 | c_assert(r >= 0);
48 |
49 | *nowp = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec;
50 | }
51 |
52 | void timer_rearm(Timer *timer) {
53 | uint64_t time;
54 | Timeout *timeout;
55 | int r;
56 |
57 | /*
58 | * A timeout value of 0 clears the timer, so we should only set that if
59 | * no timeout exists in the tree.
60 | */
61 |
62 | timeout = c_rbnode_entry(c_rbtree_first(&timer->tree), Timeout, node);
63 | c_assert(!timeout || timeout->timeout);
64 |
65 | time = timeout ? timeout->timeout : 0;
66 |
67 | if (time != timer->scheduled_timeout) {
68 | r = timerfd_settime(timer->fd,
69 | TFD_TIMER_ABSTIME,
70 | &(struct itimerspec){
71 | .it_value = {
72 | .tv_sec = time / UINT64_C(1000000000),
73 | .tv_nsec = time % UINT64_C(1000000000),
74 | },
75 | },
76 | NULL);
77 | c_assert(r >= 0);
78 |
79 | timer->scheduled_timeout = time;
80 | }
81 | }
82 |
83 | int timer_read(Timer *timer) {
84 | uint64_t v;
85 | int r;
86 |
87 | r = read(timer->fd, &v, sizeof(v));
88 | if (r < 0) {
89 | if (errno == EAGAIN) {
90 | /*
91 | * No more pending events.
92 | */
93 | return 0;
94 | } else {
95 | /*
96 | * Something failed. We use CLOCK_BOOTTIME/MONOTONIC,
97 | * so ECANCELED cannot happen. Hence, there is no
98 | * error that we could gracefully handle. Fail hard
99 | * and let the caller deal with it.
100 | */
101 | return -errno;
102 | }
103 | } else if (r != sizeof(v) || v == 0) {
104 | /*
105 | * The kernel guarantees 8-byte reads, and only returns
106 | * data if at least one timer triggered; fail hard if
107 | * it ever starts behaving differently.
108 | */
109 | return -EIO;
110 | }
111 |
112 | return TIMER_E_TRIGGERED;
113 | }
114 |
115 |
116 | int timer_pop_timeout(Timer *timer, uint64_t until, Timeout **timeoutp) {
117 | Timeout *timeout;
118 |
119 | /*
120 | * If the first timeout is scheduled before @until, then unlink
121 | * it and return it. Otherwise, return NULL.
122 | */
123 | timeout = c_rbnode_entry(c_rbtree_first(&timer->tree), Timeout, node);
124 | if (timeout && timeout->timeout <= until) {
125 | c_rbnode_unlink(&timeout->node);
126 | timeout->timeout = 0;
127 | *timeoutp = timeout;
128 | } else {
129 | *timeoutp = NULL;
130 | }
131 |
132 | return 0;
133 | }
134 |
135 | void timeout_schedule(Timeout *timeout, Timer *timer, uint64_t time) {
136 | c_assert(time);
137 |
138 | /*
139 | * In case @timeout was already scheduled, remove it from the
140 | * tree. If we are moving it to a new timer, rearm the old one.
141 | */
142 | if (timeout->timer) {
143 | c_rbnode_unlink(&timeout->node);
144 | if (timeout->timer != timer)
145 | timer_rearm(timeout->timer);
146 | }
147 | timeout->timer = timer;
148 | timeout->timeout = time;
149 |
150 | /*
151 | * Now insert it back into the tree in the correct new position.
152 | * We allow duplicates in the tree, so this insertion is open-coded.
153 | */
154 | {
155 | Timeout *other;
156 | CRBNode **slot, *parent;
157 |
158 | slot = &timer->tree.root;
159 | parent = NULL;
160 | while (*slot) {
161 | other = c_rbnode_entry(*slot, Timeout, node);
162 | parent = *slot;
163 | if (timeout->timeout < other->timeout)
164 | slot = &(*slot)->left;
165 | else
166 | slot = &(*slot)->right;
167 | }
168 |
169 | c_rbtree_add(&timer->tree, parent, slot, &timeout->node);
170 | }
171 |
172 | /*
173 | * Rearm the timer as we updated the timeout tree.
174 | */
175 | timer_rearm(timer);
176 | }
177 |
178 | void timeout_unschedule(Timeout *timeout) {
179 | Timer *timer = timeout->timer;
180 |
181 | if (!timer)
182 | return;
183 |
184 | c_rbnode_unlink(&timeout->node);
185 | timeout->timeout = 0;
186 | timeout->timer = NULL;
187 |
188 | timer_rearm(timer);
189 | }
190 |
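Note that the timeouts kept in the tree are absolute timestamps on the timer's clock: timer_rearm() arms the timerfd with TFD_TIMER_ABSTIME, so callers are expected to derive deadlines from timer_now(). A minimal sketch of the scheduling side, assuming a caller that wants a timeout 100ms from now (variable names are illustrative only):

/* Illustrative only: schedule a timeout 100ms from now. */
Timer timer = TIMER_NULL(timer);
Timeout t = TIMEOUT_INIT(t);
uint64_t now;
int r;

r = timer_init(&timer);
c_assert(!r);

timer_now(&timer, &now);
timeout_schedule(&t, &timer, now + UINT64_C(100) * 1000000);

/* ... later, once the owner no longer cares about the timeout ... */
timeout_unschedule(&t);
timer_deinit(&timer);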
--------------------------------------------------------------------------------
/src/util/timer.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <c-rbtree.h>
4 | #include <c-stdaux.h>
5 | #include <inttypes.h>
6 | #include <stdlib.h>
7 | #include <sys/types.h>
8 | #include <time.h>
9 |
10 | typedef struct Timer Timer;
11 | typedef struct Timeout Timeout;
12 |
13 | enum {
14 | _TIMER_E_SUCCESS,
15 |
16 | TIMER_E_TRIGGERED,
17 |
18 | _TIMER_E_N,
19 | };
20 |
21 | struct Timer {
22 | int fd;
23 | clockid_t clock;
24 | CRBTree tree;
25 | uint64_t scheduled_timeout;
26 | };
27 |
28 | #define TIMER_NULL(_x) { \
29 | .fd = -1, \
30 | .tree = C_RBTREE_INIT, \
31 | }
32 |
33 | struct Timeout {
34 | Timer *timer;
35 | CRBNode node;
36 | uint64_t timeout;
37 | };
38 |
39 | #define TIMEOUT_INIT(_x) { \
40 | .node = C_RBNODE_INIT((_x).node), \
41 | }
42 |
43 | int timer_init(Timer *timer);
44 | void timer_deinit(Timer *timer);
45 |
46 | void timer_now(Timer *timer, uint64_t *nowp);
47 |
48 | int timer_pop_timeout(Timer *timer, uint64_t until, Timeout **timeoutp);
49 | void timer_rearm(Timer *timer);
50 | int timer_read(Timer *timer);
51 |
52 | void timeout_schedule(Timeout *timeout, Timer *timer, uint64_t time);
53 | void timeout_unschedule(Timeout *timeout);
54 |
55 |
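Tying the pieces together, a consumer of this API polls Timer.fd for POLLIN, drains the timerfd via timer_read(), pops every timeout that has expired by the current time, and finally rearms. A minimal dispatch sketch under those assumptions; example_dispatch_timer is an invented name, and the point where a popped timeout would be handled is left as a comment:

/* Illustrative dispatch routine for one POLLIN wakeup on timer->fd. */
static int example_dispatch_timer(Timer *timer) {
        Timeout *t;
        uint64_t now;
        int r;

        /* Consume the wakeup; 0 means nothing pending, <0 is a hard error. */
        r = timer_read(timer);
        if (r < 0)
                return r;

        /* Pop and handle every timeout that expired by now. */
        timer_now(timer, &now);
        for (;;) {
                r = timer_pop_timeout(timer, now, &t);
                if (r)
                        return r;
                if (!t)
                        break;

                /* @t is now unlinked; dispatch it to its owner here. */
        }

        /* Re-arm the timerfd for the earliest remaining timeout, if any. */
        timer_rearm(timer);
        return 0;
}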
--------------------------------------------------------------------------------