├── .github
    └── workflows
    │   └── ci.yml
├── .gitignore
├── .mdlrc
├── .packit.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── include
    ├── common.h
    ├── jhash.h
    ├── listener.h
    ├── log.h
    ├── msgbuf-struct.h
    ├── output.h
    ├── threads.h
    └── worker.h
├── listener.c
├── main.c
├── modules
    ├── Makefile
    ├── logger.cc
    └── printer.c
├── ncrx
    ├── Makefile
    ├── libncrx.c
    ├── ncrx-struct.h
    ├── ncrx.c
    ├── ncrx.h
    ├── nctx.c
    └── netcons-gen.py
├── output.c
├── threads.c
├── util
    ├── Makefile
    └── netconsblaster.c
└── worker.c


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: Continuous Integration
 2 | on:
 3 |   push:
 4 |     branches: [main]
 5 |   pull_request:
 6 | jobs:
 7 |   build:
 8 |     name: Build netconsd
 9 |     runs-on: ubuntu-latest
10 |     strategy:
11 |       matrix:
12 |         include:
13 |           - cc: gcc
14 |             cxx: g++
15 |           - cc: clang
16 |             cxx: clang++
17 |     env:
18 |       CC: ${{ matrix.cc }}
19 |       CXX: ${{ matrix.cxx }}
20 |     steps:
21 |     - name: Checkout repository
22 |       uses: actions/checkout@v2
23 |     - name: Build netconsd
24 |       run: make
25 |     - name: Build netconsblaster
26 |       run: make -C util
27 |   markdown:
28 |     name: Markdown
29 |     runs-on: ubuntu-latest
30 |     steps:
31 |     - name: Checkout repository
32 |       uses: actions/checkout@v2
33 |     - name: Lint Markdown
34 |       uses: actionshub/markdownlint@2.0.2
35 |     - name: Check links
36 |       uses: gaurav-nelson/github-action-markdown-link-check@1.0.13
37 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.d
 2 | *.o
 3 | netconsd
 4 | modules/*.so
 5 | util/netconsblaster
 6 | /netconsd-*.tar.gz
 7 | /netconsd-*.src.rpm
 8 | /netconsd.spec
 9 | Cargo.lock
10 | /target
11 | libnetconsd.a
12 | 


--------------------------------------------------------------------------------
/.mdlrc:
--------------------------------------------------------------------------------
1 | rules '~MD013', '~MD014', '~MD029', '~MD034'
2 | 


--------------------------------------------------------------------------------
/.packit.yaml:
--------------------------------------------------------------------------------
 1 | # See the documentation for more information:
 2 | # https://packit.dev/docs/configuration/
 3 | 
 4 | specfile_path: netconsd.spec
 5 | files_to_sync:
 6 |   - netconsd.spec
 7 |   - .packit.yaml
 8 | 
 9 | upstream_package_name: netconsd
10 | downstream_package_name: netconsd
11 | actions:
12 |   # Fetch the specfile from Rawhide, drop any patches and disable rpmautospec
13 |   post-upstream-clone: "bash -c \"curl -s https://src.fedoraproject.org/rpms/netconsd/raw/main/f/netconsd.spec | sed -e '/^Patch[0-9]/d' -e '/^%autochangelog$/d' > netconsd.spec\""
14 | 
15 | srpm_build_deps:
16 |   - bash
17 |   - curl
18 |   - sed
19 | 
20 | jobs:
21 | - job: copr_build
22 |   trigger: commit
23 |   owner: "@meta"
24 |   project: netconsd
25 |   targets:
26 |     - fedora-all-aarch64
27 |     - fedora-all-i386
28 |     - fedora-all-ppc64le
29 |     - fedora-all-s390x
30 |     - fedora-all-x86_64
31 |     - fedora-eln-aarch64
32 |     - fedora-eln-i386
33 |     - fedora-eln-ppc64le
34 |     - fedora-eln-s390x
35 |     - fedora-eln-x86_64
36 |     - epel-8-aarch64
37 |     - epel-8-ppc64le
38 |     - epel-8-s390x
39 |     - epel-8-x86_64
40 |     - epel-9-aarch64
41 |     - epel-9-ppc64le
42 |     - epel-9-s390x
43 |     - epel-9-x86_64
44 | - job: copr_build
45 |   trigger: pull_request
46 |   owner: "@meta"
47 |   project: netconsd
48 |   targets:
49 |     - fedora-all-aarch64
50 |     - fedora-all-i386
51 |     - fedora-all-ppc64le
52 |     - fedora-all-s390x
53 |     - fedora-all-x86_64
54 |     - fedora-eln-aarch64
55 |     - fedora-eln-i386
56 |     - fedora-eln-ppc64le
57 |     - fedora-eln-s390x
58 |     - fedora-eln-x86_64
59 |     - epel-8-aarch64
60 |     - epel-8-ppc64le
61 |     - epel-8-s390x
62 |     - epel-8-x86_64
63 |     - epel-9-aarch64
64 |     - epel-9-ppc64le
65 |     - epel-9-s390x
66 |     - epel-9-x86_64
67 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to make participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
 9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 |   advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |   address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |   professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies within all project spaces, and it also applies when
49 | an individual is representing the project or its community in public spaces.
50 | Examples of representing a project or community include using an official
51 | project e-mail address, posting via an official social media account, or acting
52 | as an appointed representative at an online or offline event. Representation of
53 | a project may be further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at <opensource-conduct@fb.com>. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 | 
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 | 
68 | ## Attribution
69 | 
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 | 
73 | [homepage]: https://www.contributor-covenant.org
74 | 
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to netconsd
 2 | 
 3 | We want to make contributing to this project as easy and transparent as
 4 | possible.
 5 | 
 6 | ## Our Development Process
 7 | 
 8 | This repository is synced from an internal repository. We gladly accept
 9 | pull requests and will deal with the merging appropriately.
10 | 
11 | ## Contributor License Agreement ("CLA")
12 | 
13 | In order to accept your pull request, we need you to submit a CLA. You only
14 | need to do this once to work on any of Facebook's open source projects.
15 | 
16 | Complete your CLA here: <https://code.facebook.com/cla>
17 | 
18 | ## Issues
19 | 
20 | We use GitHub issues to track public bugs. Please ensure your description is
21 | clear and has sufficient instructions to be able to reproduce the issue.
22 | 
23 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the
24 | safe disclosure of security bugs. In those cases, please go through the
25 | process outlined on that page and do not file a public issue.
26 | 
27 | ## Sending a pull request
28 | 
29 | Have a fix or feature? Awesome! When you send the pull request we suggest you
30 | include a build output.
31 | 
32 | We will hold all contributions to the same quality and style standards as the
33 | existing code.
34 | 
35 | ## License
36 | 
37 | By contributing to this repository, you agree that your contributions will be
38 | licensed in accordance with the LICENSE document in the root of this repository.
39 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) Meta Platforms, Inc. and affiliates.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 4 | 
 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 6 | 
 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 8 | 
 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
10 | 
11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | CC ?= gcc
 2 | 
 3 | LIBS = -lpthread
 4 | CFLAGS ?= -O2 -fPIC
 5 | CFLAGS += -D_GNU_SOURCE -fno-strict-aliasing -Wall -Wextra \
 6 |           -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations \
 7 |           -Wdeclaration-after-statement -Wno-missing-field-initializers \
 8 |           -Wno-unused-parameter
 9 | CPPFLAGS ?=
10 | INCLUDES = -Incrx
11 | 
12 | UNAME := $(shell uname)
13 | ifneq ($(UNAME), OpenBSD)
14 | LIBS += -lrt -ldl
15 | endif
16 | 
17 | debug debug32: CFLAGS += -O0 -gdwarf-4 -fno-omit-frame-pointer \
18 | 	                 -fstack-protector-all -fsanitize=address \
19 |                          -fsanitize=undefined
20 | debug debug32: LDFLAGS ?= -lasan -lubsan
21 | 
22 | 32bit: CFLAGS += -m32
23 | 32bit: LDFLAGS ?= -m32
24 | 
25 | disasm: CFLAGS += -fverbose-asm
26 | 
27 | binary = netconsd
28 | lib = ncrx/libncrx.o
29 | liball = libnetconsd.a
30 | obj = threads.o listener.o worker.o output.o main.o
31 | rlibobj = threads.o listener.o worker.o output.o
32 | asm = $(obj:.o=.s)
33 | 
34 | all: $(binary) mods
35 | rlib: $(liball)
36 | 32bit: $(binary) mods
37 | 
38 | debug: all
39 | debug32: 32bit
40 | disasm: $(asm)
41 | 
42 | -include $(obj:.o=.d)
43 | 
44 | $(binary): $(lib) $(obj)
45 | 	$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(lib) $(obj) $(LIBS) -o $@
46 | 
47 | $(liball): $(rlibobj) $(lib)
48 | 	ar rc $@ $(rlibobj) $(lib)
49 | 
50 | %.o: %.c
51 | 	$(CC) $< $(CPPFLAGS) $(CFLAGS) $(INCLUDES) -c -o $@
52 | 	$(CC) -MM $< $(INCLUDES) > $(@:.o=.d)
53 | 
54 | %.s: %.c
55 | 	$(CC) $< $(CPPFLAGS) $(CFLAGS) $(INCLUDES) -c -S -o $@
56 | 
57 | $(lib):
58 | 	$(MAKE) -e -C ncrx
59 | 
60 | mods:
61 | 	$(MAKE) -e -C modules
62 | 
63 | utils:
64 | 	$(MAKE) -e -C util
65 | 
66 | clean:
67 | 	rm -f netconsd *.o *.d *.s
68 | 	rm -f modules/*.o modules/*.so
69 | 	rm -f ncrx/*.o ncrx/*.d
70 | 	rm -f util/netconsblaster
71 | 	rm -f libnetconsd.a
72 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Netconsd: The Netconsole Daemon
  2 | 
  3 | [![Continuous Integration](https://github.com/facebook/netconsd/workflows/Continuous%20Integration/badge.svg?event=push)](https://github.com/facebook/netconsd/actions?query=workflow%3A%22Continuous+Integration%22)
  4 | 
  5 | This is a daemon for receiving and processing logs from the Linux Kernel, as
  6 | emitted over a network by the kernel's netconsole module. It supports both the
  7 | old "legacy" text-only format, and the new extended format added in v4.4.
  8 | 
  9 | The core of the daemon does nothing but process messages and drop them: in order
 10 | to make the daemon useful, the user must supply one or more "output modules".
 11 | These modules are shared object files which expose a small ABI that is called by
 12 | netconsd with the content and metadata for netconsole messages it receives.
 13 | 
 14 | This README explains how to build netconsd and use it with one of the existing
 15 | output modules in the modules/ directory. The end discusses how to write your
 16 | own custom output module.
 17 | 
 18 | ## Building netconsd
 19 | 
 20 | The default Makefile target intended for production use has no external
 21 | dependencies besides glibc. To build it, just say `make` (or `gmake` on BSD):
 22 | you'll end up with a single executable in this directory called `netconsd`, and
 23 | a `*.so` file for every module in the `modules/` directory.
 24 | 
 25 | The Makefile includes a few other handy targets:
 26 | 
 27 | * `debug`: Adds the usual debug flags, and also enables the ASAN and
 28 |            UBSAN sanitizers. You'll need to install libasan/libubsan on
 29 |            your system to build this target and run the binaries.
 30 | * `32bit`: Forces 32-bit compilation on x86_64 systems, for easily
 31 |            testing portability to 32-bit CPU architectures. You'll need
 32 |            to install 32-bit libraries if your distro doesn't have them.
 33 | * `debug32`: Union of the `32bit` and `debug` targets.
 34 | * `disasm`: Emits verbose annotated disassembly in `*.s` files.
 35 | 
 36 | If you want to build the daemon with clang, just append `CC="clang"` to your
 37 | make invocation. All the above targets should build with both clang and gcc.
 38 | 
 39 | ## Running netconsd
 40 | 
 41 | ### Setting up the server
 42 | 
 43 | By default, netconsd will use 1 listener and 2 worker threads, and listen on
 44 | port 1514 for messages. You can use `-l`, `-w`, and `-u` respectively to change
 45 | the defaults.
 46 | 
 47 | There's no universal wisdom about how many threads to use: just experiment with
 48 | different numbers and use netconsblaster to load up the server. Both the blaster
 49 | and the server will print how many packets they sent/processed.
 50 | 
 51 | If you run out of memory and OOM, you need more workers; if you see messages
 52 | being dropped, you need more listeners. The tuning here will obviously depend on
 53 | what your output module does: make sure to pass it when you do your testing.
 54 | 
 55 | For the simplest setup, just run:
 56 | 
 57 | ```
 58 | $ make -s
 59 | $ ./netconsd ./modules/printer.so
 60 | ```
 61 | 
 62 | Netconsd will listen on `INADDR_ANY` and `IN6ADDR_ANY`, unless you pass a
 63 | specific IPv4 or IPv6 address to listen on using the `-a` argument.
 64 | 
 65 | Note that some systems (at least, OpenBSD) do not allow dual stack sockets at
 66 | all, so as currently written netconsd is only capable of receiving IPv6
 67 | netconsole packets on those systems.
 68 | 
 69 | ### Setting up the client
 70 | 
 71 | The netconsole module takes a parameter like this:
 72 | 
 73 | ```
 74 | netconsole=[+][r]${sport}@${saddr}/${intf},${dport}@${daddr}/${dmac}
 75 | ```
 76 | 
 77 | The fields are as follows:
 78 | 
 79 | 1. `sport`: Source port for the netconsole UDP packets
 80 | 2. `saddr`: Source address for the netconsole UDP packets
 81 | 3. `intf`: The name of the interface to send the UDP packets from
 82 | 4. `dport`: Destination port for the netconsole UDP packets
 83 | 5. `daddr`: Destination address for the netconsole UDP packets
 84 | 6. `dmac`: Destination L2 MAC address for the netconsole UDP packets
 85 | 
 86 | We need (6) because of how low-level netconsole is: it can't consult the routing
 87 | table to send the packet, so it must know a priori what MAC address to use in
 88 | the Ethernet frame it builds.
 89 | 
 90 | If you're talking to a server on the same L2 segment as the client, use the MAC
 91 | address of that server. Otherwise, use the MAC address of your router. You can
 92 | use the following quick shell one-liners to easily get the MAC of the router:
 93 | 
 94 | * IPv6: `ip -6 neighbor show | grep router`
 95 | * IPv4: `sudo arp -a | grep gateway`
 96 | 
 97 | Here are a couple examples for the parameter above:
 98 | 
 99 | ```
100 | IPv6: netconsole=+r6666@2401:db00:11:801e:face:0:31:0/eth0,1514@2401:db00:11:d0be:face:0:1b:0/c0:8c:60:3d:0d:bc
101 | IPv4: netconsole=6666@192.168.0.22/eth0,1514@192.168.0.1/00:00:0c:9f:f1:90
102 | ```
103 | 
104 | Prepending `+` to the cmdline will cause kernels that support it to use extended
105 | netconsole, which you almost certainly want. Kernels too old to support extcon
106 | will silently ignore the `+`.
107 | 
108 | Adding `r` to the command line will cause netconsole to emit the kernel
109 | release version in the first field of the extended message. For that, you need
110 | to have extended log (extcon) enabled.
111 | 
112 | Once you have your parameter constructed, just insert the module with it:
113 | 
114 | ```
115 | $ sudo modprobe netconsole netconsole=+r6666@2401:db00:11:801e:face:0:31:0/eth0,1514@2401:db00:11:d0be:face:0:1b:0/c0:8c:60:3d:0d:bc
116 | ```
117 | 
118 | You're good to go!
119 | 
120 | ### Testing on the client
121 | 
122 | Now that everything is running, you can use `/dev/kmsg` to write some logs:
123 | 
124 | ```
125 | $ sudo bash -c 'echo "Hello world!" > /dev/kmsg'
126 | $ sudo bash -c 'echo "<0>OMG!" > /dev/kmsg'
127 | ```
128 | 
129 | The `<0>` tells the kernel what loglevel to use: 0 is `KERN_EMERG`, which ensures
130 | your message will actually get transmitted.
131 | 
132 | ## Writing an output module
133 | 
134 | ### Interface to netconsd
135 | 
136 | Output modules are shared object files loaded with `dlopen()` at runtime by
137 | netconsd. Netconsd will look for three functions in your module:
138 | 
139 | 1. `int netconsd_output_init(int worker_thread_count)`
140 | 2. `void netconsd_output_handler(int thread, struct in6_addr *src, struct msg_buf *buf, struct ncrx_msg *msg)`
141 | 3. `void netconsd_output_exit(void)`
142 | 
143 | If (1) exists, it is called when your module is loaded: the argument tells you
144 | how many worker threads netconsd is going to call your module from. If you
145 | return non-zero from this function, netconsd will `abort()` and exit.
146 | 
147 | If (3) exists, it is called when netconsd unloads your module.
148 | 
149 | For every message it receives, netconsd will call (2) in your module. The code
150 | must be reentrant: `netconsd_output_handler()` will be called concurrently from
151 | all of the worker threads in netconsd. The `thread` argument tells you which
152 | worker is invoking the function, which makes it easy to have per-thread data.
153 | 
154 | Netconsd uses a consistent hash to decide which worker to pass messages to, so
155 | messages from the same remote address will always be queued to the same thread.
156 | 
157 | The `src` argument will always point to an `in6_addr` struct containing the source
158 | address of the netconsole packet. If the source was an IPv4 address, it will be
159 | formatted like `::FFFF:<IPv4 address>` (see `man ipv6` for details).
160 | 
161 | If the message had extended metadata, `msg` will point to the `ncrx_msg` struct
162 | containing that metadata and `buf` will be `NULL`. Otherwise, `msg` will be `NULL`
163 | and `buf` will point to a `msg_buf` struct with the raw message text.
164 | 
165 | Output modules must not modify the structures passed in. The memory backing all
166 | the pointers passed in will be freed immediately after the handler returns.
167 | 
168 | ### Building the modules
169 | 
170 | For modules written in C this is trivial: just compile with `-shared`.
171 | 
172 | For modules written in C++ it can be a bit trickier: you will probably need to
173 | build with `-static-libstdc++` and/or `-static-libgcc` to make this work.
174 | 
175 | See the code and Makefile in `modules/` for some examples of the above.
176 | 
177 | ## Contributing
178 | 
179 | See the CONTRIBUTING file for how to help out.
180 | 
181 | ## License
182 | 
183 | netconsd is BSD licensed, see the LICENSE file for more information.
184 | 
185 | netconsd was originally written by Calvin Owens as part of
186 | [fbkutils](https://github.com/facebookarchive/fbkutils) in 2016, with later
187 | contributions by several other people. The ncrx library was originally written
188 | by Tejun Heo. This repository is a direct continuation of that codebase.
189 | 


--------------------------------------------------------------------------------
/include/common.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the BSD-style license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #ifndef __COMMON_H__
 9 | #define __COMMON_H__
10 | 
11 | #include <stdlib.h>
12 | #include <stdint.h>
13 | #include <time.h>
14 | #include <string.h>
15 | #include <errno.h>
16 | #include <unistd.h>
17 | #include <sys/socket.h>
18 | #include <netinet/in.h>
19 | 
20 | #include "log.h"
21 | #include "jhash.h"
22 | 
23 | #define min(x, y) ({	/* typed minimum; x and y are each evaluated once */	\
24 | 	typeof(x) _min1 = (x);						\
25 | 	typeof(y) _min2 = (y);						\
26 | 	(void) (&_min1 == &_min2);	/* pointer compare draws a warning if the types differ */ \
27 | 	_min1 < _min2 ? _min1 : _min2; })
28 | 
29 | #define max(x, y) ({	/* typed maximum; x and y are each evaluated once */	\
30 | 	typeof(x) _max1 = (x);						\
31 | 	typeof(y) _max2 = (y);						\
32 | 	(void) (&_max1 == &_max2);	/* pointer compare draws a warning if the types differ */ \
33 | 	_max1 > _max2 ? _max1 : _max2; })
34 | 
35 | #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)	/* max(val, lo) is cast back to typeof(val), then capped at hi */
36 | 
37 | #define container_of(ptr, type, member) ({	/* pointer to a member -> pointer to the enclosing 'type' */ \
38 | 	const typeof( ((type *)0)->member ) *__mptr = (ptr);	/* type-checks ptr against the member's type */ \
39 | 	(type *)( (char *)__mptr - __builtin_offsetof(type,member) );})
40 | 
41 | static inline void *zalloc(size_t n)	/* n zero-filled bytes from calloc(); NULL on allocation failure */
42 | {
43 | 	return calloc(1, n);
44 | }
45 | 
46 | #define assert_pthread_mutex_locked(m)	/* abort unless trylock on m reports EBUSY; user must include <pthread.h> */ \
47 | do {									\
48 | 	fatal_on(pthread_mutex_trylock(m) != EBUSY, "UNLOCKED!\n");	\
49 | } while (0)
50 | 
51 | static inline uint64_t now_ms(clockid_t clock)	/* current reading of 'clock', in milliseconds */
52 | {
53 | 	struct timespec ts;
54 | 	int err = clock_gettime(clock, &ts);
55 | 
56 | 	/* A clock that cannot be read is treated as fatal: fatal_on() ends in abort(). */
57 | 	fatal_on(err, "Oops, clock_gettime() barfed: %m (-%d)\n", errno);
58 | 
59 | 	return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000L;
60 | }
61 | 
62 | static inline uint64_t now_mono_ms(void)	/* CLOCK_MONOTONIC reading in milliseconds */
63 | {
64 | 	return now_ms(CLOCK_MONOTONIC);
65 | }
66 | 
67 | static inline uint64_t now_real_ms(void)	/* CLOCK_REALTIME reading in milliseconds */
68 | {
69 | 	return now_ms(CLOCK_REALTIME);
70 | }
71 | 
72 | struct netconsd_params {	/* NOTE(review): presumably the runtime configuration built in main.c -- confirm */
73 | 	int nr_workers;		/* presumably the number of worker threads */
74 | 	int nr_listeners;	/* presumably the number of listener threads */
75 | 	int mmsg_batch;		/* NOTE(review): looks like a per-call receive batch size -- confirm in listener.c */
76 | 	unsigned int gc_int_ms;	/* presumably a garbage-collection interval in ms -- confirm */
77 | 	unsigned int gc_age_ms;	/* presumably a garbage-collection age threshold in ms -- confirm */
78 | 	struct sockaddr_in6 listen_addr;	/* presumably the address the listeners bind to */
79 | };
80 | 
81 | #endif /* __COMMON_H__ */
82 | 


--------------------------------------------------------------------------------
/include/jhash.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * Lifted from 4.4 Linux kernel source. Alterations for netconsd:
  5 |  *	- Pulled in rol32() from linux/bitops.h
  6 |  *	- Use stdint fixed-width types instead of kernel shorthand types
  7 |  *	- Deleted unaligned jhash() because we don't use it and C++ hates it.
  8 |  */
  9 | 
 10 | #ifndef _LINUX_JHASH_H
 11 | #define _LINUX_JHASH_H
 12 | 
 13 | /* jhash.h: Jenkins hash support.
 14 |  *
 15 |  * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
 16 |  *
 17 |  * http://burtleburtle.net/bob/hash/
 18 |  *
 19 |  * These are the credits from Bob's sources:
 20 |  *
 21 |  * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
 22 |  *
 23 |  * These are functions for producing 32-bit hashes for hash table lookup.
 24 |  * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
 25 |  * are externally useful functions.  Routines to test the hash are included
 26 |  * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
 27 |  * the public domain.  It has no warranty.
 28 |  *
 29 |  * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
 30 |  *
 31 |  * I've modified Bob's hash to be useful in the Linux kernel, and
 32 |  * any bugs present are my fault.
 33 |  * Jozsef
 34 |  */
 35 | 
 36 | #include <stdint.h>
 37 | 
 38 | static inline uint32_t rol32(uint32_t word, unsigned int shift)	/* rotate word left by shift bits (shift taken mod 32) */
 39 | {
 40 | 	return (word << (shift & 31)) | (word >> ((-shift) & 31));	/* masking keeps both counts in 0..31, so shift == 0 is no longer UB */
 41 | }
 42 | 
 43 | /* Hash table sizes work best as powers of two: jhash_size(n) is 2^n */
 44 | #define jhash_size(n)   ((uint32_t)1<<(n))
 45 | /* Mask the hash value, i.e. (value & jhash_mask(n)) instead of (value % jhash_size(n)) */
 46 | #define jhash_mask(n)   (jhash_size(n)-1)
 47 | 
 48 | /* __jhash_mix -- mix 3 32-bit values reversibly; a, b and c must be lvalues and are updated in place. */
 49 | #define __jhash_mix(a, b, c)			\
 50 | {						\
 51 | 	a -= c;  a ^= rol32(c, 4);  c += b;	\
 52 | 	b -= a;  b ^= rol32(a, 6);  a += c;	\
 53 | 	c -= b;  c ^= rol32(b, 8);  b += a;	\
 54 | 	a -= c;  a ^= rol32(c, 16); c += b;	\
 55 | 	b -= a;  b ^= rol32(a, 19); a += c;	\
 56 | 	c -= b;  c ^= rol32(b, 4);  b += a;	\
 57 | }
 58 | 
 59 | /* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c; a and b are overwritten as well */
 60 | #define __jhash_final(a, b, c)			\
 61 | {						\
 62 | 	c ^= b; c -= rol32(b, 14);		\
 63 | 	a ^= c; a -= rol32(c, 11);		\
 64 | 	b ^= a; b -= rol32(a, 25);		\
 65 | 	c ^= b; c -= rol32(b, 16);		\
 66 | 	a ^= c; a -= rol32(c, 4);		\
 67 | 	b ^= a; b -= rol32(a, 14);		\
 68 | 	c ^= b; c -= rol32(b, 24);		\
 69 | }
 70 | 
 71 | /*
 72 |  * Arbitrary initial parameters
 73 |  */
 74 | #define JHASH_INITVAL	0xdeadbeef
 75 | #define LISTEN_SEED	0xfaceb00c
 76 | #define WORKER_SEED	0xb00cface
 77 | 
 78 | /* jhash2 - hash an array of uint32_t's
 79 |  * @k: the key, which must be an array of uint32_t's
 80 |  * @length: the number of uint32_t's in the key
 81 |  * @initval: the previous hash, or an arbitrary value
 82 |  *
 83 |  * Returns the hash value of the key.
 84 |  */
 85 | static inline __attribute__((pure)) uint32_t jhash2(const uint32_t *k,
 86 | 		uint32_t length, uint32_t initval)
 87 | {
 88 | 	uint32_t a, b, c;
 89 | 
 90 | 	/* Set up the internal state: all three words start from the same seed */
 91 | 	a = b = c = JHASH_INITVAL + (length<<2) + initval;
 92 | 
 93 | 	/* Handle most of the key: three words per round, stopping with at most three left */
 94 | 	while (length > 3) {
 95 | 		a += k[0];
 96 | 		b += k[1];
 97 | 		c += k[2];
 98 | 		__jhash_mix(a, b, c);
 99 | 		length -= 3;
100 | 		k += 3;
101 | 	}
102 | 
103 | 	/* Handle the last 3 uint32_t's: all the case statements fall through */
104 | 	switch (length) {
105 | 	case 3: c += k[2];	__attribute__((fallthrough));
106 | 	case 2: b += k[1];	__attribute__((fallthrough));
107 | 	case 1: a += k[0];
108 | 		__jhash_final(a, b, c);
109 | 		break;
110 | 	case 0:	/* Nothing left to add: c is returned without the final mix */
111 | 		break;
112 | 	}
113 | 
114 | 	return c;
115 | }
116 | 
117 | #endif /* _LINUX_JHASH_H */
118 | 


--------------------------------------------------------------------------------
/include/listener.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the BSD-style license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #ifndef __LISTENER_H__
 9 | #define __LISTENER_H__
10 | 
11 | #include "threads.h"
12 | #include <pthread.h>
13 | 
14 | #define RCVBUF_SIZE	1024
15 | 
16 | struct ncrx_worker;
17 | 
18 | struct ncrx_prequeue {	/* head/tail/count for a list of msg_buf */
19 | 	struct msg_buf *queue_head;	/* presumably the first buffer, chained through msg_buf.next -- confirm */
20 | 	struct msg_buf *queue_tail;	/* presumably the last buffer on the list */
21 | 	int count;			/* presumably the number of buffers currently queued */
22 | };
23 | 
24 | struct ncrx_listener {	/* NOTE(review): looks like per-thread state for udp_listener_thread() -- confirm */
25 | 	pthread_t id;
26 | 	int thread_nr;
27 | 	struct ncrx_prequeue *prequeues;	/* presumably one prequeue per worker -- confirm in listener.c */
28 | 	struct ncrx_worker *workers;
29 | 	int nr_workers;
30 | 	int batch;
31 | 	uint64_t processed;	/* presumably a running count of packets this listener handled */
32 | 	struct sockaddr_in6 *address;
33 | 
34 | 	/*
35 | 	 * Flags
36 | 	 */
37 | 	unsigned stop:1;	/* single-bit flag; NOTE(review): presumably set to ask the thread to exit */
38 | };
39 | 
40 | void *udp_listener_thread(void *arg);
41 | 
42 | #endif /* __LISTENER_H__ */
43 | 


--------------------------------------------------------------------------------
/include/log.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the BSD-style license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | #ifndef __LOG_H__
 8 | #define __LOG_H__
 9 | 
10 | #include <stdio.h>
11 | #include <errno.h>
12 | 
13 | #define LOGPFX "[fb-netconsd] "
14 | 
15 | #define S(x) #x	/* stringify x exactly as written */
16 | #define S_(x) S(x)	/* expand x first, then stringify the result */
17 | #define S__LINE__ S_(__LINE__)	/* the current line number as a string literal */
18 | 
19 | #define __log(pfx, ...)	/* printf to stdout with LOGPFX, file:line and pfx prepended, then flush */ \
20 | do { \
21 | 	printf(LOGPFX __FILE__ ":" S__LINE__ ": " pfx __VA_ARGS__);	/* literals are concatenated, so the first variadic argument must be a string literal */ \
22 | 	fflush(stdout); \
23 | } while (0)
24 | 
25 | #define fatal(...)	/* log with a "FATAL: " prefix, then abort() the process */ \
26 | do { \
27 | 	__log("FATAL: ", __VA_ARGS__); \
28 | 	abort(); \
29 | } while (0)
30 | 
31 | #define warn(...)	/* log with a "WARNING: " prefix */ \
32 | do { \
33 | 	__log("WARNING: ", __VA_ARGS__); \
34 | } while (0)
35 | 
36 | #define log(...)	/* log with an "INFO: " prefix */ \
37 | do { \
38 | 	__log("INFO: ", __VA_ARGS__); \
39 | } while (0)
40 | 
41 | #ifdef DEBUG
42 | #define debug(...)	/* log with a "DEBUG: " prefix; only active when DEBUG is defined */ \
43 | do { \
44 | 	__log("DEBUG: ", __VA_ARGS__); \
45 | } while (0)
46 | #else
47 | #define debug(...) do {} while (0)	/* compiled out: the arguments are never evaluated */
48 | #endif
49 | 
50 | #define fatal_on(cond, ...)	/* fatal(...) when cond is non-zero; cond is hinted as unlikely */ \
51 | do { \
52 | 	if (__builtin_expect(cond, 0)) { \
53 | 		fatal(__VA_ARGS__); \
54 | 	} \
55 | } while (0)
56 | 
/*
 * Log the message the first time this call site is reached and stay silent
 * afterwards. The "already logged" marker is a static local, so every
 * expansion of the macro keeps its own state.
 */
#define log_once(...) \
do { \
	static int _t; \
	if (__builtin_expect(!_t, 0)) { \
		log(__VA_ARGS__); \
		_t = -1; \
	} \
} while (0)
65 | 
/*
 * Log only on every @n-th pass through this call site (the n-th, 2n-th, ...).
 * The pass counter is a static local starting at 1, so every expansion of the
 * macro counts independently. The logging branch is hinted as unlikely.
 */
#define log_every(n, ...) \
do { \
	static int _t = 1; \
	if (__builtin_expect(!(_t % (n)), 0)) \
		log(__VA_ARGS__); \
	_t++; \
} while (0)
73 | 
74 | #endif /* __LOG_H__ */
75 | 


--------------------------------------------------------------------------------
/include/msgbuf-struct.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the BSD-style license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #ifndef __MSGBUF_STRUCT_H__
 9 | #define __MSGBUF_STRUCT_H__
10 | 
11 | #include <unistd.h>
12 | #include <netinet/in.h>
13 | #include <sys/socket.h>
14 | #include <arpa/inet.h>
15 | 
16 | #ifdef __cplusplus
17 | #define __cpp extern "C"
18 | #else
19 | #define __cpp
20 | #endif
21 | 
22 | struct ncrx_msg;
23 | 
/*
 * One received datagram plus its receive metadata. The payload lives in the
 * flexible array member @buf, so the struct is allocated as
 * sizeof(struct msg_buf) + payload size (see reinit_mmsghdr_vec() in
 * listener.c).
 */
struct msg_buf {
	struct msg_buf *next;		/* link used by the singly linked queues */

	struct iovec iovec;		/* points at buf; handed to recvmmsg() */
	struct sockaddr_in6 src;	/* sender address, filled in via msg_name */
	uint64_t rcv_time;		/* now_real_ms() taken after the receive */
	int rcv_flags;			/* msg_flags reported for this datagram */
	int rcv_bytes;			/* msg_len reported for this datagram */

	char buf[];			/* payload bytes */
};
35 | 
36 | __cpp int netconsd_output_init(int nr_workers);
37 | __cpp void netconsd_output_exit(void);
38 | __cpp void netconsd_output_handler(int t, struct in6_addr *src,
39 | 				   struct msg_buf *b, struct ncrx_msg *m);
40 | 
41 | #endif /* __MSGBUF_STRUCT_H__ */
42 | 


--------------------------------------------------------------------------------
/include/output.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the BSD-style license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #ifndef __OUTPUT_H__
 9 | #define __OUTPUT_H__
10 | 
11 | #include <ncrx-struct.h>
12 | 
13 | #include "msgbuf-struct.h"
14 | 
15 | #define MAXOUTS 32
16 | 
17 | int register_output_module(char *path, int nr_workers);
18 | void destroy_output_modules(void);
19 | 
20 | void execute_output_pipeline(int thread_nr, struct in6_addr *src,
21 | 		struct msg_buf *buf, struct ncrx_msg *msg);
22 | 
23 | #endif /* __OUTPUT_H__ */
24 | 


--------------------------------------------------------------------------------
/include/threads.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the BSD-style license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #ifndef __NCRX_THREADS_H__
 9 | #define __NCRX_THREADS_H__
10 | 
11 | #include "msgbuf-struct.h"
12 | #include "common.h"
13 | 
14 | struct tctl;
15 | struct ncrx_listener;
16 | 
17 | void enqueue_and_wake_all(struct ncrx_listener *listener);
18 | struct tctl *create_threads(struct netconsd_params *p);
19 | void destroy_threads(struct tctl *ctl);
20 | 
21 | #endif /* __NCRX_THREADS_H__ */
22 | 


--------------------------------------------------------------------------------
/include/worker.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the BSD-style license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #ifndef __WORKER_H__
 9 | #define __WORKER_H__
10 | 
11 | #include <pthread.h>
12 | 
13 | #include "msgbuf-struct.h"
14 | 
15 | /*
16 |  * How long to wait for messages before giving up, in milliseconds
17 |  */
18 | #define NETCONS_RTO 200
19 | 
20 | struct hashtable;
21 | struct timerlist;
22 | 
/*
 * State of one worker thread; ncrx_worker_thread() is the thread entry point.
 * NOTE(review): the worker implementation is not part of this header; the
 * field notes below are read off the names and types only - confirm against
 * worker.c before relying on them.
 */
struct ncrx_worker {
	struct msg_buf *queue_head;	/* presumably the pending msg_buf list */
	struct msg_buf *queue_tail;

	pthread_t id;
	pthread_condattr_t condattr;
	pthread_cond_t cond;
	pthread_mutex_t queuelock;	/* presumably guards the queue fields */
	int nr_queued;

	struct hashtable *ht;
	struct timerlist *tlist;
	struct timespec wake;

	unsigned int gc_age_ms;		/* in milliseconds, per the name */
	unsigned int gc_int_ms;		/* in milliseconds, per the name */
	uint64_t lastgc;

	uint64_t processed;
	uint64_t hosts_seen;
	int thread_nr;

	/*
	 * Flags
	 */
	unsigned stop:1;
};
50 | 
51 | void *ncrx_worker_thread(void *arg);
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/listener.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the BSD-style license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <stdlib.h>
  9 | #include <stdint.h>
 10 | #include <errno.h>
 11 | #include <string.h>
 12 | #include <sys/socket.h>
 13 | 
 14 | #include "include/common.h"
 15 | #include "include/msgbuf-struct.h"
 16 | #include "include/threads.h"
 17 | #include "include/listener.h"
 18 | 
 19 | static void handle_listen_error(int err)
 20 | {
 21 | 	switch(err) {
 22 | 	case EINTR:
 23 | 		/*
 24 | 		 * The fact that we got an error return means that recvmmsg()
 25 | 		 * hadn't actually done anything, so we can just loop back over
 26 | 		 * the call no problem.
 27 | 		 */
 28 | 		return;
 29 | 	case 0:
 30 | 		fatal("Unexpected EOF from recvmmsg()\n");
 31 | 	default:
 32 | 		fatal("Unexpected listen error: %m (-%d)\n", errno);
 33 | 	}
 34 | }
 35 | 
 36 | static struct msg_buf *msgbuf_from_iovec(struct iovec *vecptr)
 37 | {
 38 | 	return container_of(vecptr, struct msg_buf, iovec);
 39 | }
 40 | 
 41 | static unsigned long hash_srcaddr(struct in6_addr *addr)
 42 | {
 43 | 	uint32_t *addrptr = (uint32_t *)addr;
 44 | 
 45 | 	return jhash2(addrptr, sizeof(*addr) / sizeof(*addrptr), LISTEN_SEED);
 46 | }
 47 | 
 48 | static void prequeue_msgbuf(struct ncrx_listener *listener, struct msg_buf *buf)
 49 | {
 50 | 	struct ncrx_prequeue *prequeue;
 51 | 	unsigned long hash;
 52 | 
 53 | 	hash = hash_srcaddr(&buf->src.sin6_addr);
 54 | 	prequeue = &listener->prequeues[hash % listener->nr_workers];
 55 | 
 56 | 	if (prequeue->queue_head)
 57 | 		prequeue->queue_tail->next = buf;
 58 | 	else
 59 | 		prequeue->queue_head = buf;
 60 | 
 61 | 	prequeue->queue_tail = buf;
 62 | 	prequeue->count++;
 63 | }
 64 | 
 65 | static void reinit_mmsghdr_vec(struct mmsghdr *vec, int nr, int rcvbufsz)
 66 | {
 67 | 	struct msg_buf *cur;
 68 | 	int i;
 69 | 
 70 | 	memset(vec, 0, sizeof(*vec) * nr);
 71 | 	for (i = 0; i < nr; i++) {
 72 | 		cur = malloc(sizeof(*cur) + rcvbufsz);
 73 | 		if (!cur)
 74 | 			fatal("-ENOMEM after %d/%d rcvbufs\n", i, nr);
 75 | 
 76 | 		memset(cur, 0, sizeof(*cur));
 77 | 		cur->buf[rcvbufsz - 1] = '\0';
 78 | 
 79 | 		cur->iovec.iov_base = &cur->buf;
 80 | 		cur->iovec.iov_len = rcvbufsz - 1;
 81 | 
 82 | 		vec[i].msg_hdr.msg_iov = &cur->iovec;
 83 | 		vec[i].msg_hdr.msg_iovlen = 1;
 84 | 
 85 | 		vec[i].msg_hdr.msg_name = &cur->src;
 86 | 		vec[i].msg_hdr.msg_namelen = sizeof(cur->src);
 87 | 	}
 88 | }
 89 | 
 90 | static struct mmsghdr *alloc_mmsghdr_vec(int nr, int rcvbufsz)
 91 | {
 92 | 	struct mmsghdr *mmsgvec;
 93 | 
 94 | 	mmsgvec = malloc(sizeof(*mmsgvec) * nr);
 95 | 	if (!mmsgvec)
 96 | 		fatal("Unable to allocate mmsghdr array\n");
 97 | 
 98 | 	reinit_mmsghdr_vec(mmsgvec, nr, rcvbufsz);
 99 | 	return mmsgvec;
100 | }
101 | 
/* Free the msg_buf behind each of the @nr entries, then the array itself. */
static void free_mmsghdr_vec(struct mmsghdr *vec, int nr)
{
	int i = 0;

	while (i < nr) {
		free(msgbuf_from_iovec(vec[i].msg_hdr.msg_iov));
		i++;
	}

	free(vec);
}
114 | 
115 | static int get_listen_socket(struct sockaddr_in6 *bindaddr)
116 | {
117 | 	int fd, ret, optval = 1;
118 | 
119 | 	fd = socket(AF_INET6, SOCK_DGRAM, 0);
120 | 	if (fd == -1)
121 | 		fatal("Couldn't get socket: %m\n");
122 | 
123 | 	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &optval, sizeof(optval));
124 | 	if (ret == -1)
125 | 		fatal("Couldn't set SO_REUSEPORT on socket: %m\n");
126 | 
127 | 	ret = bind(fd, (const struct sockaddr *)bindaddr, sizeof(*bindaddr));
128 | 	if (ret == -1)
129 | 		fatal("Couldn't bind: %m\n");
130 | 
131 | 	return fd;
132 | }
133 | 
134 | void *udp_listener_thread(void *arg)
135 | {
136 | 	int fd, nr_recv, i;
137 | 	uint64_t now;
138 | 	struct ncrx_listener *us = arg;
139 | 	struct mmsghdr *vec;
140 | 	struct msg_buf *cur;
141 | 
142 | 	fd = get_listen_socket(us->address);
143 | 	vec = alloc_mmsghdr_vec(us->batch, RCVBUF_SIZE);
144 | 
145 | 	while (!us->stop) {
146 | 		nr_recv = recvmmsg(fd, vec, us->batch, MSG_WAITFORONE, NULL);
147 | 		if (nr_recv <= 0) {
148 | 			handle_listen_error(errno);
149 | 			continue;
150 | 		}
151 | 
152 | 		debug("recvmmsg() got %d packets\n", nr_recv);
153 | 
154 | 		now = now_real_ms();
155 | 		for (i = 0; i < nr_recv; i++) {
156 | 			cur = msgbuf_from_iovec(vec[i].msg_hdr.msg_iov);
157 | 
158 | 			cur->rcv_flags = vec[i].msg_hdr.msg_flags;
159 | 			cur->rcv_bytes = vec[i].msg_len;
160 | 			cur->rcv_time = now;
161 | 
162 | 			prequeue_msgbuf(us, cur);
163 | 			us->processed++;
164 | 		}
165 | 
166 | 		enqueue_and_wake_all(us);
167 | 		reinit_mmsghdr_vec(vec, nr_recv, RCVBUF_SIZE);
168 | 	}
169 | 
170 | 	free_mmsghdr_vec(vec, us->batch);
171 | 
172 | 	return NULL;
173 | }
174 | 


--------------------------------------------------------------------------------
/main.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the BSD-style license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <stdlib.h>
  9 | #include <signal.h>
 10 | #include <arpa/inet.h>
 11 | #include <getopt.h>
 12 | 
 13 | #include "include/common.h"
 14 | #include "include/output.h"
 15 | #include "include/threads.h"
 16 | 
 17 | static void parse_arguments(int argc, char **argv, struct netconsd_params *p)
 18 | {
 19 | 	int i;
 20 | 	char *tmp;
 21 | 	static const char *optstr = "hw:l:b:a:u:g:";
 22 | 	static const struct option optlong[] = {
 23 | 		{
 24 | 			.name = "help",
 25 | 			.has_arg = no_argument,
 26 | 			.val = 'h',
 27 | 		},
 28 | 		{
 29 | 			.name = NULL,
 30 | 		},
 31 | 	};
 32 | 
 33 | 	while (1) {
 34 | 		i = getopt_long(argc, argv, optstr, optlong, NULL);
 35 | 
 36 | 		switch (i) {
 37 | 		case 'w':
 38 | 			p->nr_workers = atoi(optarg);
 39 | 			break;
 40 | 		case 'l':
 41 | 			p->nr_listeners = atoi(optarg);
 42 | 			break;
 43 | 		case 'b':
 44 | 			p->mmsg_batch = atoi(optarg);
 45 | 			break;
 46 | 		case 'a':
 47 | 			if (!inet_pton(AF_INET6, optarg, &p->listen_addr.sin6_addr)) {
 48 | 				char v4[sizeof("::ffff:XXX.XXX.XXX.XXX")];
 49 | 				snprintf(v4, sizeof(v4), "::ffff:%s", optarg);
 50 | 				if (!inet_pton(AF_INET6, v4, &p->listen_addr.sin6_addr))
 51 | 					fatal("invalid listen address\n");
 52 | 			}
 53 | 
 54 | 			debug("listening for address %s\n", optarg);
 55 | 			break;
 56 | 		case 'u':
 57 | 			p->listen_addr.sin6_port = htons(atoi(optarg));
 58 | 			break;
 59 | 		case 'g':
 60 | 			tmp = index(optarg, '/');
 61 | 			if (!tmp)
 62 | 				fatal("'-g' expects 'INTERVAL/AGE' in ms\n");
 63 | 
 64 | 			p->gc_int_ms = atoi(optarg);
 65 | 			p->gc_age_ms = atoi(tmp + 1);
 66 | 
 67 | 			if (p->gc_age_ms < p->gc_int_ms)
 68 | 				fatal("GC age must be >= GC interval\n");
 69 | 
 70 | 			break;
 71 | 		case -1:
 72 | 			goto done;
 73 | 		case 'h':
 74 | 			printf("Usage: %s [-w workers] [-l listeners] "
 75 | 			     "[-b mmsg_batch] [-a udp_listen_addr] [-u udp_listen_port] "
 76 | 			     "[-g '${interval}/${age}'] [output module path] "
 77 | 			     "[another output module path...]\n", argv[0]);
 78 | 			exit(0);
 79 | 		default:
 80 | 			exit(1);
 81 | 		}
 82 | 	}
 83 | 
 84 | done:
 85 | 
 86 | 	/*
 87 | 	 * Register output modules
 88 | 	 */
 89 | 	if (optind == argc)
 90 | 		warn("You passed no output modules, which is sort of silly\n");
 91 | 
 92 | 	if (argc - optind > MAXOUTS)
 93 | 		fatal("Too many output mods: %d>%d\n", argc - optind, MAXOUTS);
 94 | 
 95 | 	for (i = optind; i < argc; i++)
 96 | 		if (register_output_module(argv[i], p->nr_workers))
 97 | 			fatal("Can't register '%s'\n", argv[i]);
 98 | }
 99 | 
/*
 * Deliberately empty signal handler. Its only purpose is to make a blocking
 * recvmmsg() in a listener thread fail with EINTR, so the thread re-checks
 * its stop flag and terminates.
 *
 * See also: stop_and_wait_for_listeners() in threads.c
 */
static void interrupter_handler(int sig)
{
	(void)sig;
}
110 | 
/*
 * Fill @set with exactly the signals that trigger a graceful shutdown:
 * SIGTERM, SIGINT and SIGHUP.
 */
static void init_sigset(sigset_t *set)
{
	static const int graceful[] = { SIGTERM, SIGINT, SIGHUP };
	size_t i;

	sigemptyset(set);
	for (i = 0; i < sizeof(graceful) / sizeof(graceful[0]); i++)
		sigaddset(set, graceful[i]);
}
121 | 
122 | static void init_sighandlers(void)
123 | {
124 | 	struct sigaction ignorer = {
125 | 		.sa_handler = SIG_IGN,
126 | 	};
127 | 	struct sigaction interrupter = {
128 | 		.sa_handler = interrupter_handler,
129 | 		.sa_flags = SA_NODEFER,
130 | 	};
131 | 
132 | 	sigaction(SIGUSR1, &interrupter, NULL);
133 | 	sigaction(SIGPIPE, &ignorer, NULL);
134 | }
135 | 
/*
 * Program entry point: parse the command line over the built-in defaults,
 * set up signal handling, start the threads, then sleep until one of the
 * shutdown signals arrives and tear everything down again.
 */
int main(int argc, char **argv)
{
	int num;
	sigset_t set;
	struct tctl *ctl;
	/* defaults; parse_arguments() overrides whatever the user passed */
	struct netconsd_params params = {
		.nr_workers = 2,
		.nr_listeners = 1,
		.mmsg_batch = 512,
		.gc_int_ms = 0,
		.gc_age_ms = 0,
		.listen_addr = {
			.sin6_family = AF_INET6,
			.sin6_addr = IN6ADDR_ANY_INIT,
			.sin6_port = htons(1514),
		}
	};

	parse_arguments(argc, argv, &params);

	/*
	 * Block the shutdown signals before any thread is created so they are
	 * only ever consumed by the sigwait() below.
	 */
	init_sighandlers();
	init_sigset(&set);
	sigprocmask(SIG_BLOCK, &set, NULL);

	ctl = create_threads(&params);
	/* park here until SIGTERM, SIGINT or SIGHUP is pending */
	sigwait(&set, &num);

	log("Signal: '%s', terminating\n", strsignal(num));
	destroy_threads(ctl);
	destroy_output_modules();

	return 0;
}
169 | 


--------------------------------------------------------------------------------
/modules/Makefile:
--------------------------------------------------------------------------------
 1 | CC ?= gcc
 2 | CXX ?= g++
 3 | CPPFLAGS ?=
 4 | LDFLAGS ?=
 5 | 
 6 | override CFLAGS += -fPIC
 7 | override CXXFLAGS += -std=c++11 -fPIC
 8 | override LDFLAGS += -shared
 9 | INCLUDES = -I../ncrx -I../include
10 | 
11 | mods = printer.so logger.so
12 | 
13 | all: $(mods)
14 | 
15 | %.so: %.c
16 | 	$(CC) $< $(CPPFLAGS) $(CFLAGS) $(INCLUDES) -c -o $(<:.c=.o)
17 | 	$(CC) $(<:.c=.o) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -o $@
18 | 
19 | %.so: %.cc
20 | 	$(CXX) $< $(CPPFLAGS) $(CXXFLAGS) $(INCLUDES) -c -o $(<:.cc=.o)
21 | 	$(CXX) $(<:.cc=.o) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@
22 | 
23 | clean:
24 | 	rm -f *.o *.so
25 | 


--------------------------------------------------------------------------------
/modules/logger.cc:
--------------------------------------------------------------------------------
  1 | /* logger.cc: Very simple example C++ netconsd module
  2 |  *
  3 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  4 |  *
  5 |  * This source code is licensed under the BSD-style license found in the
  6 |  * LICENSE file in the root directory of this source tree.
  7 |  */
  8 | 
  9 | #include <cerrno>
 10 | #include <cstdio>
 11 | #include <cstdlib>
 12 | #include <cstring>
 13 | #include <functional>
 14 | #include <unordered_map>
 15 | #include <inttypes.h>
 16 | 
 17 | #include <fcntl.h>
 18 | #include <netdb.h>
 19 | #include <sys/types.h>
 20 | #include <sys/stat.h>
 21 | #include <sys/socket.h>
 22 | #include <arpa/inet.h>
 23 | #include <netinet/in.h>
 24 | 
 25 | #include <msgbuf-struct.h>
 26 | #include <ncrx-struct.h>
 27 | 
 28 | #include <jhash.h>
 29 | 
 30 | /*
 31 |  * The below allows us to index an unordered_map by an IP address.
 32 |  */
 33 | 
 34 | static bool operator==(const struct in6_addr& lhs, const struct in6_addr& rhs)
 35 | {
 36 | 	return std::memcmp(&lhs, &rhs, 16) == 0;
 37 | }
 38 | 
 39 | namespace std {
 40 | 
 41 | template<> struct hash<struct in6_addr>
 42 | {
 43 | 	std::size_t operator()(struct in6_addr const& s) const
 44 | 	{
 45 | 		return jhash2((uint32_t*)&s, sizeof(s) / sizeof(uint32_t),
 46 | 				0xbeefdead);
 47 | 	}
 48 | };
 49 | 
 50 | } /* namespace std */
 51 | 
 52 | /*
 53 |  * Basic struct to hold the hostname and the FD for its logfile.
 54 |  */
 55 | struct logtarget {
 56 | 	char hostname[INET6_ADDRSTRLEN + 1];
 57 | 	int fd;
 58 | 
 59 | 	/*
 60 | 	 * Resolve the hostname, and open() an appropriately named file to
 61 | 	 * write the logs into.
 62 | 	 */
 63 | 	logtarget(struct in6_addr *src)
 64 | 	{
 65 | 		int ret;
 66 | 		struct sockaddr_in6 sa = {
 67 | 			.sin6_family = AF_INET6,
 68 | 			.sin6_port = 0,
 69 | 		};
 70 | 
 71 | 		memcpy(&sa.sin6_addr, src, sizeof(*src));
 72 | 		ret = getnameinfo((const struct sockaddr *)&sa, sizeof(sa),
 73 | 				hostname, sizeof(hostname) - 1, NULL, 0, NI_NAMEREQD);
 74 | 		if (ret) {
 75 | 			const char *ptr;
 76 | 			fprintf(stderr, "getnameinfo failed: %s\n", gai_strerror(ret));
 77 | 			ptr = inet_ntop(AF_INET6, src, hostname, INET6_ADDRSTRLEN);
 78 | 			if (ptr == NULL) {
 79 | 				fprintf(stderr, "inet_ntop failed: %s\n", strerror(errno));
 80 | 				snprintf(hostname, 8, "unknown");
 81 | 			}
 82 | 		}
 83 | 
 84 | 		ret = open(hostname, O_TRUNC|O_WRONLY|O_CREAT, 0644);
 85 | 		if (ret == -1) {
 86 | 			fprintf(stderr, "FATAL: open() failed: %m\n");
 87 | 			abort();
 88 | 		}
 89 | 
 90 | 		fd = ret;
 91 | 	}
 92 | 
 93 | 	/*
 94 | 	 * Close the file
 95 | 	 */
 96 | 	~logtarget(void)
 97 | 	{
 98 | 		close(fd);
 99 | 	}
100 | };
101 | 
102 | /*
103 |  * This relates the IP address of the remote host to its logtarget struct.
104 |  */
105 | static std::unordered_map<struct in6_addr, struct logtarget> *maps;
106 | 
107 | /*
108 |  * Return the existing logtarget struct if we've seen this host before; else,
109 |  * initialize a new logtarget, insert it, and return that.
110 |  */
111 | static struct logtarget& get_target(int thread_nr, struct in6_addr *src)
112 | {
113 | 	auto itr = maps[thread_nr].find(*src);
114 | 	if (itr == maps[thread_nr].end())
115 | 		return maps[thread_nr].emplace(*src, src).first->second;
116 | 
117 | 	return itr->second;
118 | }
119 | 
120 | /*
121 |  * Actually write the line to the file
122 |  */
123 | static void write_log(struct logtarget& tgt, struct msg_buf *buf,
124 | 		struct ncrx_msg *msg)
125 | {
126 | 	/* legacy non-extended netcons message */
127 | 	if (!msg) {
128 | 		dprintf(tgt.fd, "%s\n", buf->buf);
129 | 		return;
130 | 	}
131 | 
132 | 	/* extended netcons msg with metadata */
133 | 	if (std::strlen(msg->version) > 1)
134 | 		 dprintf(tgt.fd, "%s ", msg->version);
135 | 	dprintf(tgt.fd, "%06" PRIu64 " ", msg->seq);
136 | 	dprintf(tgt.fd, "%014" PRIu64 " ", msg->ts_usec);
137 | 	dprintf(tgt.fd, "%d ", msg->facility);
138 | 	dprintf(tgt.fd, "%d ", msg->level);
139 | 	if (msg->cont_start)
140 | 		 dprintf(tgt.fd, "[CONT START] ");
141 | 	if (msg->cont)
142 | 		 dprintf(tgt.fd, "[CONT] ");
143 | 	if (msg->oos)
144 | 		 dprintf(tgt.fd, "[OOS] ");
145 | 	if (msg->seq_reset)
146 | 		 dprintf(tgt.fd, "[SEQ RESET] ");
147 | 	dprintf(tgt.fd, "%s\n", msg->text);
148 | }
149 | 
150 | extern "C" int netconsd_output_init(int nr)
151 | {
152 | 	maps = new std::unordered_map<struct in6_addr, struct logtarget>[nr];
153 | 	return 0;
154 | }
155 | 
/*
 * Module exit hook: destroy all per-thread maps. Destroying a map destroys
 * its logtargets, whose destructors close the logfiles.
 */
extern "C" void netconsd_output_exit(void)
{
	delete[] maps;
}
160 | 
161 | /*
162 |  * This is the actual function called by netconsd.
163 |  */
164 | extern "C" void netconsd_output_handler(int t, struct in6_addr *src,
165 | 		struct msg_buf *buf, struct ncrx_msg *msg)
166 | {
167 | 	struct logtarget& cur = get_target(t, src);
168 | 	write_log(cur, buf, msg);
169 | }
170 | 


--------------------------------------------------------------------------------
/modules/printer.c:
--------------------------------------------------------------------------------
 1 | /* printer.c: Very simple example C netconsd module
 2 |  *
 3 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 4 |  *
 5 |  * This source code is licensed under the BSD-style license found in the
 6 |  * LICENSE file in the root directory of this source tree.
 7 |  */
 8 | 
 9 | #include <stdlib.h>
10 | #include <stdio.h>
11 | #include <arpa/inet.h>
12 | #include <inttypes.h>
13 | 
14 | #include <msgbuf-struct.h>
15 | #include <ncrx-struct.h>
16 | 
/*
 * Module init hook: report how many worker threads netconsd runs.
 * The message ends in a newline so it does not run into the next line of
 * output. Always returns 0.
 */
int netconsd_output_init(int nr_workers)
{
	printf("From init hook: %d worker threads\n", nr_workers);
	return 0;
}
22 | 
/* Module exit hook: print a one-line notice on stdout. */
void netconsd_output_exit(void)
{
	fputs("From exit hook\n", stdout);
}
27 | 
28 | /*
29 |  * This is the actual function called by netconsd.
30 |  */
31 | void netconsd_output_handler(int t, struct in6_addr *src, struct msg_buf *buf,
32 | 		struct ncrx_msg *msg)
33 | {
34 | 	char addr[INET6_ADDRSTRLEN] = {0};
35 | 
36 | 	inet_ntop(AF_INET6, src, addr, INET6_ADDRSTRLEN);
37 | 	if (!msg)
38 | 		printf("%40s: %s\n", addr, buf->buf);
39 | 	else
40 | 		printf("%40s: %s S%06" PRIu64 " T%014" PRIu64 " F%d/L%d %s%s%s%s%s\n", addr,
41 | 			msg->version, msg->seq, msg->ts_usec, msg->facility, msg->level,
42 | 			msg->cont_start ? "[CONT START] " : "",
43 | 			msg->cont ? "[CONT] " : "",
44 | 			msg->oos ? "[OOS] ": "",
45 | 			msg->seq_reset ? "[SEQ RESET] " : "",
46 | 			msg->text);
47 | }
48 | 


--------------------------------------------------------------------------------
/ncrx/Makefile:
--------------------------------------------------------------------------------
 1 | CC ?= gcc
 2 | 
 3 | CFLAGS ?= -O2 -fPIC
 4 | CFLAGS += -D_GNU_SOURCE -fno-strict-aliasing -Wall -Wextra \
 5 |           -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations \
 6 |           -Wdeclaration-after-statement -Wno-missing-field-initializers \
 7 |           -Wno-unused-function -Wno-unused-parameter
 8 | CPPFLAGS ?=
 9 | 
10 | obj = libncrx.o
11 | 
12 | all: $(obj)
13 | 
14 | %.o: %.c
15 | 	$(CC) $< $(CPPFLAGS) $(CFLAGS) $(INCLUDES) -c -o $@
16 | 
17 | clean:
18 | 	rm -f *.o
19 | 


--------------------------------------------------------------------------------
/ncrx/libncrx.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * ncrx - extended netconsole receiver library
  3 |  *
  4 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  5 |  *
  6 |  * This source code is licensed under the BSD-style license found in the
  7 |  * LICENSE file in the root directory of this source tree.
  8 |  */
  9 | 
 10 | #include <stdbool.h>
 11 | #include <stdio.h>
 12 | #include <stdlib.h>
 13 | #include <stdint.h>
 14 | #include <string.h>
 15 | #include <errno.h>
 16 | #include <assert.h>
 17 | 
 18 | #include "ncrx.h"
 19 | 
 20 | /* oos history is tracked with a uint32_t */
 21 | #define NCRX_OOS_MAX		32
 22 | 
/*
 * A list of messages with an element count. The count is kept in step by
 * msg_list_append() and msg_list_del().
 */
struct ncrx_msg_list {
	struct ncrx_list	head;
	int			nr;		/* number of msgs on the list */
};
 27 | 
/*
 * One entry of the receiver's slot array. A slot with no message and an
 * unlinked hole_node counts as free (see slot_is_free()).
 */
struct ncrx_slot {
	struct ncrx_msg		*msg;
	uint64_t		timestamp;	/* last rx on this slot */
	uint64_t		retx_timestamp;	/* last retransmission */
	struct ncrx_list	hole_node;	/* anchored @ ncrx->hole_list */
};
 34 | 
/*
 * Receiver state. Messages sit in a ring of p.nr_slots slots delimited by
 * @head and @tail (indices wrap, see slot_dist() and seq_delta_idx()).
 * Alongside the ring there is a list of incomplete slots, a list of buffered
 * out-of-sequence messages and a list of retired messages.
 */
struct ncrx {
	struct ncrx_param	p;

	uint64_t		now_mono;	/* latest time in msecs */

	int			head;		/* next slot to use */
	int			tail;		/* last slot in use */
	uint64_t		head_seq;	/* next expected seq, unset=0 */
	struct ncrx_slot	*slots;		/* msg slots */
	struct ncrx_list	hole_list;	/* missing or !complete slots */

	uint32_t		oos_history;	/* bit history of oos msgs */
	struct ncrx_msg_list	oos_list;	/* buffered oos msgs */

	struct ncrx_msg_list	retired_list;	/* msgs to be fetched by user */

	uint64_t		acked_seq;	/* last seq acked, unset=max */
	uint64_t		acked_at;	/* and when */

	/* response buffer for ncrx_response() */
	char			resp_buf[NCRX_PKT_MAX + 1];
	int			resp_len;
};
 58 | 
 59 | static const struct ncrx_param ncrx_dfl_param = {
 60 | 	.nr_slots		= NCRX_DFL_NR_SLOTS,
 61 | 
 62 | 	.ack_intv		= NCRX_DFL_ACK_INTV,
 63 | 	.retx_intv		= NCRX_DFL_RETX_INTV,
 64 | 	.retx_stride		= NCRX_DFL_RETX_STRIDE,
 65 | 	.msg_timeout		= NCRX_DFL_MSG_TIMEOUT,
 66 | 
 67 | 	.oos_thr		= NCRX_DFL_OOS_THR,
 68 | 	.oos_intv		= NCRX_DFL_OOS_INTV,
 69 | 	.oos_timeout		= NCRX_DFL_OOS_TIMEOUT,
 70 | };
 71 | 
/* utilities mostly stolen from kernel */

/*
 * min()/max(): each argument is evaluated exactly once. The pointer
 * comparison has no runtime effect; it makes the compiler complain when the
 * two arguments have different types.
 */
#define min(x, y) ({							\
	typeof(x) _min1 = (x);						\
	typeof(y) _min2 = (y);						\
	(void) (&_min1 == &_min2);					\
	_min1 < _min2 ? _min1 : _min2; })

#define max(x, y) ({							\
	typeof(x) _max1 = (x);						\
	typeof(y) _max2 = (y);						\
	(void) (&_max1 == &_max2);					\
	_max1 > _max2 ? _max1 : _max2; })

/* byte offset of MEMBER inside TYPE */
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)

/*
 * Given @ptr to the @member field of a @type object, return a pointer to the
 * enclosing object.
 */
#define container_of(ptr, type, member) ({				\
	const typeof( ((type *)0)->member ) *__mptr = (ptr);		\
	(type *)( (char *)__mptr - offsetof(type,member) );})

/* ncrx_msg from its ->node */
#define node_to_msg(ptr)	container_of(ptr, struct ncrx_msg, node)

/*
 * iterate msg_list; @n always holds the successor of @pos, so the loop body
 * may unlink @pos
 */
#define msg_list_for_each(pos, n, list)					\
	for (pos = node_to_msg((list)->head.next),			\
		     n = node_to_msg(pos->node.next);			\
	     &pos->node != &(list)->head;				\
	     pos = n, n = node_to_msg(pos->node.next))

/* ncrx_slot from its ->hole_node */
#define hole_to_slot(ptr)						\
	container_of(ptr, struct ncrx_slot, hole_node)

/*
 * iterate hole_list; @n always holds the successor of @pos, so the loop body
 * may unlink @pos
 */
#define hole_list_for_each(pos, n, list)				\
	for (pos = hole_to_slot((list)->next),				\
		     n = hole_to_slot(pos->hole_node.next);		\
	     &pos->hole_node != (list);					\
	     pos = n, n = hole_to_slot(pos->hole_node.next))
111 | 
/* Return the number of set bits in @w (0..32). */
static unsigned int hweight32(uint32_t w)
{
	unsigned int bits = 0;

	/* each pass clears the lowest set bit */
	while (w) {
		w &= w - 1;
		bits++;
	}

	return bits;
}
119 | 
120 | static void init_list(struct ncrx_list *head)
121 | {
122 | 	head->next = head;
123 | 	head->prev = head;
124 | }
125 | 
126 | static int list_empty(struct ncrx_list *head)
127 | {
128 | 	return head->next == head;
129 | }
130 | 
131 | static void list_del(struct ncrx_list *head)
132 | {
133 | 	struct ncrx_list *prev = head->prev;
134 | 	struct ncrx_list *next = head->next;
135 | 
136 | 	prev->next = next;
137 | 	next->prev = prev;
138 | 	init_list(head);
139 | }
140 | 
141 | static void list_append(struct ncrx_list *node, struct ncrx_list *list)
142 | {
143 | 	struct ncrx_list *prev = list->prev;
144 | 
145 | 	assert(node->next == node && node->prev == node);
146 | 
147 | 	node->next = list;
148 | 	node->prev = prev;
149 | 	prev->next = node;
150 | 	list->prev = node;
151 | }
152 | 
153 | static void msg_list_del(struct ncrx_msg *msg, struct ncrx_msg_list *list)
154 | {
155 | 	list_del(&msg->node);
156 | 	list->nr--;
157 | 
158 | 	if (!list->nr)
159 | 		assert(list->head.next == &list->head &&
160 | 		       list->head.prev == &list->head);
161 | }
162 | 
163 | static void msg_list_append(struct ncrx_msg *msg, struct ncrx_msg_list *list)
164 | {
165 | 	list_append(&msg->node, &list->head);
166 | 	list->nr++;
167 | }
168 | 
169 | static struct ncrx_msg *msg_list_peek(struct ncrx_msg_list *list)
170 | {
171 | 	if (list_empty(&list->head))
172 | 		return NULL;
173 | 	return node_to_msg(list->head.next);
174 | }
175 | 
/* Remove and return the first message on @list, or NULL if it is empty. */
static struct ncrx_msg *msg_list_pop(struct ncrx_msg_list *list)
{
	struct ncrx_msg *first = msg_list_peek(list);

	if (!first)
		return NULL;

	msg_list_del(first, list);
	return first;
}
185 | 
186 | /*
187 |  * Check if we have a kernel version in the very first field
188 |  */
189 | static int release_prepended(char *ptr)
190 | {
191 | 	char *dot_pos, *comma_pos;
192 | 
193 | 	if (!ptr)
194 | 		return 0;
195 | 
196 | 	dot_pos = memchr(ptr, '.', NCRX_KVERSION_MAX_LEN);
197 | 	comma_pos = memchr(ptr, ',', NCRX_KVERSION_MAX_LEN);
198 | 
199 | 	if (!dot_pos || !comma_pos)
200 | 		return 0;
201 | 
202 | 	if (dot_pos < comma_pos)
203 | 		return 1;
204 | 
205 | 	return 0;
206 | }
/*
 * Parse @payload into @msg.  The data is not copied into @msg's buffer.
 * @msg->text is updated to point into @payload instead.
 *
 * The payload is "<header>;<text>". The header (everything before the first
 * ';') is copied into a local buffer and split on ','; the text is everything
 * after it.
 *
 * Returns 0 on success. On a malformed packet returns -1 and sets errno to
 * EINVAL; @msg may be partially filled in that case.
 */
static int parse_packet(const char *payload, struct ncrx_msg *msg)
{
	char buf[1024];
	char *p, *tok;
	int idx;
	bool is_frag_seen = false, is_emg_seen = false;

	memset(msg, 0, sizeof(*msg));

	/* the header must exist and fit into buf, terminator included */
	p = strchr(payload, ';');
	if (!p || p - payload >= (signed)sizeof(buf))
		goto einval;
	memcpy(buf, payload, p - payload);
	buf[p - payload] = '\0';

	msg->text = p + 1;
	msg->text_len = strlen(msg->text);
	if (msg->text_len > NCRX_LINE_MAX)
		msg->text_len = NCRX_LINE_MAX;

	/* [release,]<level>,<seqnum>,<timestamp>,<contflag>[,KEY=VAL]* */
	p = buf;
	/* without a release field, start numbering at the <level> field */
	if (release_prepended(p)) {
		idx = 0;
	} else {
		idx = 1;
	}
	while ((tok = strsep(&p, ","))) {
		char *endp, *key, *val;
		unsigned long long v;

		switch (idx++) {
		case 0:
			/* kernel release string */
			if (!tok)
				goto einval;
			strncpy(msg->version, tok, NCRX_KVERSION_MAX_LEN - 1);
			continue;
		case 1:
			/* one byte: facility in the upper bits, level in the low 3 */
			v = strtoul(tok, &endp, 0);
			if (*endp != '\0' || v > UINT8_MAX)
				goto einval;
			msg->facility = v >> 3;
			msg->level = v & ((1 << 3) - 1);
			continue;
		case 2:
			/* sequence number */
			v = strtoull(tok, &endp, 0);
			if (*endp != '\0')
				goto einval;
			msg->seq = v;
			continue;
		case 3:
			/* timestamp */
			v = strtoull(tok, &endp, 0);
			if (*endp != '\0')
				goto einval;
			msg->ts_usec = v;
			continue;
		case 4:
			/* continuation flag: 'c' starts one, '+' continues one */
			if (tok[0] == 'c')
				msg->cont_start = 1;
			else if (tok[0] == '+')
				msg->cont = 1;
			continue;
		}

		/* remaining fields are KEY=VAL pairs; ones without '=' are skipped */
		val = tok;
		key = strsep(&val, "=");
		if (!val)
			continue;
		if (!strcmp(key, "ncfrag")) {
			unsigned nf_off, nf_len;

			/* at most one ncfrag key per packet */
			if (is_frag_seen)
				goto einval;
			if (sscanf(val, "%u/%u", &nf_off, &nf_len) != 2)
				goto einval;
			/* the fragment must be non-empty and lie inside the full line */
			if (!msg->text_len ||
			    nf_len >= NCRX_LINE_MAX ||
			    nf_off >= nf_len ||
			    nf_off + msg->text_len > nf_len)
				goto einval;

			/* text_len becomes the length of the whole line */
			msg->ncfrag_off = nf_off;
			msg->ncfrag_len = msg->text_len;
			msg->ncfrag_left = nf_len - msg->ncfrag_len;
			msg->text_len = nf_len;
			is_frag_seen = true;
		} else if (!strcmp(key, "ncemg")) {
			/* at most one ncemg key per packet */
			if (is_emg_seen)
				goto einval;

			v = strtoul(val, &endp, 0);
			if (*endp != '\0')
				goto einval;
			msg->emg = v;
			is_emg_seen = true;
		}
	}
	return 0;
einval:
	errno = EINVAL;
	return -1;
}
313 | 
314 | /* how far @idx is behind @ncrx->head */
315 | static int slot_dist(int idx, struct ncrx *ncrx)
316 | {
317 | 	int dist = ncrx->head - idx;
318 | 	return dist >= 0 ? dist : dist + ncrx->p.nr_slots;
319 | }
320 | 
321 | /* number of occupied slots */
322 | static int nr_queued(struct ncrx *ncrx)
323 | {
324 | 	return slot_dist(ncrx->tail, ncrx);
325 | }
326 | 
327 | /* seq of the last queued message */
328 | static uint64_t tail_seq(struct ncrx *ncrx)
329 | {
330 | 	return ncrx->head_seq - nr_queued(ncrx);
331 | }
332 | 
333 | /* slot index of a message with sequence number @ncrx->head_seq + @delta */
334 | static int seq_delta_idx(struct ncrx *ncrx, int delta)
335 | {
336 | 	int idx = ncrx->head + delta;
337 | 
338 | 	if (idx < 0)
339 | 		return idx + ncrx->p.nr_slots;
340 | 	else if (idx >= ncrx->p.nr_slots)
341 | 		return idx - ncrx->p.nr_slots;
342 | 	else
343 | 		return idx;
344 | }
345 | 
346 | /* is @slot completely empty? */
347 | static int slot_is_free(struct ncrx_slot *slot)
348 | {
349 | 	return !slot->msg && list_empty(&slot->hole_node);
350 | }
351 | 
352 | /* @slot may have just been completed, if so, remove it from hole_list */
353 | static void slot_maybe_complete(struct ncrx_slot *slot)
354 | {
355 | 	struct ncrx_msg *msg = slot->msg;
356 | 
357 | 	if (!msg || msg->ncfrag_left || list_empty(&slot->hole_node))
358 | 		return;
359 | 
360 | 	list_del(&slot->hole_node);
361 | }
362 | 
363 | /* retire the last queued slot whether complete or not */
364 | static void retire_tail(struct ncrx *ncrx)
365 | {
366 | 	int ntail = (ncrx->tail + 1) % ncrx->p.nr_slots;
367 | 	struct ncrx_slot *slot = &ncrx->slots[ncrx->tail];
368 | 	struct ncrx_slot *nslot = &ncrx->slots[ntail];
369 | 
370 | 	if (slot->msg) {
371 | 		msg_list_append(slot->msg, &ncrx->retired_list);
372 | 		slot->msg = NULL;
373 | 	}
374 | 
375 | 	list_del(&slot->hole_node);	/* free slot is never a hole */
376 | 	ncrx->tail = ntail;
377 | 	/*
378 | 	 * Activities of past msgs are considered activities for newer ones
379 | 	 * too.  This prevents oos interval verdicts from flipping as
380 | 	 * sequence progresses.
381 | 	 */
382 | 	nslot->timestamp = max(slot->timestamp, nslot->timestamp);
383 | }
384 | 
385 | /* make room for message with seq ncrx->head_seq + @delta */
386 | static void make_room(struct ncrx *ncrx, int delta)
387 | {
388 | 	int i;
389 | 
390 | 	/* head_seq is for the next msg, need to advance for 0 @delta too */
391 | 	for (i = 0; i <= delta; i++) {
392 | 		struct ncrx_slot *slot;
393 | 		int max_busy = ncrx->p.nr_slots - ncrx->p.retx_stride;
394 | 
395 | 		/* a new slot is considered hole until it gets completed */
396 | 		slot = &ncrx->slots[ncrx->head];
397 | 		assert(slot_is_free(slot));
398 | 		list_append(&slot->hole_node, &ncrx->hole_list);
399 | 		slot->timestamp = ncrx->now_mono;
400 | 		slot->retx_timestamp = 0;
401 | 
402 | 		/*
403 | 		 * Wind the ring buffer and push out if overflowed.  Always
404 | 		 * keep at least one stride empty so that retransmissions
405 | 		 * of expired slots don't count as oos.
406 | 		 */
407 | 		ncrx->head_seq++;
408 | 		ncrx->head = (ncrx->head + 1) % ncrx->p.nr_slots;
409 | 		if (slot_dist(ncrx->tail, ncrx) > max_busy)
410 | 			retire_tail(ncrx);
411 | 	}
412 | }
413 | 
/*
 * Get slot for @tmsg.  On success, returns pointer to the slot which may
 * be free or occupied with partial or complete message.  Returns NULL with
 * errno set to ERANGE if oos, NULL / ENOENT if already retired.
 *
 * Side effects: starts a new sequence stream (and flags @tmsg with
 * seq_reset) when @ncrx->head_seq is zero, advances the ring through
 * make_room() on success, and refreshes the slot's activity timestamp.
 */
static struct ncrx_slot *get_seq_slot(struct ncrx_msg *tmsg, struct ncrx *ncrx)
{
	struct ncrx_slot *slot;
	int64_t delta;
	int idx;

	/* new seq stream */
	if (!ncrx->head_seq) {
		ncrx->head_seq = tmsg->seq;
		ncrx->acked_seq = UINT64_MAX;
		tmsg->seq_reset = 1;
	}

	/* signed distance of @tmsg from the next expected sequence number */
	delta = tmsg->seq - ncrx->head_seq;

	/*
	 * Consider oos if outside reorder window or if the slot is
	 * complete and the last activity on it was more than oos_intv ago.
	 * Emergency messages are never considered oos as they don't follow
	 * the usual transmission pattern and may repeat indefinitely.
	 */
	if (-delta > ncrx->p.nr_slots || delta > ncrx->p.nr_slots) {
		errno = ERANGE;
		return NULL;
	}

	/* the slot is looked up before the ring is advanced below */
	idx = seq_delta_idx(ncrx, delta);
	slot = &ncrx->slots[idx];

	/* @tmsg is older than everything still queued */
	if (-delta > nr_queued(ncrx)) {
		int is_free = slot_is_free(slot);

		/*
		 * Non-emergency messages are oos here if the slot is in use
		 * or its last activity is older than oos_intv.
		 */
		if (!tmsg->emg &&
		    (!is_free ||
		     slot->timestamp + ncrx->p.oos_intv < ncrx->now_mono)) {
			errno = ERANGE;
			return NULL;
		}

		/* otherwise treat as a repeat of an already retired message */
		if (is_free)
			slot->timestamp = ncrx->now_mono;
		errno = ENOENT;
		return NULL;
	}

	/* no-op for negative @delta, i.e. when the slot is already queued */
	make_room(ncrx, delta);
	slot->timestamp = ncrx->now_mono;

	return slot;
}
469 | 
470 | /* make @src's copy, if @src is a fragment, allocate full size as it may grow */
471 | static struct ncrx_msg *copy_msg(struct ncrx_msg *src)
472 | {
473 | 	struct ncrx_msg *dst;
474 | 
475 | 	assert(!src->dict && !src->dict_len);
476 | 
477 | 	dst = malloc(sizeof(*dst) + src->text_len + 1);
478 | 	if (!dst)
479 | 		return NULL;
480 | 
481 | 	*dst = *src;
482 | 	init_list(&dst->node);
483 | 
484 | 	dst->text = dst->buf;
485 | 	if (src->ncfrag_len) {
486 | 		memset(dst->text, 0, src->text_len + 1);
487 | 		memcpy(dst->text + src->ncfrag_off, src->text, src->ncfrag_len);
488 | 		dst->ncfrag_off = 0;
489 | 		dst->ncfrag_len = 0;
490 | 	} else {
491 | 		memcpy(dst->text, src->text, src->text_len);
492 | 		dst->text[dst->text_len] = '\0';
493 | 	}
494 | 	return dst;
495 | }
496 | 
/*
 * @tmsg is a newly parsed msg which is out-of-sequence.  Queue it on
 * @ncrx->oos_list until the message times out, gets pushed out by other
 * oos messages or the sequence stream gets reset.
 *
 * Returns 0 on success, -1 if the copy of @tmsg could not be allocated.
 */
static int queue_oos_msg(struct ncrx_msg *tmsg, struct ncrx *ncrx)
{
	struct ncrx_slot *slot;
	struct ncrx_msg *msg, *nmsg, *first;

	/* @tmsg belongs to the caller, queue a heap copy instead */
	msg = copy_msg(tmsg);
	if (!msg)
		return -1;

	msg_list_append(msg, &ncrx->oos_list);

	/*
	 * Shifted left automatically on each new msg.  Set oos and see if
	 * there have been too many oos among the last 32 messages.
	 */
	ncrx->oos_history |= 1;
	if ((signed)hweight32(ncrx->oos_history) < ncrx->p.oos_thr) {
		/*
		 * Below the reset threshold.  If the oos list has grown past
		 * NCRX_OOS_MAX, pop one message off it and retire it flagged
		 * as oos.
		 */
		if (ncrx->oos_list.nr > NCRX_OOS_MAX) {
			msg = msg_list_pop(&ncrx->oos_list);
			if (msg) {
				msg->oos = 1;
				msg_list_append(msg, &ncrx->retired_list);
			}
		}
		return 0;
	}

	/*
	 * The current sequence stream seems no good.  Let's reset by
	 * retiring all pending, picking the oos msg with the lowest seq,
	 * queueing it to reset the seq and then queueing all other oos
	 * msgs.  If a msg is still oos after reset, just retire it.
	 */
	while (ncrx->tail != ncrx->head)
		retire_tail(ncrx);

	/* a zero head_seq makes get_seq_slot() start a new stream */
	ncrx->head_seq = 0;
	ncrx->acked_seq = UINT64_MAX;

	/* the list holds at least the message appended above */
	first = node_to_msg(ncrx->oos_list.head.next);
	msg_list_for_each(msg, nmsg, &ncrx->oos_list)
		first = msg->seq < first->seq ? msg : first;

	msg_list_del(first, &ncrx->oos_list);
	slot = get_seq_slot(first, ncrx);
	slot->msg = first;
	slot_maybe_complete(slot);

	/* re-queue the rest; those which still don't fit are retired as oos */
	while ((msg = msg_list_pop(&ncrx->oos_list))) {
		slot = get_seq_slot(msg, ncrx);
		if (slot) {
			slot->msg = msg;
			slot_maybe_complete(slot);
		} else {
			msg->oos = 1;
			msg_list_append(msg, &ncrx->retired_list);
		}
	}

	return 0;
}
564 | 
565 | /* @payload has just been received, parse and queue it */
566 | static int ncrx_queue_payload(const char *payload, struct ncrx *ncrx,
567 | 		uint64_t now_real)
568 | {
569 | 	struct ncrx_msg tmsg = {};
570 | 	struct ncrx_slot *slot;
571 | 	int new_msg = 0;
572 | 
573 | 	if (parse_packet(payload, &tmsg))
574 | 		return -1;
575 | 
576 | 	tmsg.rx_at_mono = ncrx->now_mono;
577 | 	tmsg.rx_at_real = now_real;
578 | 	ncrx->oos_history <<= 1;
579 | 
580 | 	/* ack immediately if logging source is doing emergency transmissions */
581 | 	if (tmsg.emg) {
582 | 		ncrx->acked_seq = UINT64_MAX;
583 | 		ncrx->acked_at = 0;
584 | 	}
585 | 
586 | 	/* get the matching slot and allocate a new message if empty */
587 | 	slot = get_seq_slot(&tmsg, ncrx);
588 | 	if (slot && !slot->msg) {
589 | 		slot->msg = copy_msg(&tmsg);
590 | 		new_msg = 1;
591 | 	}
592 | 	if (!slot || !slot->msg) {
593 | 		if (errno == ENOENT)
594 | 			return 0;
595 | 		if (errno == ERANGE)
596 | 			return queue_oos_msg(&tmsg, ncrx);
597 | 		return -1;
598 | 	}
599 | 
600 | 	if (!new_msg && slot->msg->ncfrag_left) {
601 | 		struct ncrx_msg *msg = slot->msg;
602 | 		int off = tmsg.ncfrag_off;
603 | 		int i;
604 | 
605 | 		/*
606 | 		 * we're merging a text fragment into the message text buffer.
607 | 		 * the checks done here ensure that the received fragment values
608 | 		 * are within bounds of the message text buffer.
609 | 		 */
610 | 		if (off >= msg->text_len ||
611 | 			off + tmsg.ncfrag_len > msg->text_len) {
612 | 			return -1;
613 | 		}
614 | 
615 | 		for (i = 0; i < tmsg.ncfrag_len; i++) {
616 | 			if (msg->text[off + i])
617 | 				continue;
618 | 			msg->text[off + i] = tmsg.text[i];
619 | 			msg->ncfrag_left--;
620 | 		}
621 | 	}
622 | 
623 | 	slot_maybe_complete(slot);
624 | 
625 | 	return 0;
626 | }
627 | 
628 | /*
629 |  * Build ncrx_response() output.  Ack for the last retired msg is always
630 |  * added.  If @slot is non-NULL, re-transmission for it is also added.
631 |  */
632 | static void ncrx_build_resp(struct ncrx_slot *slot, struct ncrx *ncrx)
633 | {
634 | 	/* no msg received? */
635 | 	if (!ncrx->head_seq)
636 | 		return;
637 | 
638 | 	/* "ncrx<ack-seq>" */
639 | 	if (!ncrx->resp_len) {
640 | 		ncrx->acked_seq = tail_seq(ncrx) - 1;
641 | 		ncrx->acked_at = ncrx->now_mono;
642 | 
643 | 		ncrx->resp_len = snprintf(ncrx->resp_buf, NCRX_PKT_MAX,
644 | 					  "ncrx%"PRIu64, ncrx->acked_seq);
645 | 	}
646 | 
647 | 	/* " <missing-seq>..." truncated to NCRX_PKT_MAX */
648 | 	if (slot) {
649 | 		int idx = slot - ncrx->slots;
650 | 		int len;
651 | 
652 | 		len = snprintf(ncrx->resp_buf + ncrx->resp_len,
653 | 			       NCRX_PKT_MAX - ncrx->resp_len, " %"PRIu64,
654 | 			       ncrx->head_seq - slot_dist(idx, ncrx));
655 | 		if (ncrx->resp_len + len <= NCRX_PKT_MAX) {
656 | 			ncrx->resp_len += len;
657 | 			ncrx->resp_buf[ncrx->resp_len] = '\0';
658 | 		}
659 | 	}
660 | }
661 | 
/*
 * Advance the state machine to @now_mono and, if @payload is non-NULL,
 * parse and queue it.  Afterwards retires finished and timed-out messages,
 * retires timed-out oos messages and rebuilds the response (ack and
 * re-transmission requests) from scratch.
 *
 * Returns 0 if @payload is NULL, otherwise whatever ncrx_queue_payload()
 * returned for it (0 on success, -1 on failure).
 */
int ncrx_process(const char *payload, uint64_t now_mono, uint64_t now_real,
		struct ncrx *ncrx)
{
	struct ncrx_slot *slot, *tmp_slot;
	struct ncrx_msg *msg;
	uint64_t old_head_seq = ncrx->head_seq;
	int dist_retx, ret = 0;

	/* only warned about, processing continues with the new time */
	if (now_mono < ncrx->now_mono)
		fprintf(stderr, "ncrx: time regressed %"PRIu64"->%"PRIu64"\n",
			ncrx->now_mono, now_mono);

	ncrx->now_mono = now_mono;
	/* the response is rebuilt on every invocation */
	ncrx->resp_len = 0;

	/*
	 * If fully acked, keep last ack timestamp current so that new
	 * messages arriving doesn't trigger ack timeout immediately.
	 */
	if (ncrx->acked_seq == tail_seq(ncrx) - 1)
		ncrx->acked_at = now_mono;

	/* parse and queue @payload */
	if (payload)
		ret = ncrx_queue_payload(payload, ncrx, now_real);

	/* retire complete & timed-out msgs from tail */
	while (ncrx->tail != ncrx->head) {
		slot = &ncrx->slots[ncrx->tail];

		/* stop at the first hole which hasn't timed out yet */
		if ((!slot->msg || !list_empty(&slot->hole_node)) &&
		    slot->timestamp + ncrx->p.msg_timeout > now_mono)
			break;
		retire_tail(ncrx);
	}

	/* retire timed-out oos msgs */
	while ((msg = msg_list_peek(&ncrx->oos_list))) {
		if (msg->rx_at_mono + ncrx->p.oos_timeout > now_mono)
			break;
		msg->oos = 1;
		msg_list_del(msg, &ncrx->oos_list);
		msg_list_append(msg, &ncrx->retired_list);
	}

	/* if enabled, ack pending and timeout expired? */
	if (ncrx->p.ack_intv && ncrx->acked_seq != tail_seq(ncrx) - 1 &&
	    ncrx->acked_at + ncrx->p.ack_intv < now_mono)
		ncrx_build_resp(NULL, ncrx);

	/* head passed one or more re-transmission boundaries? */
	dist_retx = old_head_seq / ncrx->p.retx_stride !=
		ncrx->head_seq / ncrx->p.retx_stride;

	hole_list_for_each(slot, tmp_slot, &ncrx->hole_list) {
		int retx = 0;

		/*
		 * If so, request re-tx of holes further away than stride.
		 * This ensures that a missing seq is requested at least
		 * certain number of times regardless of incoming rate.
		 */
		if (dist_retx &&
		    slot_dist(slot - ncrx->slots, ncrx) > ncrx->p.retx_stride)
			retx = 1;

		/* request re-tx every retx_intv */
		if (now_mono - max(slot->timestamp, slot->retx_timestamp) >=
		    (unsigned)ncrx->p.retx_intv) {
			slot->retx_timestamp = now_mono;
			retx = 1;
		}

		if (retx)
			ncrx_build_resp(slot, ncrx);
	}

	return ret;
}
741 | 
742 | const char *ncrx_response(struct ncrx *ncrx, int *lenp)
743 | {
744 | 	if (lenp)
745 | 		*lenp = ncrx->resp_len;
746 | 	if (ncrx->resp_len)
747 | 		return ncrx->resp_buf;
748 | 	return NULL;
749 | }
750 | 
751 | /* parse out the dictionary in a complete message, if it exists */
752 | static void terminate_msg_and_dict(struct ncrx_msg *msg)
753 | {
754 | 	msg->dict = strchr(msg->text, '\n');
755 | 	if (msg->dict) {
756 | 		int len = msg->text_len;
757 | 		msg->text_len = msg->dict - msg->text;
758 | 		msg->text[msg->text_len] = '\0';
759 | 		msg->dict_len = len - msg->text_len - 1;
760 | 		msg->dict++;
761 | 	}
762 | }
763 | 
764 | struct ncrx_msg *ncrx_next_msg(struct ncrx *ncrx)
765 | {
766 | 	struct ncrx_msg *msg = msg_list_pop(&ncrx->retired_list);
767 | 
768 | 	if (msg)
769 | 		terminate_msg_and_dict(msg);
770 | 
771 | 	return msg;
772 | }
773 | 
774 | uint64_t ncrx_invoke_process_at(struct ncrx *ncrx)
775 | {
776 | 	uint64_t when = UINT64_MAX;
777 | 	struct ncrx_msg *msg;
778 | 
779 | 	/* ack enabled and pending? */
780 | 	if (ncrx->p.ack_intv && ncrx->head_seq &&
781 | 			ncrx->acked_seq != tail_seq(ncrx) - 1)
782 | 		when = min(when, ncrx->acked_at + ncrx->p.ack_intv);
783 | 
784 | 	/*
785 | 	 * Holes to request for retransmission?  msg_timeout is the same
786 | 	 * condition but way longer.  Checking on retx_intv is enough.
787 | 	 */
788 | 	if (!list_empty(&ncrx->hole_list))
789 | 		when = min(when, ncrx->now_mono + ncrx->p.retx_intv);
790 | 
791 | 	/* oos timeout */
792 | 	if ((msg = msg_list_peek(&ncrx->oos_list)))
793 | 		when = min(when, msg->rx_at_mono + ncrx->p.oos_timeout);
794 | 
795 | 	/* min 10ms intv to avoid busy loop in case something goes bonkers */
796 | 	return max(when, ncrx->now_mono + 10);
797 | }
798 | 
799 | struct ncrx *ncrx_create(const struct ncrx_param *param)
800 | {
801 | 	const struct ncrx_param *dfl = &ncrx_dfl_param;
802 | 	struct ncrx_param *p;
803 | 	struct ncrx *ncrx;
804 | 	int i;
805 | 
806 | 	ncrx = calloc(1, sizeof(*ncrx));
807 | 	if (!ncrx)
808 | 		return NULL;
809 | 
810 | 	p = &ncrx->p;
811 | 	if (param) {
812 | 		p->nr_slots	= param->nr_slots	?: dfl->nr_slots;
813 | 
814 | 		p->ack_intv	= param->ack_intv	?: dfl->ack_intv;
815 | 		p->retx_intv	= param->retx_intv	?: dfl->retx_intv;
816 | 		p->retx_stride	= param->retx_stride	?: dfl->retx_stride;
817 | 		p->msg_timeout	= param->msg_timeout	?: dfl->msg_timeout;
818 | 
819 | 		p->oos_thr	= param->oos_thr	?: dfl->oos_thr;
820 | 		p->oos_intv	= param->oos_intv	?: dfl->oos_intv;
821 | 		p->oos_timeout	= param->oos_timeout	?: dfl->oos_timeout;
822 | 	} else {
823 | 		*p = *dfl;
824 | 	}
825 | 
826 | 	ncrx->acked_seq = UINT64_MAX;
827 | 	init_list(&ncrx->hole_list);
828 | 	init_list(&ncrx->oos_list.head);
829 | 	init_list(&ncrx->retired_list.head);
830 | 
831 | 	ncrx->slots = calloc(ncrx->p.nr_slots, sizeof(ncrx->slots[0]));
832 | 	if (!ncrx->slots) {
833 | 		free(ncrx);
834 | 		return NULL;
835 | 	}
836 | 
837 | 	for (i = 0; i < ncrx->p.nr_slots; i++)
838 | 		init_list(&ncrx->slots[i].hole_node);
839 | 
840 | 	return ncrx;
841 | }
842 | 
843 | void ncrx_destroy(struct ncrx *ncrx)
844 | {
845 | 	struct ncrx_msg *msg;
846 | 	int i;
847 | 
848 | 	for (i = 0; i < ncrx->p.nr_slots; i++)
849 | 		free(ncrx->slots[i].msg);
850 | 
851 | 	while ((msg = msg_list_pop(&ncrx->oos_list)))
852 | 		free(msg);
853 | 
854 | 	while ((msg = msg_list_pop(&ncrx->retired_list)))
855 | 		free(msg);
856 | 
857 | 	free(ncrx->slots);
858 | 	free(ncrx);
859 | }
860 | 


--------------------------------------------------------------------------------
/ncrx/ncrx-struct.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the BSD-style license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #ifndef __NETCONSOLE_NCRX_STRUCT__
 9 | #define __NETCONSOLE_NCRX_STRUCT__
10 | 
/*
 * Link node of a doubly linked list.  Embedded in ncrx_msg as @node so
 * that messages can be put on lists.
 */
struct ncrx_list {
	struct ncrx_list	*next;
	struct ncrx_list	*prev;
};
15 | 
16 | #define NCRX_KVERSION_MAX_LEN		64
17 | 
/*
 * ncrx_msg represents a single log message and is what gets returned from
 * ncrx_next_msg().  Most of the public fields are self-explanatory except
 * for the following.
 *
 * oos
 *	The message's sequence number doesn't match up with the current
 *	message stream.  Could be from a foreign source or corrupt.  Ignore
 *	when counting missing messages.
 *
 * seq_reset
 *	The sequence number stream has jumped.  This usually happens when
 *	the log source reboots.  The first message returned after ncrx
 *	initialization always has this flag set.
 *
 * Note that this header uses the <stdint.h> fixed-width types without
 * including anything itself; ncrx.h includes <inttypes.h> before it.
 */
struct ncrx_msg {
	/* public fields */
	uint64_t		seq;		/* printk sequence number */
	uint64_t		ts_usec;	/* printk timestamp in usec */
	char			*text;		/* message body */
	char			*dict;		/* optional dictionary */
	int			text_len;	/* message body length */
	int			dict_len;	/* dictionary length */

	uint8_t			facility;	/* log facility */
	uint8_t			level;		/* printk level */
	unsigned		cont_start:1;	/* first of continued msgs */
	unsigned		cont:1;		/* continuation of prev msg */
	unsigned		oos:1;		/* sequence out-of-order */
	unsigned		seq_reset:1;	/* sequence reset */

	/* private fields */
	struct ncrx_list	node;
	uint64_t		rx_at_mono;	/* monotonic rx time in msec */
	uint64_t		rx_at_real;	/* real rx time in msec */
	int			ncfrag_off;	/* netconsole frag offset */
	int			ncfrag_len;	/* netconsole frag len */
	int			ncfrag_left;	/* number of missing bytes */

	/* kernel release version */
	char			version[NCRX_KVERSION_MAX_LEN];
	unsigned		emg:1;		/* emergency transmission */

	/* trailing storage, @text points here in heap-allocated copies */
	char			buf[];
};
63 | 
64 | #endif /* __NETCONSOLE_NCRX_STRUCT__ */
65 | 


--------------------------------------------------------------------------------
/ncrx/ncrx.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * ncrx - simple extended netconsole receiver
  3 |  *
  4 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  5 |  *
  6 |  * This source code is licensed under the BSD-style license found in the
  7 |  * LICENSE file in the root directory of this source tree.
  8 |  */
  9 | 
 10 | #include <stdio.h>
 11 | #include <stdlib.h>
 12 | #include <time.h>
 13 | #include <poll.h>
 14 | #include <ctype.h>
 15 | #include <errno.h>
 16 | #include <sys/socket.h>
 17 | #include <netinet/in.h>
 18 | #include <netinet/udp.h>
 19 | 
 20 | #include "ncrx.h"
 21 | 
/*
 * Socket address storage with generic, IPv6 and IPv4 views.  Used for the
 * peer address filled in by recvfrom() and passed back to sendto().
 */
union sockaddr_in46 {
	struct sockaddr		addr;
	struct sockaddr_in6	in6;
	struct sockaddr_in	in4;
};
 27 | 
 28 | int main(int argc, char **argv)
 29 | {
 30 | 	char buf[NCRX_LINE_MAX + 1];
 31 | 	struct ncrx_param param = { .ack_intv = 1000 };
 32 | 	struct ncrx *ncrx;
 33 | 	struct sockaddr_in6 laddr = { };
 34 | 	uint64_t next_seq = 0, next_at = UINT64_MAX, now;
 35 | 	int prev_cont = 0;
 36 | 	int fd;
 37 | 
 38 | 	if (argc != 2) {
 39 | 		fprintf(stderr, "Usage: ncrx PORT\n");
 40 | 		return 1;
 41 | 	}
 42 | 
 43 | 	fd = socket(AF_INET6, SOCK_DGRAM, 0);
 44 | 	if (fd < 0) {
 45 | 		perror("socket");
 46 | 		return 1;
 47 | 	}
 48 | 
 49 | 	laddr.sin6_family = AF_INET6;
 50 | 	laddr.sin6_addr = in6addr_any;
 51 | 	laddr.sin6_port = htons(atoi(argv[1]));
 52 | 
 53 | 	if (bind(fd, (struct sockaddr *)&laddr, sizeof(laddr)) < 0) {
 54 | 		perror("bind");
 55 | 		return 1;
 56 | 	}
 57 | 
 58 | 	ncrx = ncrx_create(&param);
 59 | 	if (!ncrx) {
 60 | 		perror("ncrx_create");
 61 | 		return 1;
 62 | 	}
 63 | 
 64 | 	while (1) {
 65 | 		struct pollfd pfd = { .fd = fd, .events = POLLIN };
 66 | 		union sockaddr_in46 raddr;
 67 | 		struct ncrx_msg *msg;
 68 | 		struct timespec ts;
 69 | 		socklen_t raddr_len = sizeof(raddr);
 70 | 		char *payload = NULL;
 71 | 		const char *resp;
 72 | 		int timeout;
 73 | 		int len;
 74 | 
 75 | 		/* determine sleep interval and poll */
 76 | 		timeout = -1;
 77 | 		if (next_at != UINT64_MAX) {
 78 | 			timeout = 0;
 79 | 			if (next_at > now)
 80 | 				timeout = next_at - now;
 81 | 		}
 82 | 
 83 | 		if (poll(&pfd, 1, timeout) < 0) {
 84 | 			perror("poll");
 85 | 			return 1;
 86 | 		}
 87 | 
 88 | 		/* receive message */
 89 | 		len = recvfrom(fd, buf, sizeof(buf) - 1, MSG_DONTWAIT,
 90 | 			       (struct sockaddr *)&raddr, &raddr_len);
 91 | 
 92 | 		payload = NULL;
 93 | 		if (len >= 0) {
 94 | 			buf[len] = '\0';
 95 | 			payload = buf;
 96 | 		} else if (errno != EAGAIN) {
 97 | 			perror("recv");
 98 | 			return 1;
 99 | 		}
100 | 
101 | 		/* determine the current time */
102 | 		if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
103 | 			perror("clock_gettime");
104 | 			return 1;
105 | 		}
106 | 		now = ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
107 | 
108 | 		/* process the payload and perform rx operations */
109 | 		if (ncrx_process(payload, now, 0, ncrx) && errno != ENOENT) {
110 | 			if (errno == EINVAL) {
111 | 				while (len && isspace(payload[len - 1]))
112 | 					payload[--len] = '\0';
113 | 				printf("[%12s] %s\n", "INVAL", payload);
114 | 			} else {
115 | 				perror("ncrx_process");
116 | 			}
117 | 		}
118 | 
119 | 		resp = ncrx_response(ncrx, &len);
120 | 		if (resp && sendto(fd, resp, len, 0,
121 | 				   (struct sockaddr *)&raddr, raddr_len) < 0)
122 | 			perror("sendto");
123 | 
124 | 		while ((msg = ncrx_next_msg(ncrx))) {
125 | 			const char *pnl = prev_cont ? "\n" : "";
126 | 
127 | 			if (msg->oos) {
128 | 				printf("%s[%12s] %s\n", pnl, "OOS", msg->text);
129 | 				prev_cont = 0;
130 | 				continue;
131 | 			}
132 | 			if (msg->seq_reset) {
133 | 				printf("%s[%12s] seq=%"PRIu64"\n",
134 | 				       pnl, "SEQ RESET", msg->seq);
135 | 				next_seq = msg->seq;
136 | 			}
137 | 			if (msg->seq != next_seq) {
138 | 				printf("%s[%12s] %"PRIu64" messages skipped\n",
139 | 				       pnl, "SEQ SKIPPED", msg->seq - next_seq);
140 | 			}
141 | 
142 | 			next_seq = msg->seq + 1;
143 | 
144 | 			if (!msg->cont || !prev_cont)
145 | 				printf("%s[%5"PRIu64".%06"PRIu64"] ", pnl,
146 | 				       msg->ts_usec / 1000000,
147 | 				       msg->ts_usec % 1000000);
148 | 
149 | 			printf("%s", msg->text);
150 | 
151 | 			prev_cont = msg->cont_start || msg->cont;
152 | 			if (!prev_cont)
153 | 				printf("\n");
154 | 		}
155 | 
156 | 		next_at = ncrx_invoke_process_at(ncrx);
157 | 	}
158 | 
159 | 	return 0;
160 | }
161 | 


--------------------------------------------------------------------------------
/ncrx/ncrx.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * ncrx - extended netconsole receiver library
  3 |  *
  4 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  5 |  *
  6 |  * This source code is licensed under the BSD-style license found in the
  7 |  * LICENSE file in the root directory of this source tree.
  8 |  */
  9 | 
 10 | #ifndef __NETCONSOLE_NCRX__
 11 | #define __NETCONSOLE_NCRX__
 12 | 
 13 | #include <inttypes.h>
 14 | 
 15 | #define NCRX_LINE_MAX		8192
 16 | 
 17 | /* max payload len for responses, this is what netconsole uses on tx side */
 18 | #define NCRX_PKT_MAX		1000
 19 | 
 20 | #include "ncrx-struct.h"
 21 | 
/*
 * ncrx parameters.  Specify NULL to use defaults for all.  Specify 0 to use
 * the default for an individual parameter.  All time periods are in
 * millisecs.
 *
 * nr_slots
 *	The number of reorder slots.  This bounds the maximum memory which
 *	may be consumed by the ncrx instance.  Lowering this number
 *	increases the chance of the ordering window passing by a missing
 *	message before it can be obtained, leading to missed messages.
 *
 * ack_intv
 *	A received message is acked after this period.  Transmission side
 *	ack timeout is 10s and this should be shorter than that.
 *
 * retx_intv
 *	A retransmission request is sent, and then repeated, once every
 *	this period.
 *
 * retx_stride
 *	A missing message generates a retransmission request whenever it
 *	gets pushed back this number of slots by newly arriving messages.
 *
 * msg_timeout
 *	A missing message expires after this period and the sequence number
 *	will be skipped in the output.
 *
 * oos_thr
 *	If this number or more of the last 32 messages are out-of-order,
 *	the message stream is reset.
 *
 * oos_intv
 *	A message is considered out-of-sequence only if the last message
 *	received with the sequence number is older than this.
 *
 * oos_timeout
 *	If the sequence is not reset within this period after reception of
 *	an out-of-order message, the message is output.
 */
struct ncrx_param {
	int			nr_slots;

	int			ack_intv;
	int			retx_intv;
	int			retx_stride;
	int			msg_timeout;

	int			oos_thr;
	int			oos_intv;
	int			oos_timeout;
};
 71 | 
 72 | /* default params */
 73 | #define NCRX_DFL_NR_SLOTS	8192
 74 | 
 75 | #define NCRX_DFL_ACK_INTV	0	/* disable ack logic by default */
 76 | 
 77 | #define NCRX_DFL_RETX_INTV	1000
 78 | #define NCRX_DFL_RETX_STRIDE	256
 79 | #define NCRX_DFL_MSG_TIMEOUT	30000
 80 | 
 81 | #define NCRX_DFL_OOS_THR	(32 * 3 / 5)			/* 19 */
 82 | #define NCRX_DFL_OOS_INTV	5000
 83 | #define NCRX_DFL_OOS_TIMEOUT	NCRX_DFL_MSG_TIMEOUT
 84 | 
/*
 * A ncrx instance is created by ncrx_create() and destroyed by
 * ncrx_destroy().  All accesses to a given instance must be serialized;
 * however, a process may create any number of instances and use them
 * concurrently.
 */
struct ncrx;

struct ncrx *ncrx_create(const struct ncrx_param *param);
void ncrx_destroy(struct ncrx *ncrx);

/*
 * A ncrx instance doesn't do any IO or blocking.  It's just a state
 * machine that the user can feed data into and get the results out of.
 *
 * ncrx_process()
 *	Process @payload of a packet.  @now_mono is the current time in msecs.
 *	The origin doesn't matter as long as it's monotonically increasing.
 *	@payload may be NULL.  See ncrx_invoke_process_at().
 *
 *	@now_real is an optional timestamp which will be stored at rx_at_real
 *	in the resulting ncrx_msg struct. The library does not use this value
 *	at all, so it can be zero.
 *
 *	Returns 0 on success.  -1 on failure with errno set.  EINVAL
 *	indicates that @payload is not a valid extended netconsole message.
 *
 * ncrx_response()
 *	The response to send to log source.  If the user calls this
 *	function after each ncrx_process() invocation and sends back the
 *	output, re- and emergency transmissions are activated increasing
 *	the reliability especially if the network is flaky.  If not, ncrx
 *	will passively reorder and assemble messages.
 *
 *	Returns pointer to '\0' terminated response string or NULL if
 *	there's nothing to send back.  If @lenp is not NULL, *@lenp is set
 *	to the length of the response string.
 *
 * ncrx_next_msg()
 *	Fetches the next completed message.  Call repeatedly until NULL is
 *	returned after each ncrx_process() invocation.  Each message should
 *	be free()'d by the user after consumption.
 *
 * ncrx_invoke_process_at()
 *	Message processing is timing dependent and ncrx often needs to take
 *	actions after a certain time period even when there hasn't been any
 *	new packets.  This function indicates when the caller should invoke
 *	ncrx_process() at the latest.
 *
 *	The returned time is on the same clock as the @now_mono previously
 *	provided to ncrx_process().  e.g. if ncrx_process() needs to be
 *	invoked 4 seconds after the last invocation where @now_mono was
 *	60000, this function will return 64000.  Returns UINT64_MAX if
 *	there's no pending timing dependent operation.
 *
 * See ncrx.c for a simple example.
 */
int ncrx_process(const char *payload, uint64_t now_mono, uint64_t now_real,
		struct ncrx *ncrx);
const char *ncrx_response(struct ncrx *ncrx, int *lenp);
struct ncrx_msg *ncrx_next_msg(struct ncrx *ncrx);
uint64_t ncrx_invoke_process_at(struct ncrx *ncrx);
147 | 
148 | #endif	/* __NETCONSOLE_NCRX__ */
149 | 


--------------------------------------------------------------------------------
/ncrx/nctx.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * nctx - extended netconsole sender
  3 |  *
  4 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  5 |  *
  6 |  * This source code is licensed under the BSD-style license found in the
  7 |  * LICENSE file in the root directory of this source tree.
  8 |  */
  9 | 
 10 | #include <stdio.h>
 11 | #include <string.h>
 12 | #include <stdlib.h>
 13 | #include <unistd.h>
 14 | #include <fcntl.h>
 15 | #include <time.h>
 16 | #include <poll.h>
 17 | #include <errno.h>
 18 | #include <arpa/inet.h>
 19 | #include <sys/socket.h>
 20 | #include <netinet/in.h>
 21 | #include <netinet/udp.h>
 22 | 
 23 | #include "ncrx.h"
 24 | 
 25 | /* in msecs */
 26 | #define ACK_TIMEOUT		10000
 27 | #define EMG_TX_MAX_INTV		1000
 28 | #define EMG_TX_MIN_INTV		100
 29 | 
/* socket address storage with generic, IPv6 and IPv4 views */
union sockaddr_in46 {
	struct sockaddr		addr;
	struct sockaddr_in6	in6;
	struct sockaddr_in	in4;
};
 35 | 
/* one entry of a kmsg_ring */
struct kmsg_slot {
	char			*msg;	/* released with free() when the slot is recycled */
	uint64_t		ts;	/* NOTE(review): presumably a current_msec() timestamp - confirm */
};
 40 | 
/*
 * Ring of kmsg slots indexed by sequence number.  @head / @head_seq advance
 * together; when @head catches up with @tail the slot there is recycled.
 */
struct kmsg_ring {
	int			head;		/* index of the head slot */
	int			tail;		/* index of the oldest slot */
	int			nr_slots;	/* number of entries in @slots */
	uint64_t		head_seq;	/* sequence number matching @head */
	/* NOTE(review): presumably the receiver's address - confirm */
	union sockaddr_in46	raddr;
	int			raddr_len;
	/* NOTE(review): emergency tx state, usage isn't visible here */
	int			emg_tx_intv;
	uint64_t		emg_tx_seq;
	uint64_t		emg_tx_ts;
	struct kmsg_slot	*slots;		/* array of @nr_slots slots */
};
 53 | 
 54 | /* relative time in msecs */
 55 | static uint64_t current_msec(void)
 56 | {
 57 | 	struct timespec ts;
 58 | 
 59 | 	if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
 60 | 		perror("clock_gettime");
 61 | 		exit(1);
 62 | 	}
 63 | 	return ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
 64 | }
 65 | 
 66 | static int kmsg_ring_init(struct kmsg_ring *ring, int nr_slots)
 67 | {
 68 | 	memset(ring, 0, sizeof(*ring));
 69 | 
 70 | 	ring->slots = malloc(sizeof(ring->slots[0]) * nr_slots);
 71 | 	if (!ring->slots)
 72 | 		return -1;
 73 | 
 74 | 	ring->nr_slots = nr_slots;
 75 | 	return 0;
 76 | }
 77 | 
 78 | /* advance @ring's head by one, if head catches up with tail, clip it */
 79 | static void kmsg_ring_advance(struct kmsg_ring *ring)
 80 | {
 81 | 	struct kmsg_slot *slot;
 82 | 
 83 | 	ring->head_seq++;
 84 | 	ring->head = (ring->head + 1) % ring->nr_slots;
 85 | 	slot = &ring->slots[ring->head];
 86 | 
 87 | 	if (ring->tail == ring->head) {
 88 | 		free(slot->msg);
 89 | 		memset(slot, 0, sizeof(*slot));
 90 | 		ring->tail = (ring->tail + 1) % ring->nr_slots;
 91 | 	}
 92 | }
 93 | 
 94 | /* fill @ring with kmsgs from @devkmsg, returns 0 on success, -1 on failure */
 95 | static int kmsg_ring_fill(struct kmsg_ring *ring, int devkmsg)
 96 | {
 97 | 	char buf[NCRX_LINE_MAX];
 98 | 	struct kmsg_slot *slot;
 99 | 	int level;
100 | 	uint64_t seq;
101 | 	ssize_t len;
102 | 
103 | next_line:
104 | 	do {
105 | 		len = read(devkmsg, buf, sizeof(buf) - 1);
106 | 		/*
107 | 		 * EPIPE indicates skipped messages.  kmsgs are always
108 | 		 * stored according to their sequence numbers, so we don't
109 | 		 * need to do anything special on EPIPE.  Keep reading.
110 | 		 */
111 | 	} while (len < 0 && errno == EPIPE);
112 | 
113 | 	if (len < 0) {
114 | 		if (errno == EAGAIN)
115 | 			return 0;
116 | 		return -1;
117 | 	}
118 | 
119 | 	/* read seq and see if it makes sense */
120 | 	buf[len] = '\0';
121 | 	if (sscanf(buf, "%d,%"SCNu64",", &level, &seq) != 2 ||
122 | 	    seq < ring->head_seq) {
123 | 		fprintf(stderr, "Warning: malformed kmsg \"%s\"\n", buf);
124 | 		goto next_line;
125 | 	}
126 | 
127 | 	/* wind ring till head is at the right slot and store */
128 | 	while (ring->head_seq < seq)
129 | 		kmsg_ring_advance(ring);
130 | 
131 | 	slot = &ring->slots[ring->head];
132 | 	slot->msg = strdup(buf);
133 | 	if (!slot->msg)
134 | 		return -1;
135 | 
136 | 	slot->ts = current_msec();
137 | 	kmsg_ring_advance(ring);
138 | 	goto next_line;
139 | }
140 | 
141 | /* sequence number of the oldest occupied slot in @ring */
142 | static uint64_t kmsg_ring_tail_seq(struct kmsg_ring *ring)
143 | {
144 | 	int nr;
145 | 
146 | 	nr = ring->head - ring->tail;
147 | 	if (nr < 0)
148 | 		nr += ring->nr_slots;
149 | 	return ring->head_seq - nr;
150 | }
151 | 
152 | /* peek kmsg matching @seq, NULL if not found */
153 | static char *kmsg_ring_peek(struct kmsg_ring *ring, uint64_t seq)
154 | {
155 | 	int idx;
156 | 
157 | 	if (seq < kmsg_ring_tail_seq(ring) || seq >= ring->head_seq)
158 | 		return NULL;
159 | 
160 | 	idx = ring->head - (int)(ring->head_seq - seq);
161 | 	if (idx < 0)
162 | 		idx += ring->nr_slots;
163 | 
164 | 	return ring->slots[idx].msg;
165 | }
166 | 
167 | /* free slots upto @upto_seq, tail_seq is @upto_seq + 1 afterwards */
168 | static void kmsg_ring_consume(struct kmsg_ring *ring, uint64_t upto_seq)
169 | {
170 | 	uint64_t tail_seq = kmsg_ring_tail_seq(ring);
171 | 	int tail = ring->tail;
172 | 
173 | 	if (!ring->head_seq || upto_seq < tail_seq)
174 | 		return;
175 | 
176 | 	if (upto_seq >= ring->head_seq)
177 | 		upto_seq = ring->head_seq - 1;
178 | 
179 | 	while (tail_seq <= upto_seq) {
180 | 		struct kmsg_slot *slot = &ring->slots[ring->head];
181 | 
182 | 		free(slot->msg);
183 | 		memset(slot, 0, sizeof(*slot));
184 | 		tail_seq++;
185 | 		tail = (tail + 1) % ring->nr_slots;
186 | 
187 | 		/* made progress, reset emergency tx */
188 | 		ring->emg_tx_intv = 0;
189 | 	}
190 | 
191 | 	ring->tail = tail;
192 | }
193 | 
/*
 * Send @msg to @addr via @sock.  If @msg is too long, split into
 * NCRX_PKT_MAX byte chunks with ncfrag header added.  If @is_emg_tx is
 * set, add ncemg header.
 *
 * The message is expected to look like "<header>;<body>".  A message that
 * needs extra header fields but has no ';' has an empty body and results in
 * no packet being sent.  sendto() failures are not reported.
 */
static void send_kmsg(int sock, char *msg, int is_emg_tx,
		      struct sockaddr *addr, int addr_len)
{
	char buf[NCRX_PKT_MAX + 1];
	/* worst-case room reserved for the extra header fields */
	const int max_extra_len = sizeof(",ncemg=1,ncfrag=0000/0000");
	const char *header, *body;
	int msg_len = strlen(msg);
	int header_len = msg_len, body_len = 0;
	int chunk_len, nr_chunks, i;

	/* fast path: fits in one packet and needs no extra header fields */
	if (!is_emg_tx && msg_len <= NCRX_PKT_MAX) {
		sendto(sock, msg, msg_len, 0, addr, addr_len);
		return;
	}

	/* need to insert extra header fields, detect header and body */
	header = msg;
	body = memchr(msg, ';', msg_len);
	if (body) {
		header_len = body - header;
		body_len = msg_len - header_len - 1;
		body++;
	}

	/* body bytes per packet once header and extra fields are accounted */
	chunk_len = NCRX_PKT_MAX - header_len - max_extra_len;
	if (chunk_len <= 0) {
		fprintf(stderr, "Error: invalid chunk_len %d in send_kmsg()\n",
			chunk_len);
		return;
	}

	/*
	 * Transfer possibly multiple chunks with extra header fields.
	 *
	 * For emergency transfers due to missing acks, add "ncemg=1".
	 *
	 * If @msg needs to be split to fit NCRX_PKT_MAX, add
	 * "ncfrag=<byte-offset>/<total-bytes>" to identify each chunk.
	 */
	memcpy(buf, header, header_len);
	nr_chunks = (body_len + chunk_len - 1) / chunk_len;

	for (i = 0; i < nr_chunks; i++) {
		int offset = i * chunk_len;
		int this_header = header_len;
		int this_chunk;

		/* the last chunk may be shorter than chunk_len */
		this_chunk = body_len - offset;
		if (this_chunk > chunk_len)
			this_chunk = chunk_len;

		/* append the extra fields right after the original header */
		if (is_emg_tx && this_header < sizeof(buf))
			this_header += snprintf(buf + this_header,
						sizeof(buf) - this_header,
						",ncemg=1");
		if (nr_chunks > 1 && this_header < sizeof(buf))
			this_header += snprintf(buf + this_header,
						sizeof(buf) - this_header,
						",ncfrag=%d/%d",
						offset, body_len);
		if (this_header < sizeof(buf))
			this_header += snprintf(buf + this_header,
						sizeof(buf) - this_header, ";");

		/* guards the memcpy() below against overrunning @buf */
		if (this_header + chunk_len > NCRX_PKT_MAX) {
			fprintf(stderr, "Error: this_header %d is too large for chunk_len %d in send_kmsg()\n",
				this_header, chunk_len);
			return;
		}

		memcpy(buf + this_header, body, this_chunk);

		sendto(sock, buf, this_header + this_chunk, 0, addr, addr_len);

		body += this_chunk;
	}
}
276 | 
277 | /* rx and handle response packets from @sock, returns 0 on success, -1 on err */
278 | static int kmsg_ring_process_resps(struct kmsg_ring *ring, int sock)
279 | {
280 | 	char rx_buf[NCRX_PKT_MAX + 1];
281 | 	union sockaddr_in46 raddr;
282 | 	struct iovec iov = { .iov_base = rx_buf, .iov_len = NCRX_PKT_MAX };
283 | 	struct msghdr msgh = { .msg_name = &raddr.addr, .msg_iov = &iov,
284 | 			       .msg_iovlen = 1 };
285 | 	ssize_t len;
286 | 	char *pos, *tok;
287 | 	uint64_t seq;
288 | 
289 | next_packet:
290 | 	msgh.msg_namelen = sizeof(raddr);
291 | 	len = recvmsg(sock, &msgh, MSG_DONTWAIT);
292 | 	if (len < 0) {
293 | 		if (errno == EAGAIN)
294 | 			return 0;
295 | 		return -1;
296 | 	}
297 | 
298 | 	rx_buf[len] = '\0';
299 | 	pos = rx_buf;
300 | 	tok = strsep(&pos, " ");
301 | 
302 | 	/* "ncrx" header */
303 | 	if (strncmp(tok, "ncrx", 4)) {
304 | 		char addr_str[INET6_ADDRSTRLEN];
305 | 
306 | 		if (raddr.addr.sa_family == AF_INET6)
307 | 			inet_ntop(AF_INET6, &raddr.in6.sin6_addr,
308 | 				  addr_str, sizeof(addr_str));
309 | 		else
310 | 			inet_ntop(AF_INET, &raddr.in4.sin_addr,
311 | 				  addr_str, sizeof(addr_str));
312 | 
313 | 		fprintf(stderr, "Warning: malformed packet from [%s]:%u\n",
314 | 			addr_str, ntohs(raddr.in4.sin_port));
315 | 		goto next_packet;
316 | 	}
317 | 	tok += 4;
318 | 
319 | 	/* <ack-seq> */
320 | 	if (sscanf(tok, "%"SCNu64, &seq))
321 | 		kmsg_ring_consume(ring, seq);
322 | 
323 | 	/* <missing-seq>... */
324 | 	while ((tok = strsep(&pos, " "))) {
325 | 		if (sscanf(tok, "%"SCNu64, &seq)) {
326 | 			char *msg = kmsg_ring_peek(ring, seq);
327 | 			if (msg)
328 | 				send_kmsg(sock, msg, 0,
329 | 					  &raddr.addr, msgh.msg_namelen);
330 | 		}
331 | 	}
332 | 
333 | 	/* stash remote address for emergency tx */
334 | 	ring->raddr = raddr;
335 | 	ring->raddr_len = msgh.msg_namelen;
336 | 
337 | 	goto next_packet;
338 | }
339 | 
340 | /*
341 |  * Perform emergency tx if necessary.  Must be called after @ring is filled
342 |  * and responses are processed.  Returns the duration in msecs after which
343 |  * this function should be invoked again.  If -1, timeout isn't necessary.
344 |  */
345 | static int kmsg_ring_emg_tx(struct kmsg_ring *ring, int sock)
346 | {
347 | 	struct kmsg_slot *slot = &ring->slots[ring->tail];
348 | 	uint64_t target, now;
349 | 	uint64_t tail_seq;
350 | 	char *msg;
351 | 
352 | 	/* if @ring is empty or remote site is not established, nothing to do */
353 | 	if (ring->head == ring->tail || !ring->raddr_len) {
354 | 		ring->emg_tx_intv = 0;
355 | 		return -1;
356 | 	}
357 | 
358 | 	/* calculate the next deadline, if in the future, return the diff */
359 | 	if (!ring->emg_tx_intv)
360 | 		target = slot->ts + ACK_TIMEOUT;
361 | 	else
362 | 		target = ring->emg_tx_ts + ring->emg_tx_intv;
363 | 
364 | 	now = current_msec();
365 | 
366 | 	if (target > now)
367 | 		return target - now;
368 | 
369 | 	tail_seq = kmsg_ring_tail_seq(ring);
370 | 
371 | 	if (!ring->emg_tx_intv) {
372 | 		/* new emg tx session */
373 | 		ring->emg_tx_intv = EMG_TX_MIN_INTV;
374 | 		ring->emg_tx_seq = tail_seq;
375 | 	} else if (ring->emg_tx_seq < ring->head_seq) {
376 | 		/* in the middle of emg tx session */
377 | 		ring->emg_tx_seq++;
378 | 		if (ring->emg_tx_seq < tail_seq)
379 | 			ring->emg_tx_seq = tail_seq;
380 | 	} else {
381 | 		/* finished one session, increase intv and repeat */
382 | 		ring->emg_tx_intv *= 2;
383 | 		if (ring->emg_tx_intv < EMG_TX_MAX_INTV)
384 | 			ring->emg_tx_intv = EMG_TX_MAX_INTV;
385 | 		ring->emg_tx_seq = tail_seq;
386 | 	}
387 | 
388 | 	msg = kmsg_ring_peek(ring, ring->emg_tx_seq);
389 | 	if (msg)
390 | 		send_kmsg(sock, msg, 1, &ring->raddr.addr, ring->raddr_len);
391 | 
392 | 	ring->emg_tx_ts = now;
393 | 
394 | 	return ring->emg_tx_intv;
395 | }
396 | 
/*
 * Print the optional error message @err followed by the usage line to
 * stderr, then exit with status 1.  Never returns.
 */
static void usage_err(const char *err)
{
	if (err != NULL)
		fprintf(stderr, "Error: %s\n", err);

	fputs("Usage: nctx [-n nr_slots] [-k devkmsg_path] ip port\n", stderr);
	exit(1);
}
404 | 
/*
 * nctx entry point.  Parses "[-n nr_slots] [-k devkmsg_path] ip port",
 * binds a UDP socket to ip:port, opens the kmsg source non-blocking and
 * then loops: fill the ring, handle responses, do emergency tx, and sleep
 * in poll() until there is more input or the next emergency tx is due.
 * Only returns (with 1) on error.
 */
int main(int argc, char **argv)
{
	union sockaddr_in46 laddr = { };
	struct pollfd pfds[2] = { };
	struct kmsg_ring kmsg_ring;
	const char *devkmsg_path = "/dev/kmsg";
	int nr_slots = NCRX_DFL_NR_SLOTS;
	int sleep_dur = -1;	/* poll() timeout, -1 blocks indefinitely */
	int opt, port, sock, devkmsg;
	socklen_t addrlen;

	while ((opt = getopt(argc, argv, "n:k:h?")) != -1) {
		switch (opt) {
		case 'n':
			nr_slots = atoi(optarg);
			if (nr_slots <= 0)
				usage_err("nr_slots must be a positive number");
			break;
		case 'k':
			devkmsg_path = optarg;
			break;
		default:
			usage_err(NULL);
		}
	}

	/* exactly two positional arguments: ip and port */
	if (optind + 2 != argc)
		usage_err(NULL);

	/* try the address as IPv6 first, then fall back to IPv4 */
	if (inet_pton(AF_INET6, argv[optind], &laddr.in6.sin6_addr)) {
		laddr.addr.sa_family = AF_INET6;
		addrlen = sizeof(laddr.in6);
	} else if (inet_pton(AF_INET, argv[optind], &laddr.in4.sin_addr)) {
		laddr.addr.sa_family = AF_INET;
		addrlen = sizeof(laddr.in4);
	} else {
		usage_err("invalid IP address");
	}

	port = atoi(argv[optind + 1]);
	if (port <= 0 || port > 65535)
		usage_err("invalid port number");

	/* set through the in4 view regardless of the address family */
	laddr.in4.sin_port = htons(port);

	sock = socket(laddr.addr.sa_family, SOCK_DGRAM, 0);
	if (sock < 0) {
		perror("socket");
		return 1;
	}

	if (bind(sock, &laddr.addr, addrlen)) {
		perror("bind");
		return 1;
	}

	devkmsg = open(devkmsg_path, O_RDONLY | O_NONBLOCK);
	if (devkmsg < 0) {
		perror("open");
		return 1;
	}

	if (kmsg_ring_init(&kmsg_ring, nr_slots)) {
		perror("kmsg_ring_init");
		return 1;
	}

	/* wait for either new kmsgs or response packets */
	pfds[0].events = POLLIN;
	pfds[1].events = POLLIN;
	pfds[0].fd = devkmsg;
	pfds[1].fd = sock;

	while (poll(pfds, 2, sleep_dur) >= 0) {
		if (kmsg_ring_fill(&kmsg_ring, devkmsg)) {
			perror("kmsg_ring_fill");
			return 1;
		}

		if (kmsg_ring_process_resps(&kmsg_ring, sock)) {
			perror("kmsg_ring_process_resps");
			return 1;
		}

		/* the return value is the timeout for the next poll() */
		sleep_dur = kmsg_ring_emg_tx(&kmsg_ring, sock);
	}
	perror("poll");
	return 1;
}
493 | 


--------------------------------------------------------------------------------
/ncrx/netcons-gen.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | #
  3 | # Copyright (c) Meta Platforms, Inc. and affiliates.
  4 | #
  5 | # This source code is licensed under the BSD-style license found in the
  6 | # LICENSE file in the root directory of this source tree.
  7 | #
  8 | 
  9 | """
 10 | This tool produces netcons messages for testing (mostly of {lib,}ncrx).
 11 | 
 12 | Usual usage:
 13 | 
 14 | 1. Run `ncrx [port]` listening in one shell
 15 | 2. In another shell, run `netcons-gen [...] | nc -u 127.0.0.1 [port]`
 16 | """
 17 | 
 18 | import argparse
 19 | import random
 20 | import sys
 21 | import time
 22 | from enum import Enum
 23 | 
 24 | 
class Level(Enum):
    """Message severity; occupies the low three bits of the header's faclev field."""

    LOG_EMERG = 0
    LOG_ALERT = 1
    LOG_CRIT = 2
    LOG_ERR = 3
    LOG_WARNING = 4
    LOG_NOTICE = 5
    LOG_INFO = 6
    LOG_DEBUG = 7
 34 | 
 35 | 
class Facility(Enum):
    """Message facility; shifted left by three bits to form the header's faclev field."""

    LOG_KERN = 0
    LOG_USER = 1
    LOG_MAIL = 2
    LOG_DAEMON = 3
    LOG_AUTH = 4
    LOG_SYSLOG = 5
    LOG_LPR = 6
    LOG_NEWS = 7
    LOG_UUCP = 8
    LOG_CRON = 9
    LOG_AUTHPRIV = 10

    # values 11-15 are intentionally not defined here
    LOG_LOCAL0 = 16
    LOG_LOCAL1 = 17
    LOG_LOCAL2 = 18
    LOG_LOCAL3 = 19
    LOG_LOCAL4 = 20
    LOG_LOCAL5 = 21
    LOG_LOCAL6 = 22
    LOG_LOCAL7 = 23
 57 | 
 58 | 
class Mode(Enum):
    """How main() picks the next sequence number after each message."""

    NORMAL = 0  # seq + 1
    SKIP = 1  # seq + a random step of 1..5
    RESET = 2  # back to 0
 63 | 
 64 | 
 65 | ARG_TO_MODE_MAP = {"reset": Mode.RESET, "skip": Mode.SKIP}
 66 | 
 67 | 
 68 | def make_dictionary_string(msg):
 69 |     """Format X=Y\0X=Y, no trailing \0"""
 70 |     return "\0".join(f"{k}={v}" for k, v in msg.items())
 71 | 
 72 | 
 73 | def make_ext_header(seq, facility, level, cont):
 74 |     """
 75 |     See printk.c's msg_print_ext_header for format spec.
 76 |     """
 77 | 
 78 |     faclev = (facility.value << 3) | level.value
 79 |     ts_usec = int(time.monotonic() * (10**6))
 80 |     return "{},{},{},{};".format(faclev, seq, ts_usec, "c" if cont else "-")
 81 | 
 82 | 
 83 | def _body_escape(text):
 84 |     return text.replace("\0", "\n")
 85 | 
 86 | 
 87 | def make_ext_body(text, dict_str):
 88 |     """
 89 |     See printk.c's msg_print_ext_body for format spec.
 90 | 
 91 |     Escaping of unprintables is currently unimplemented.
 92 |     """
 93 |     return f"{_body_escape(text)}\n{_body_escape(dict_str)}"
 94 | 
 95 | 
 96 | def make_netcons_msg(
 97 |     seq=0,
 98 |     facility=Facility.LOG_KERN,
 99 |     level=Level.LOG_ERR,
100 |     cont=False,
101 |     text="text",
102 |     meta_dict=None,
103 | ):
104 |     if meta_dict is None:
105 |         meta_dict = {"DICT": "test"}
106 | 
107 |     dict_str = make_dictionary_string(meta_dict)
108 | 
109 |     header = make_ext_header(seq=seq, facility=facility, level=level, cont=cont)
110 |     body = make_ext_body(text=text, dict_str=dict_str)
111 | 
112 |     return f"{header}{body}"
113 | 
114 | 
def parse_args():
    """Parse the command line; --skip, --reset and --cont are boolean switches."""
    parser = argparse.ArgumentParser(description=__doc__)

    switches = (
        ("--skip", "Randomly skip sequence numbers"),
        ("--reset", "Randomly reset the sequence to 0 again"),
        ("--cont", "Randomly insert LOG_CONT messages"),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, action="store_true", help=help_text)

    return parser.parse_args()
127 | 
128 | 
def main() -> None:
    """
    Emit one message on stdout every half second, forever.

    After each message the next sequence number is chosen according to a
    randomly picked enabled mode, and (with --cont) the continuation flag is
    re-rolled.  Each transition is reported on stderr.
    """
    args = parse_args()

    # NORMAL is always available; the others are opt-in via flags.
    enabled_modes = [Mode.NORMAL] + [
        mode for arg_name, mode in ARG_TO_MODE_MAP.items() if getattr(args, arg_name)
    ]

    seq, cont = 0, False

    while True:
        message = make_netcons_msg(
            seq=seq, text="hi", meta_dict={"UNAME": "it's minix i swear"}, cont=cont
        )
        print(message, flush=True)

        chosen_mode = random.choice(enabled_modes)

        if chosen_mode == Mode.NORMAL:
            new_seq = seq + 1
        elif chosen_mode == Mode.SKIP:
            new_seq = seq + random.randint(1, 5)
        elif chosen_mode == Mode.RESET:
            new_seq = 0

        if args.cont:
            cont = random.choice([True, False])

        print(
            f"seq: {seq} -> {new_seq}, mode: {chosen_mode}, cont: {cont}",
            file=sys.stderr,
        )
        seq = new_seq

        time.sleep(0.5)
168 | 
169 | 
170 | if __name__ == "__main__":
171 |     main()  # pragma: no cover
172 | 


--------------------------------------------------------------------------------
/output.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the BSD-style license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <stdio.h>
  9 | #include <string.h>
 10 | #include <limits.h>
 11 | #include <dlfcn.h>
 12 | #include <netinet/in.h>
 13 | 
 14 | #include <ncrx.h>
 15 | 
 16 | #include "include/common.h"
 17 | #include "include/msgbuf-struct.h"
 18 | #include "include/output.h"
 19 | 
/*
 * Registered output modules.  The three arrays are parallel: entry i holds
 * the dlopen() handle, a strdup()'d copy of the path, and the handler
 * entry point of module i.  Only the first nr_outputs entries are valid.
 */
static void *output_dlhandles[MAXOUTS];
static const char *output_dlpaths[MAXOUTS];
static void (*outputs[MAXOUTS])(int, struct in6_addr *, struct msg_buf *,
		struct ncrx_msg *);
static int nr_outputs;
 25 | 
 26 | int register_output_module(char *path, int nr_workers)
 27 | {
 28 | 	void *dl, *dlsym_addr;
 29 | 	int (*mod_init)(int);
 30 | 	int ret;
 31 | 
 32 | 	if (nr_outputs == MAXOUTS) {
 33 | 		warn("Too many output modules!\n");
 34 | 		return -1;
 35 | 	}
 36 | 
 37 | 	log("Loading module '%s'\n", path);
 38 | 	dl = dlopen(path, RTLD_NOW|RTLD_LOCAL);
 39 | 	if (!dl) {
 40 | 		warn("Can't open '%s': %s", path, dlerror());
 41 | 		return -1;
 42 | 	}
 43 | 
 44 | 	dlsym_addr = dlsym(dl, "netconsd_output_handler");
 45 | 	if (!dlsym_addr) {
 46 | 		warn("Can't find handler sym in '%s': %s", path, dlerror());
 47 | 		goto err_close;
 48 | 	}
 49 | 
 50 | 	mod_init = dlsym(dl, "netconsd_output_init");
 51 | 	if (mod_init) {
 52 | 		log("Calling mod_init() for '%s'\n", path);
 53 | 		ret = mod_init(nr_workers);
 54 | 
 55 | 		if (ret) {
 56 | 			warn("mod_init() for '%s' failed: %d\n", path, ret);
 57 | 			goto err_close;
 58 | 		}
 59 | 	}
 60 | 
 61 | 	log("Module '%s' registered (#%d@%p)\n", path, nr_outputs, dlsym_addr);
 62 | 	output_dlhandles[nr_outputs] = dl;
 63 | 	output_dlpaths[nr_outputs] = strdup(path);
 64 | 	outputs[nr_outputs] = dlsym_addr;
 65 | 	nr_outputs++;
 66 | 	return 0;
 67 | 
 68 | err_close:
 69 | 	dlclose(dl);
 70 | 	return -1;
 71 | }
 72 | 
 73 | void destroy_output_modules(void)
 74 | {
 75 | 	int i, ret;
 76 | 	void (*mod_exit)(void);
 77 | 
 78 | 	for (i = 0; i < nr_outputs; i++) {
 79 | 		const char *path = output_dlpaths[i];
 80 | 
 81 | 		mod_exit = dlsym(output_dlhandles[i], "netconsd_output_exit");
 82 | 		if (mod_exit) {
 83 | 			log("Calling mod_exit() for '%s'\n", path);
 84 | 			mod_exit();
 85 | 		}
 86 | 
 87 | 		log("Unloading module '%s' (#%d@%p)\n", path, i, outputs[i]);
 88 | 		ret = dlclose(output_dlhandles[i]);
 89 | 		if (ret)
 90 | 			warn("dlclose() failed: %s\n", dlerror());
 91 | 
 92 | 		free((void *)path);
 93 | 	}
 94 | }
 95 | 
 96 | void execute_output_pipeline(int thread_nr, struct in6_addr *src,
 97 | 		struct msg_buf *buf, struct ncrx_msg *msg)
 98 | {
 99 | 	int i;
100 | 
101 | 	for (i = 0; i < nr_outputs; i++)
102 | 		outputs[i](thread_nr, src, buf, msg);
103 | }
104 | 


--------------------------------------------------------------------------------
/threads.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the BSD-style license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <stdlib.h>
  9 | #include <stdint.h>
 10 | #include <inttypes.h>
 11 | #include <signal.h>
 12 | #include <pthread.h>
 13 | 
 14 | #include "include/common.h"
 15 | #include "include/msgbuf-struct.h"
 16 | #include "include/listener.h"
 17 | #include "include/worker.h"
 18 | #include "include/threads.h"
 19 | 
/* bookkeeping for all listener and worker threads, see create_threads() */
struct tctl {
	int nr_listeners;			/* entries in @listeners */
	int nr_workers;				/* entries in @workers */
	struct ncrx_listener *listeners;	/* calloc()'d listener array */
	struct ncrx_worker *workers;		/* calloc()'d worker array */
};
 26 | 
 27 | static void wake_thread(struct ncrx_listener *listener, int worker)
 28 | {
 29 | 	struct ncrx_worker *tgt = &listener->workers[worker];
 30 | 
 31 | 	assert_pthread_mutex_locked(&tgt->queuelock);
 32 | 
 33 | 	debug("Waking thread %d\n", worker);
 34 | 	pthread_cond_signal(&tgt->cond);
 35 | }
 36 | 
 37 | static void push_prequeue_to_worker(struct ncrx_listener *listener, int worker)
 38 | {
 39 | 	struct ncrx_worker *tgt = &listener->workers[worker];
 40 | 	struct ncrx_prequeue *prequeue = &listener->prequeues[worker];
 41 | 
 42 | 	assert_pthread_mutex_locked(&tgt->queuelock);
 43 | 
 44 | 	if (tgt->queue_head)
 45 | 		tgt->queue_tail->next = prequeue->queue_head;
 46 | 	else
 47 | 		tgt->queue_head = prequeue->queue_head;
 48 | 
 49 | 	tgt->queue_tail = prequeue->queue_tail;
 50 | 	prequeue->queue_head = NULL;
 51 | 
 52 | 	debug("Listener %d pushed %d pkts to worker %d (backlog: %d)\n",
 53 | 		listener->thread_nr, prequeue->count, worker->thread_nr, tgt->nr_queued);
 54 | 
 55 | 	tgt->nr_queued += prequeue->count;
 56 | 	prequeue->count = 0;
 57 | }
 58 | 
 59 | static void enqueue_and_wake_worker(struct ncrx_listener *listener, int worker)
 60 | {
 61 | 	struct ncrx_worker *tgt = &listener->workers[worker];
 62 | 
 63 | 	pthread_mutex_lock(&tgt->queuelock);
 64 | 	push_prequeue_to_worker(listener, worker);
 65 | 	wake_thread(listener, worker);
 66 | 	pthread_mutex_unlock(&tgt->queuelock);
 67 | }
 68 | 
 69 | static int prequeue_is_empty(struct ncrx_listener *listener, int worker)
 70 | {
 71 | 	struct ncrx_prequeue *prequeue = &listener->prequeues[worker];
 72 | 	return prequeue->queue_head == NULL;
 73 | }
 74 | 
 75 | void enqueue_and_wake_all(struct ncrx_listener *listener)
 76 | {
 77 | 	int i;
 78 | 
 79 | 	for (i = 0; i < listener->nr_workers; i++)
 80 | 		if (!prequeue_is_empty(listener, i))
 81 | 			enqueue_and_wake_worker(listener, i);
 82 | }
 83 | 
 84 | static void stop_and_wait_for_workers(struct tctl *ctl)
 85 | {
 86 | 	int i;
 87 | 	uint64_t total_processed = 0, total_hosts = 0;
 88 | 
 89 | 	for (i = 0; i < ctl->nr_workers; i++) {
 90 | 		pthread_mutex_lock(&ctl->workers[i].queuelock);
 91 | 		ctl->workers[i].stop = 1;
 92 | 		pthread_cond_signal(&ctl->workers[i].cond);
 93 | 		pthread_mutex_unlock(&ctl->workers[i].queuelock);
 94 | 		pthread_join(ctl->workers[i].id, NULL);
 95 | 
 96 | 		pthread_mutex_destroy(&ctl->workers[i].queuelock);
 97 | 		pthread_cond_destroy(&ctl->workers[i].cond);
 98 | 		pthread_condattr_destroy(&ctl->workers[i].condattr);
 99 | 
100 | 		total_processed += ctl->workers[i].processed;
101 | 		total_hosts += ctl->workers[i].hosts_seen;
102 | 		log("Exiting worker %d got %" PRIu64 " msgs from %" PRIu64 " hosts\n",
103 | 				i, ctl->workers[i].processed,
104 | 				ctl->workers[i].hosts_seen);
105 | 	}
106 | 
107 | 	log("Total messages processed by workers: %" PRIu64 " from %" PRIu64 " hosts\n",
108 | 			total_processed, total_hosts);
109 | 	free(ctl->workers);
110 | }
111 | 
112 | static void stop_and_wait_for_listeners(struct tctl *ctl)
113 | {
114 | 	int i;
115 | 	uint64_t total_processed = 0;
116 | 
117 | 	for (i = 0; i < ctl->nr_listeners; i++) {
118 | 		ctl->listeners[i].stop = 1;
119 | 		pthread_kill(ctl->listeners[i].id, SIGUSR1);
120 | 		pthread_join(ctl->listeners[i].id, NULL);
121 | 
122 | 		free(ctl->listeners[i].prequeues);
123 | 
124 | 		total_processed += ctl->listeners[i].processed;
125 | 		log("Exiting listener %d queued %" PRIu64 " messages\n", i,
126 | 				ctl->listeners[i].processed);
127 | 	}
128 | 
129 | 	log("Total messages processed by listeners: %" PRIu64 "\n",
130 | 			total_processed);
131 | 	free(ctl->listeners);
132 | }
133 | 
134 | static void create_worker_threads(struct tctl *ctl, struct netconsd_params *p)
135 | {
136 | 	struct ncrx_worker *cur, *workers;
137 | 	int i, r;
138 | 
139 | 	workers = calloc(p->nr_workers, sizeof(*workers));
140 | 	if (!workers)
141 | 		fatal("Couldn't allocate thread structures\n");
142 | 
143 | 	for (i = 0; i < p->nr_workers; i++) {
144 | 		cur = &workers[i];
145 | 
146 | 		pthread_mutex_init(&cur->queuelock, NULL);
147 | 		pthread_condattr_init(&cur->condattr);
148 | 		pthread_condattr_setclock(&cur->condattr, CLOCK_MONOTONIC);
149 | 		pthread_cond_init(&cur->cond, &cur->condattr);
150 | 		cur->queue_head = NULL;
151 | 		cur->thread_nr = i;
152 | 
153 | 		cur->gc_int_ms = p->gc_int_ms;
154 | 		cur->gc_age_ms = p->gc_age_ms;
155 | 		cur->lastgc = p->gc_int_ms ? now_mono_ms() / p->gc_int_ms : 0;
156 | 
157 | 		r = pthread_create(&cur->id, NULL, ncrx_worker_thread, cur);
158 | 		if (r)
159 | 			fatal("%d/%d failed: -%d\n", i, p->nr_workers, r);
160 | 	}
161 | 
162 | 	ctl->nr_workers = p->nr_workers;
163 | 	ctl->workers = workers;
164 | }
165 | 
166 | static void create_listener_threads(struct tctl *ctl, struct netconsd_params *p)
167 | {
168 | 	struct ncrx_prequeue *prequeues;
169 | 	struct ncrx_listener *cur, *listeners;
170 | 	int i, r;
171 | 
172 | 	listeners = calloc(p->nr_listeners, sizeof(*listeners));
173 | 	if (!listeners)
174 | 		fatal("Couldn't allocate listeners: %m\n");
175 | 
176 | 	for (i = 0; i < p->nr_listeners; i++) {
177 | 		cur = &listeners[i];
178 | 
179 | 		prequeues = calloc(ctl->nr_workers, sizeof(*prequeues));
180 | 		if (!prequeues)
181 | 			fatal("ENOMEM %d/%d\n", i, p->nr_listeners);
182 | 
183 | 		cur->thread_nr = i;
184 | 		cur->prequeues = prequeues;
185 | 		cur->workers = ctl->workers;
186 | 		cur->nr_workers = ctl->nr_workers;
187 | 		cur->batch = p->mmsg_batch;
188 | 		cur->address = &p->listen_addr;
189 | 
190 | 		r = pthread_create(&cur->id, NULL, udp_listener_thread, cur);
191 | 		if (r)
192 | 			fatal("%d/%d failed: -%d\n", i, p->nr_listeners, r);
193 | 	}
194 | 
195 | 	ctl->nr_listeners = p->nr_listeners;
196 | 	ctl->listeners = listeners;
197 | }
198 | 
/*
 * Stop and join all threads tracked by @ctl, then free @ctl itself.
 * Listeners are stopped before workers.
 */
void destroy_threads(struct tctl *ctl)
{
	stop_and_wait_for_listeners(ctl);
	stop_and_wait_for_workers(ctl);
	free(ctl);
}
205 | 
206 | struct tctl *create_threads(struct netconsd_params *p)
207 | {
208 | 	struct tctl *ret;
209 | 
210 | 	ret = calloc(1, sizeof(*ret));
211 | 	if (!ret)
212 | 		fatal("Couldn't allocate thread structures\n");
213 | 
214 | 	ret->nr_workers = p->nr_workers;
215 | 
216 | 	create_worker_threads(ret, p);
217 | 	create_listener_threads(ret, p);
218 | 
219 | 	return ret;
220 | }
221 | 


--------------------------------------------------------------------------------
/util/Makefile:
--------------------------------------------------------------------------------
 1 | CFLAGS ?= -O2 -fPIC
 2 | CFLAGS += -D_GNU_SOURCE
 3 | CPPFLAGS ?=
 4 | LDFLAGS ?=
 5 | LIBS = -lpthread
 6 | 
 7 | all: netconsblaster
 8 | 
 9 | netconsblaster:
10 | 	$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) netconsblaster.c $(LIBS) -o netconsblaster
11 | 
12 | clean:
13 | 	rm -f netconsblaster
14 | 


--------------------------------------------------------------------------------
/util/netconsblaster.c:
--------------------------------------------------------------------------------
  1 | /*
 * netconsblaster: A test exerciser for netconsd and libncrx
  3 |  *
  4 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  5 |  *
  6 |  * This source code is licensed under the BSD-style license found in the
  7 |  * LICENSE file in the root directory of this source tree.
  8 |  */
  9 | 
 10 | #include <stdlib.h>
 11 | #include <stdio.h>
 12 | #include <string.h>
 13 | #include <signal.h>
 14 | #include <pthread.h>
 15 | #include <unistd.h>
 16 | #include <getopt.h>
 17 | #include <inttypes.h>
 18 | #include <sys/socket.h>
 19 | #include <sys/syscall.h>
 20 | #include <arpa/inet.h>
 21 | #include <netinet/in.h>
 22 | #include <netinet/ip6.h>
 23 | #include <netinet/udp.h>
 24 | 
 25 | #ifndef __linux__
 26 | #error Sorry, SOCK_RAW is not portable
 27 | #endif
 28 | 
/*
 * Print a printf-style message to stdout and terminate the process with
 * EXIT_FAILURE.  Wrapped in do/while (0) so it can be used like a single
 * statement.
 */
#define fatal(...) \
do { \
	printf(__VA_ARGS__); \
	exit(EXIT_FAILURE); \
} while (0)
 34 | 
 35 | static uint64_t rand64(unsigned int *seed)
 36 | {
 37 | 	uint64_t ret;
 38 | 	ret = (uint64_t) rand_r(seed) << 32 | rand_r(seed);
 39 | 	return ret;
 40 | }
 41 | 
 42 | static uint64_t now_epoch_ms(void)
 43 | {
 44 | 	struct timespec t;
 45 | 
 46 | 	clock_gettime(CLOCK_MONOTONIC, &t);
 47 | 	return t.tv_sec * 1000 + t.tv_nsec / 1000000L;
 48 | }
 49 | 
 50 | static int ones_complement_sum(uint16_t *data, int len, int sum)
 51 | {
 52 | 	unsigned int tmp;
 53 | 	int i;
 54 | 
 55 | 	for (i = 0; i < len / 2; i++) {
 56 | 		tmp = ntohs(data[i]);
 57 | 
 58 | 		/*
 59 | 		 * Kill -0
 60 | 		 */
 61 | 		if (tmp == 65535)
 62 | 			tmp = 0;
 63 | 
 64 | 		sum += tmp;
 65 | 		if (sum >= 65536) {
 66 | 			sum &= 65535;
 67 | 			sum++;
 68 | 		}
 69 | 	}
 70 | 
 71 | 	if (len & 1)
 72 | 		fatal("Use test data with even lengths please\n");
 73 | 
 74 | 	return sum;
 75 | }
 76 | 
 77 | /*
 78 |  * From RFC768: "Checksum is the 16-bit one's complement of the one's
 79 |  * complement sum of a pseudo header of information from the IP header, the UDP
 80 |  * header, and the data, padded with zero octets at the end (if necessary) to
 81 |  * make a multiple of two octets."
 82 |  *
 83 |  * See RFC2460 section 8.1 for definition of pseudoheader for IPv6.
 84 |  *
 85 |  * In case you're wondering why I bothered with this: "Unlike IPv4, when UDP
 86 |  * packets are originated by an IPv6 node, the UDP checksum is NOT optional.
 87 |  * IPv6 receivers MUST discard packets containing a zero checksum."
 88 |  *
 89 |  * @addrs: Pointer to the begnning of the src/dst addresses in the ipv6hdr
 90 |  * @udppkt: Pointer to the udphdr
 91 |  * @len: Length of the udphdr and its payload
 92 |  */
 93 | static int udp_csum(void *addrptr, void *udppkt, int len)
 94 | {
 95 | 	unsigned int sum = 0;
 96 | 	uint16_t *addrs = addrptr;
 97 | 	uint16_t pseudohdr[4] = {0, htons(len), 0, htons(IPPROTO_UDP)};
 98 | 
 99 | 	sum = ones_complement_sum(addrs, 32, 0);
100 | 	sum = ones_complement_sum(pseudohdr, 8, sum);
101 | 	sum = ones_complement_sum(udppkt, len, sum);
102 | 	sum = ~sum;
103 | 
104 | 	/*
105 | 	 * From RFC768: "If the computed checksum is zero, it is transmitted as
106 | 	 * all ones. An all zero transmitted checksum value means that the
107 | 	 * transmitter generated no checksum"
108 | 	 */
109 | 	if (sum == 0)
110 | 		sum = 65535;
111 | 
112 | 	return sum;
113 | }
114 | 
/*
 * Length in bytes of the payload sent with every netconsole packet. Keep this
 * even: ones_complement_sum() refuses buffers with an odd length.
 */
#define NETCONSLEN 64
119 | 
/*
 * Layout of a raw netconsole packet: IPv6 header, UDP header, then the
 * payload. The structure is packed and the payload is a flexible array member,
 * so an allocation has to add the payload length on top of sizeof().
 */
struct netcons_packet {
	struct ip6_hdr l3;
	struct udphdr l4;
	char payload[];
} __attribute__((packed));
128 | 
/*
 * Metadata for extended netconsole packets. make_packet() formats these
 * fields into the "lvl,seq,ts,cont;" header at the start of each payload.
 */
struct netcons_metadata {
	uint64_t seq;	/* Sequence number, incremented by bump_metadata() */
	uint64_t ts;	/* Timestamp, advanced by bump_metadata() */
	uint8_t cont;	/* CONT flag, turned into a symbol by contflag() */
	uint8_t lvl;	/* Printed as the first header field */
};
138 | 
139 | static void bump_metadata(struct netcons_metadata *md)
140 | {
141 | 	md->seq++;
142 | 	md->ts += 1337;
143 | }
144 | 
/*
 * Filler text for packets: 63 characters, so that together with its
 * terminating NUL it is exactly NETCONSLEN bytes. make_packet() appends as
 * much of it as fits after the metadata header.
 */
static const char *filler = "012345678901234567890123456789012345678901234567890123456789012";
149 | 
/*
 * Numeric to symbol for the CONT flag.
 *
 * @cont: 0 (no CONT flag), 1 (CONT_START) or 2 (CONT)
 *
 * Returns the one-character symbol for @cont. Any other value is fatal; the
 * trailing return only exists so that every path out of the function yields a
 * value even if fatal() is not known to the compiler as non-returning.
 */
static const char *contflag(int cont)
{
	switch (cont) {
	case 0:
		/*
		 * No CONT flag present
		 */
		return "-";
	case 1:
		/*
		 * CONT_START
		 */
		return "c";
	case 2:
		/*
		 * CONT
		 */
		return "+";
	default:
		break;
	}

	fatal("CONT value %d invalid?\n", cont);
	return NULL;
}
175 | 
176 | static void make_packet(struct netcons_packet *pkt, const struct in6_addr *src,
177 | 		const struct in6_addr *dst, const int16_t *dst_port, const struct netcons_metadata *md)
178 | {
179 | 	const int len = NETCONSLEN;
180 | 	unsigned int nr;
181 | 
182 | 	memset(pkt, 0, sizeof(pkt->l3) + sizeof(pkt->l4));
183 | 
184 | 	memcpy(&pkt->l3.ip6_src, src, sizeof(*src));
185 | 	memcpy(&pkt->l3.ip6_dst, dst, sizeof(*dst));
186 | 	pkt->l3.ip6_vfc |= (6 << 4);
187 | 	pkt->l3.ip6_nxt = IPPROTO_UDP;
188 | 	pkt->l3.ip6_plen = htons(sizeof(pkt->l4) + len);
189 | 	pkt->l3.ip6_hlim = 64;
190 | 
191 | 	nr = snprintf(pkt->payload, len - 1, "%d,%" PRIu64 ",%" PRIu64 ",%s;",
192 | 		      md->lvl, md->seq, md->ts, contflag(md->cont));
193 | 	if (nr < len)
194 | 		snprintf(pkt->payload + nr, len - nr, "%s", filler);
195 | 	pkt->payload[len - 1] = '\n';
196 | 
197 | 	pkt->l4.uh_sport = htons(6666);
198 | 	pkt->l4.uh_dport = htons(*dst_port);
199 | 	pkt->l4.uh_ulen = htons(sizeof(pkt->l4) + len);
200 | 	pkt->l4.uh_sum = htons(udp_csum(&pkt->l3.ip6_src, &pkt->l4,
201 | 			 sizeof(pkt->l4) + len));
202 | }
203 | 
204 | static int write_packet(int sockfd, struct netcons_packet *pkt)
205 | {
206 | 	const int len = sizeof(pkt->l3) + sizeof(pkt->l4) + NETCONSLEN;
207 | 	struct sockaddr_in6 bogus = {
208 | 		.sin6_family = AF_INET6,
209 | 	};
210 | 
211 | 	memcpy(&bogus.sin6_addr, &pkt->l3.ip6_dst, sizeof(pkt->l3.ip6_dst));
212 | 	return sendto(sockfd, pkt, len, 0, (const struct sockaddr *)&bogus,
213 | 		      sizeof(bogus)) != len;
214 | }
215 | 
/*
 * Open a raw IPv6 socket (IPPROTO_RAW) for sending hand-built packets.
 * Failure to open the socket is fatal.
 */
static int get_raw_socket(void)
{
	const int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);

	if (fd == -1)
		fatal("Couldn't get raw socket: %m\n");

	return fd;
}
226 | 
227 | static struct netcons_packet *alloc_packet(void)
228 | {
229 | 	struct netcons_packet *ret;
230 | 
231 | 	ret = malloc(sizeof(struct netcons_packet) + NETCONSLEN);
232 | 	if (!ret)
233 | 		fatal("ENOMEM allocating packet\n");
234 | 
235 | 	return ret;
236 | }
237 | 
238 | static struct netcons_metadata *alloc_metadata_array(int bits)
239 | {
240 | 	struct netcons_metadata *ret;
241 | 
242 | 	ret = calloc(1 << bits, sizeof(*ret));
243 | 	if (!ret)
244 | 		fatal("ENOMEM allocating metadata\n");
245 | 
246 | 	return ret;
247 | }
248 | 
/*
 * Keep only the low @bits bits of @val.
 *
 * @bits <= 0 yields 0 and @bits >= 64 keeps all of @val, so no shift count is
 * ever out of range. The mask is built as a 64-bit constant, since unsigned
 * long may be only 32 bits wide.
 *
 * On big-endian hosts the mask is byte-swapped, so it covers the same bytes
 * in memory as it does on little-endian hosts.
 */
static uint64_t mask_long(uint64_t val, int bits)
{
	uint64_t mask;

	if (bits <= 0)
		mask = 0;
	else if (bits >= 64)
		mask = UINT64_MAX;
	else
		mask = (UINT64_C(1) << bits) - 1;

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	mask = __builtin_bswap64(mask);
#endif

	return val & mask;
}
259 | 
/*
 * Randomly flip bits in the last eight bytes of @addr, limited to the bits
 * selected by mask_long(..., @bits).
 *
 * @addr: Address to permute in place
 * @bits: Number of bits that may change
 * @seed: rand_r() state used by rand64()
 *
 * Returns the masked value of those eight bytes after the flip, which the
 * caller uses as an index identifying the resulting address.
 *
 * The bytes are copied in and out with memcpy(): struct in6_addr gives no
 * guarantee of 8-byte alignment, and accessing it through a uint64_t pointer
 * would also break the aliasing rules.
 */
static uint64_t permute_addr(struct in6_addr *addr, int bits,
			     unsigned int *seed)
{
	unsigned char *tail = &addr->s6_addr[16 - sizeof(uint64_t)];
	uint64_t word;

	memcpy(&word, tail, sizeof(word));
	word ^= mask_long(rand64(seed), bits);
	memcpy(tail, &word, sizeof(word));

	return mask_long(word, bits);
}
269 | 
/*
 * Per-thread state for one blaster thread. main() fills it in before the
 * thread starts; the thread itself only writes @seed.
 */
struct blaster_state {
	pthread_t id;		/* Handle from pthread_create() */
	int nr;			/* Index of this thread */

	struct in6_addr dst;	/* Destination address for every packet */
	struct in6_addr src;	/* Base source address; last byte is @nr */
	int16_t dst_port;	/* Destination UDP port, host byte order */
	unsigned int seed;	/* rand_r() state, seeded from the thread id */
	long blastcount;	/* Stop after this many packets; 0 = no limit */
	int *stopptr;		/* Shared flag: nonzero asks the thread to stop */
	int bits;		/* Number of source address bits to permute */
};
282 | 
283 | static void *blaster_thread(void *arg)
284 | {
285 | 	struct blaster_state *_blaster_state = arg;
286 | 	struct netcons_metadata *mdarr;
287 | 	struct netcons_packet *pkt;
288 | 	struct in6_addr src;
289 | 	long idx, count = 0;
290 | 	int fd;
291 | 
292 | 	fd = get_raw_socket();
293 | 	pkt = alloc_packet();
294 | 	mdarr = alloc_metadata_array(_blaster_state->bits);
295 | 	memcpy(&src, &_blaster_state->src, sizeof(src));
296 | 	_blaster_state->seed = syscall(SYS_gettid);
297 | 
298 | 	while (!*_blaster_state->stopptr) {
299 | 		idx = permute_addr(&src, _blaster_state->bits, &_blaster_state->seed);
300 | 		make_packet(pkt, &src, &_blaster_state->dst, &_blaster_state->dst_port, &mdarr[idx]);
301 | 		bump_metadata(&mdarr[idx]);
302 | 
303 | 		if (!write_packet(fd, pkt))
304 | 			count++;
305 | 
306 | 		if (_blaster_state->blastcount && count == _blaster_state->blastcount)
307 | 			break;
308 | 	}
309 | 
310 | 	return (void*)count;
311 | }
312 | 
/*
 * Program-wide settings, filled in by parse_arguments(), plus the stop flag
 * shared with the signal handler and every blaster thread.
 */
static struct params {
	int srcaddr_order;	/* Simulate 2^N source addresses in total */
	int thread_order;	/* Run 2^N blaster threads */
	struct in6_addr src;	/* Base source address */
	struct in6_addr dst;	/* Destination address */
	int16_t dst_port;	/* Destination UDP port, host byte order */
	long blastcount;	/* Packets per thread; 0 = until interrupted */

	int stop_blasting;	/* Set to 1 by stop_signal() */
} params;
323 | 
324 | static void parse_arguments(int argc, char **argv, struct params *p)
325 | {
326 | 	int i;
327 | 	const char *optstr = "o:s:d:t:n:p:";
328 | 	const struct option optlong[] = {
329 | 		{
330 | 			.name = "help",
331 | 			.has_arg = no_argument,
332 | 			.val = 'h',
333 | 		},
334 | 		{
335 | 			.name = NULL,
336 | 		},
337 | 	};
338 | 
339 | 	/*
340 | 	 * Defaults
341 | 	 */
342 | 	p->srcaddr_order = 16;
343 | 	p->thread_order = 0;
344 | 	p->dst_port = 1514;
345 | 	memcpy(&p->src, &in6addr_loopback, sizeof(in6addr_loopback));
346 | 	memcpy(&p->dst, &in6addr_loopback, sizeof(in6addr_loopback));
347 | 	p->blastcount = 0;
348 | 
349 | 	p->stop_blasting = 0;
350 | 
351 | 	while ((i = getopt_long(argc, argv, optstr, optlong, NULL)) != -1) {
352 | 		switch (i) {
353 | 		case 'o':
354 | 			/*
355 | 			 * Controls the number of bits to randomly flip in the
356 | 			 * actual IPv6 address of this machine. So the program
357 | 			 * will effectively simulate 2^N clients.
358 | 			 */
359 | 			p->srcaddr_order = atoi(optarg);
360 | 			if (p->srcaddr_order > 64 - 8)
361 | 				fatal("Source address order too large\n");
362 | 			break;
363 | 		case 't':
364 | 			/*
365 | 			 * Split the work among 2^N worker threads.
366 | 			 */
367 | 			p->thread_order = atoi(optarg);
368 | 			if (p->thread_order > 8)
369 | 				fatal("Largest supported thread order is 8\n");
370 | 			break;
371 | 		case 's':
372 | 			/*
373 | 			 * Source address to permute the low N bits of.
374 | 			 */
375 | 			if (inet_pton(AF_INET6, optarg, &p->src) != 1)
376 | 				fatal("Bad src '%s': %m\n", optarg);
377 | 			break;
378 | 		case 'd':
379 | 			/*
380 | 			 * Destination address for all generated packets.
381 | 			 */
382 | 			if (inet_pton(AF_INET6, optarg, &p->dst) != 1)
383 | 				fatal("Bad dst '%s': %m\n", optarg);
384 | 			break;
385 | 		case 'n':
386 | 			/*
387 | 			 * Write N packets from each worker thread and exit.
388 | 			 */
389 | 			p->blastcount = atol(optarg);
390 | 			break;
391 | 		case 'p':
392 | 			/*
393 | 			 * Set the destination UDP port for outgoing packets.
394 | 			 */
395 | 			 p->dst_port = atoi(optarg);
396 | 			 break;
397 | 		case 'h':
398 | 			puts("Usage: netconsblaster [-o srcaddr_bits] [-t thread_order]\n"
399 | 			     "                      [-s srcaddr] [-d dstaddr]\n"
400 | 			     "                      [-n pktcount] [-p dst_port]\n");
401 | 			puts("  srcaddr_bits: Randomize low N bits of srcaddr");
402 | 			puts("  thread_order: Split work among 2^N threads");
403 | 			puts("  pktcount:     Stop after N pkts per thread\n");
404 | 			puts("  dst_port:     The UDP destination port\n");
405 | 			exit(0);
406 | 		default:
407 | 			fatal("Invalid command line parameters\n");
408 | 		}
409 | 	}
410 | }
411 | 
/*
 * Signal handler: raise the stop flag in params. main() installs it for
 * SIGINT, and every blaster thread polls the flag through its stopptr.
 */
static void stop_signal(__attribute__((__unused__))int signum)
{
	params.stop_blasting = 1;
}
416 | 
417 | int main(int argc, char **argv)
418 | {
419 | 	int i, nr_threads, srcaddr_per_thread;
420 | 	uint64_t tmp, count, start, finish;
421 | 	struct blaster_state *threadstates, *threadstate;
422 | 	struct sigaction stopper = {
423 | 		.sa_handler = stop_signal,
424 | 	};
425 | 
426 | 	parse_arguments(argc, argv, &params);
427 | 
428 | 	nr_threads = 1 << params.thread_order;
429 | 	srcaddr_per_thread = params.srcaddr_order - params.thread_order;
430 | 
431 | 	if (srcaddr_per_thread <= 0)
432 | 		fatal("More thread bits than srcaddr bits\n");
433 | 
434 | 	threadstates = calloc(nr_threads, sizeof(*threadstates));
435 | 	if (!threadstates)
436 | 		fatal("ENOMEM allocating state for threads\n");
437 | 
438 | 	sigaction(SIGINT, &stopper, NULL);
439 | 
440 | 	for (i = 0; i < nr_threads; i++) {
441 | 		threadstate = &threadstates[i];
442 | 
443 | 		memcpy(&threadstate->src, &params.src, sizeof(threadstate->src));
444 | 		memcpy(&threadstate->dst, &params.dst, sizeof(threadstate->dst));
445 | 		memcpy(&threadstate->dst_port, &params.dst_port, sizeof(threadstate->dst_port));
446 | 		threadstate->blastcount = params.blastcount;
447 | 		threadstate->stopptr = &params.stop_blasting;
448 | 		threadstate->bits = srcaddr_per_thread;
449 | 
450 | 		threadstate->src.s6_addr[15] = (unsigned char)i;
451 | 		threadstate->nr = i;
452 | 
453 | 		if (pthread_create(&threadstate->id, NULL, blaster_thread, threadstate))
454 | 			fatal("Thread %d/%d failed: %m\n", i, nr_threads);
455 | 	}
456 | 
457 | 	count = 0;
458 | 
459 | 	start = now_epoch_ms();
460 | 	for (i = 0; i < nr_threads; i++) {
461 | 		pthread_join(threadstates[i].id, (void**)&tmp);
462 | 		count += tmp;
463 | 	}
464 | 	finish = now_epoch_ms();
465 | 
466 | 	printf("Wrote %" PRIu64 " packets (%" PRIu64 " pkts/sec)\n", count,
467 | 			count / (finish - start) * 1000UL);
468 | 	return 0;
469 | }
470 | 


--------------------------------------------------------------------------------
/worker.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the BSD-style license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <stdlib.h>
  9 | #include <stdint.h>
 10 | #include <inttypes.h>
 11 | #include <pthread.h>
 12 | #include <string.h>
 13 | #include <limits.h>
 14 | #include <netinet/in.h>
 15 | 
 16 | #include <ncrx.h>
 17 | 
 18 | #include "include/common.h"
 19 | #include "include/msgbuf-struct.h"
 20 | #include "include/output.h"
 21 | #include "include/worker.h"
 22 | 
/*
 * Parameters handed to ncrx_create() for every per-source ncrx instance.
 * retx_intv, msg_timeout and oos_timeout are all set to NETCONS_RTO.
 */
static const struct ncrx_param ncrx_param = {
	.nr_slots = 512,
	.retx_intv = NETCONS_RTO,
	.msg_timeout = NETCONS_RTO,
	.oos_timeout = NETCONS_RTO,
};
 29 | 
 30 | /*
 31 |  * Keep it simple: just use a boring probing hashtable that resizes.
 32 |  */
 33 | 
/*
 * Node of a circular doubly-linked timer list. The same structure serves as
 * list head and as list member; a node that is on no list points at itself.
 */
struct timerlist {
	struct timerlist *prev;
	struct timerlist *next;
	uint64_t when;		/* Callback time in ms; unused for list heads */
};
 39 | 
/*
 * One hashtable slot, holding the ncrx state for a single source address.
 * A slot is free exactly when @ncrx is NULL.
 */
struct bucket {
	struct in6_addr src;		/* Key: source address of the host */
	struct ncrx *ncrx;		/* ncrx instance, NULL if slot is free */
	uint64_t last_seen;		/* rcv_time of the latest message */
	struct timerlist timernode;	/* Links the bucket into a timerlist */
};
 46 | 
/*
 * Open-addressing hashtable keyed by source address. The table has
 * 2^@order buckets, allocated together with the header.
 */
struct hashtable {
	unsigned long order;	/* log2 of the number of buckets */
	unsigned long load;	/* Number of buckets currently in use */
	struct bucket table[];
};
 52 | 
 53 | static unsigned long hash_srcaddr(struct in6_addr *addr)
 54 | {
 55 | 	uint32_t *addrptr = (uint32_t *)addr;
 56 | 
 57 | 	return jhash2(addrptr, sizeof(*addr) / sizeof(*addrptr), WORKER_SEED);
 58 | }
 59 | 
 60 | static unsigned long order_mask(int order)
 61 | {
 62 | 	return (1UL << order) - 1;
 63 | }
 64 | 
 65 | static unsigned long htable_mask(unsigned long hash, int order)
 66 | {
 67 | 	return hash & order_mask(order);
 68 | }
 69 | 
 70 | static unsigned long htable_hash(struct hashtable *h, struct in6_addr *s)
 71 | {
 72 | 	return htable_mask(hash_srcaddr(s), h->order);
 73 | }
 74 | 
 75 | static int srcaddr_compar(struct in6_addr *a, struct in6_addr *b)
 76 | {
 77 | 	return memcmp(a, b, sizeof(*a));
 78 | }
 79 | 
 80 | static struct bucket *hlookup(struct hashtable *h, struct in6_addr *src)
 81 | {
 82 | 	unsigned long origidx, idx;
 83 | 
 84 | 	origidx = htable_hash(h, src);
 85 | 	idx = origidx;
 86 | 
 87 | 	while (h->table[idx].ncrx && srcaddr_compar(&h->table[idx].src, src)) {
 88 | 		idx = htable_mask(idx + 1, h->order);
 89 | 		fatal_on(idx == origidx, "Worker hashtable is full\n");
 90 | 	}
 91 | 
 92 | 	return &h->table[idx];
 93 | }
 94 | 
/*
 * Mark the worker as having no pending wakeup. A tv_sec of -1 represents "no
 * wake needed"; tv_nsec is left untouched.
 */
static void reset_waketime(struct ncrx_worker *cur)
{
	cur->wake.tv_sec = -1;
}
102 | 
/*
 * Convert @t to whole milliseconds. A negative tv_sec produces a value that
 * is negative when reinterpreted as int64_t.
 */
static uint64_t ms_from_timespec(struct timespec *t)
{
	const long long whole_ms = t->tv_sec * 1000LL;
	const long frac_ms = t->tv_nsec / 1000000L;

	return whole_ms + frac_ms;
}
107 | 
108 | /*
109 |  * Update the waketime if @when is before the current waketime.
110 |  *
111 |  * We assume that CLOCK_MONOTONIC cannot wrap: strictly speaking this is wrong,
112 |  * since POSIX allows the MONOTONIC clock to start from any arbitrary value; but
113 |  * since it starts from zero on Linux I'm not going to jump through the hoops.
114 |  */
115 | static void maybe_update_wake(struct ncrx_worker *cur, uint64_t when)
116 | {
117 | 	uint64_t curwake = ms_from_timespec(&cur->wake);
118 | 	if ((int64_t)curwake >= 0LL && curwake <= when)
119 | 		return;
120 | 
121 | 	cur->wake.tv_sec = when / 1000LL;
122 | 	cur->wake.tv_nsec = (when % 1000LL) * 1000000L;
123 | }
124 | 
/*
 * Largest positive value a signed time_t can hold, used as the wait deadline
 * when no wakeup is pending.
 */
static const struct timespec end_of_time = {
	.tv_sec = (time_t)((1ULL << (sizeof(time_t) * 8 - 1)) - 1),
};
128 | 
129 | static const struct timespec *next_waketime(struct ncrx_worker *cur)
130 | {
131 | 	if (cur->wake.tv_sec == -1)
132 | 		return &end_of_time;
133 | 
134 | 	return &cur->wake;
135 | }
136 | 
/*
 * Map a timernode back to the bucket it is embedded in. @node must be the
 * timernode member of a struct bucket, never a list head.
 */
static struct bucket *bucket_from_timernode(struct timerlist *node)
{
	return container_of(node, struct bucket, timernode);
}
141 | 
142 | static void timerlist_init(struct timerlist *node)
143 | {
144 | 	node->next = node;
145 | 	node->prev = node;
146 | 	node->when = 0;
147 | }
148 | 
149 | static int timerlist_empty(struct timerlist *node)
150 | {
151 | 	return node->next == node;
152 | }
153 | 
154 | static void timerlist_append(struct timerlist *node, struct timerlist *list)
155 | {
156 | 	struct timerlist *prev = list->prev;
157 | 
158 | 	fatal_on(!timerlist_empty(node), "Queueing node already on list\n");
159 | 
160 | 	node->next = list;
161 | 	node->prev = prev;
162 | 	prev->next = node;
163 | 	list->prev = node;
164 | }
165 | 
166 | static void timerlist_del(struct timerlist *node)
167 | {
168 | 	struct timerlist *prev = node->prev;
169 | 	struct timerlist *next = node->next;
170 | 
171 | 	prev->next = next;
172 | 	next->prev = prev;
173 | 	timerlist_init(node);
174 | }
175 | 
176 | /*
177 |  * Return the callback time of the newest item on the list
178 |  */
179 | static uint64_t timerlist_peek(struct timerlist *list)
180 | {
181 | 	if (timerlist_empty(list))
182 | 		return 0;
183 | 
184 | 	return list->prev->when;
185 | }
186 | 
/*
 * Walk every member of the list headed by @thead, oldest first.
 *
 * @this: struct timerlist * cursor, set to each member in turn
 * @n: struct timerlist * scratch variable holding the member after @this
 * @thead: List head
 *
 * Because @n is fetched before the loop body runs, the body may unlink @this
 * without breaking the walk.
 */
#define timerlist_for_each(this, n, thead) \
	for (this = (thead)->next, n = this->next; this != (thead); \
		this = n, n = this->next)
190 | 
191 | static struct timerlist *create_timerlists(void)
192 | {
193 | 	struct timerlist *ret;
194 | 	int i;
195 | 
196 | 	ret = calloc(NETCONS_RTO, sizeof(*ret));
197 | 	if (!ret)
198 | 		fatal("Unable to allocate timerlist\n");
199 | 
200 | 	for (i = 0; i < NETCONS_RTO; i++)
201 | 		timerlist_init(&ret[i]);
202 | 
203 | 	return ret;
204 | }
205 | 
/*
 * Free a timer wheel allocated by create_timerlists(). Only the array of list
 * heads is released; nodes still linked into it are not touched.
 */
static void destroy_timerlists(struct timerlist *timerlist)
{
	free(timerlist);
}
210 | 
211 | static struct hashtable *create_hashtable(int order, struct hashtable *old)
212 | {
213 | 	struct hashtable *new;
214 | 	struct bucket *bkt;
215 | 	unsigned long i;
216 | 
217 | 	new = zalloc(sizeof(*new) + sizeof(struct bucket) * (1UL << order));
218 | 	if (!new)
219 | 		fatal("Unable to allocate hashtable\n");
220 | 
221 | 	new->order = order;
222 | 
223 | 	if (!old)
224 | 		return new;
225 | 
226 | 	for (i = 0; i < (1UL << old->order); i++) {
227 | 		if (old->table[i].ncrx) {
228 | 			bkt = hlookup(new, &old->table[i].src);
229 | 			memcpy(bkt, &old->table[i], sizeof(*bkt));
230 | 
231 | 			/*
232 | 			 * If the timernode wasn't on a list, initialize it as
233 | 			 * empty for the new bucket. If it was, update its
234 | 			 * neighbors to point to the new bucket.
235 | 			 */
236 | 			if (bkt->timernode.next == &old->table[i].timernode) {
237 | 				timerlist_init(&bkt->timernode);
238 | 			} else {
239 | 				bkt->timernode.next->prev = &bkt->timernode;
240 | 				bkt->timernode.prev->next = &bkt->timernode;
241 | 			}
242 | 		}
243 | 	}
244 | 
245 | 	new->load = old->load;
246 | 
247 | 	free(old);
248 | 	return new;
249 | }
250 | 
251 | static void destroy_hashtable(struct hashtable *ht)
252 | {
253 | 	unsigned long i;
254 | 
255 | 	for (i = 0; i < (1UL << ht->order); i++)
256 | 		if (ht->table[i].ncrx)
257 | 			ncrx_destroy(ht->table[i].ncrx);
258 | 
259 | 	free(ht);
260 | }
261 | 
262 | static void maybe_resize_hashtable(struct ncrx_worker *cur, unsigned long new)
263 | {
264 | 	unsigned long neworder;
265 | 
266 | 	if ((cur->ht->load + new) >> (cur->ht->order - 2) < 3)
267 | 		return;
268 | 
269 | 	/*
270 | 	 * The hashtable is more than 75% full. Resize it such that it can take
271 | 	 * @new additional client hosts and be less than 50% full.
272 | 	 */
273 | 	neworder = LONG_BIT - __builtin_clzl(cur->ht->load + new) + 1;
274 | 	cur->ht = create_hashtable(neworder, cur->ht);
275 | }
276 | 
277 | static void hdelete(struct hashtable *h, struct bucket *victim)
278 | {
279 | 	struct bucket *old, *new;
280 | 	unsigned long origidx, idx;
281 | 
282 | 	fatal_on(!victim->ncrx, "Attempt to delete free bucket\n");
283 | 
284 | 	if (!timerlist_empty(&victim->timernode))
285 | 		timerlist_del(&victim->timernode);
286 | 
287 | 	h->load--;
288 | 	ncrx_destroy(victim->ncrx);
289 | 	memset(victim, 0, sizeof(*victim));
290 | 
291 | 	/*
292 | 	 * There's potential to be clever here, but for now just be pedantic and
293 | 	 * rebucket any potentially probed entries.
294 | 	 */
295 | 
296 | 	origidx = victim - h->table;
297 | 	idx = origidx;
298 | 	while (h->table[idx].ncrx) {
299 | 		old = &h->table[idx];
300 | 		new = hlookup(h, &old->src);
301 | 		if (new != old) {
302 | 			memcpy(new, old, sizeof(*new));
303 | 			memset(old, 0, sizeof(*old));
304 | 
305 | 			/*
306 | 			 * If the timernode wasn't on a list, initialize it as
307 | 			 * empty for the new bucket. If it was, update its
308 | 			 * neighbors to point to the new bucket.
309 | 			 */
310 | 			if (new->timernode.next == &old->timernode) {
311 | 				timerlist_init(&new->timernode);
312 | 			} else {
313 | 				new->timernode.next->prev = &new->timernode;
314 | 				new->timernode.prev->next = &new->timernode;
315 | 			}
316 | 		}
317 | 
318 | 		idx = htable_mask(idx + 1, h->order);
319 | 		fatal_on(idx == origidx, "Infinite loop in hdelete()\n");
320 | 	}
321 | }
322 | 
323 | /*
324 |  * Simple garbage collection. This is meant to be rare (on the order of once per
325 |  * hour), so maintaining an LRU list isn't worth the overhead: just blow through
326 |  * the whole table. Worst case it's ~50MB.
327 |  */
328 | static void try_to_garbage_collect(struct ncrx_worker *cur)
329 | {
330 | 	unsigned long i, count = 0;
331 | 	uint64_t now, end;
332 | 	struct bucket *bkt;
333 | 
334 | 	now = now_mono_ms();
335 | 	for (i = 0; i < (1UL << cur->ht->order); i++) {
336 | 		bkt = &cur->ht->table[i];
337 | 
338 | 		if (bkt->ncrx && now - bkt->last_seen > cur->gc_age_ms) {
339 | 			hdelete(cur->ht, bkt);
340 | 			count++;
341 | 		}
342 | 	}
343 | 	end = now_mono_ms();
344 | 
345 | 	log("Worker %d GC'd %lu in %" PRIu64 "ms\n", cur->thread_nr, count,
346 | 			end - now);
347 | }
348 | 
349 | static void maybe_garbage_collect(struct ncrx_worker *cur)
350 | {
351 | 	uint64_t nowgc;
352 | 
353 | 	if (!cur->gc_int_ms)
354 | 		return;
355 | 
356 | 	nowgc = now_mono_ms() / cur->gc_int_ms;
357 | 	if (nowgc > cur->lastgc) {
358 | 		try_to_garbage_collect(cur);
359 | 		cur->lastgc = nowgc;
360 | 	}
361 | }
362 | 
363 | static void schedule_ncrx_callback(struct ncrx_worker *cur, struct bucket *bkt,
364 | 		uint64_t when)
365 | {
366 | 	struct timerlist *tgtlist;
367 | 	uint64_t now;
368 | 
369 | 	if (when == UINT64_MAX) {
370 | 		/*
371 | 		 * No callback needed. If we had one we no longer need it, so
372 | 		 * just remove ourselves from the timerlist.
373 | 		 */
374 | 		if (!timerlist_empty(&bkt->timernode))
375 | 			timerlist_del(&bkt->timernode);
376 | 
377 | 		return;
378 | 	}
379 | 
380 | 	/*
381 | 	 * Never queue messages outside the current window. This clamp() is what
382 | 	 * guarantees that the callbacks in the timerlists are strictly ordered
383 | 	 * from least to most recent: at any given moment only one callback time
384 | 	 * corresponds to each bucket, and time cannot go backwards.
385 | 	 */
386 | 	now = now_mono_ms();
387 | 	when = clamp(when, now + 1, now + NETCONS_RTO);
388 | 
389 | 	/*
390 | 	 * If the bucket is already on a timerlist, we only requeue it if the
391 | 	 * callback needs to happen earlier than the one currently queued.
392 | 	 */
393 | 	if (!timerlist_empty(&bkt->timernode)) {
394 | 		if (when > bkt->timernode.when)
395 | 			return;
396 | 
397 | 		timerlist_del(&bkt->timernode);
398 | 	}
399 | 
400 | 	tgtlist = &cur->tlist[when % NETCONS_RTO];
401 | 	fatal_on(when < timerlist_peek(tgtlist), "Timerlist ordering broken\n");
402 | 
403 | 	bkt->timernode.when = when;
404 | 	timerlist_append(&bkt->timernode, tgtlist);
405 | 	maybe_update_wake(cur, when);
406 | }
407 | 
408 | /*
409 |  * Read any pending messages out of the bucket, and invoke the output pipeline
410 |  * with the extended metadata.
411 |  */
412 | static void drain_bucket_ncrx(struct ncrx_worker *cur, struct bucket *bkt)
413 | {
414 | 	struct ncrx_msg *out;
415 | 	uint64_t when;
416 | 
417 | 	while ((out = ncrx_next_msg(bkt->ncrx))) {
418 | 		execute_output_pipeline(cur->thread_nr, &bkt->src, NULL, out);
419 | 		free(out);
420 | 	}
421 | 
422 | 	when = ncrx_invoke_process_at(bkt->ncrx);
423 | 	schedule_ncrx_callback(cur, bkt, when);
424 | }
425 | 
426 | /*
427 |  * Execute callbacks for a specific timerlist, until either the list is empty or
428 |  * we reach an entry that was queued for a time in the future.
429 |  */
430 | static void do_ncrx_callbacks(struct ncrx_worker *cur, struct timerlist *list)
431 | {
432 | 	uint64_t now = now_mono_ms();
433 | 	struct timerlist *tnode, *tmp;
434 | 	struct bucket *bkt;
435 | 
436 | 	timerlist_for_each(tnode, tmp, list) {
437 | 		if (tnode->when > now)
438 | 			break;
439 | 
440 | 		/*
441 | 		 * Remove the bucket from the list first, since it might end up
442 | 		 * being re-added to another timerlist by drain_bucket_ncrx().
443 | 		 */
444 | 		timerlist_del(tnode);
445 | 
446 | 		bkt = bucket_from_timernode(tnode);
447 | 		ncrx_process(NULL, now, 0, bkt->ncrx);
448 | 		drain_bucket_ncrx(cur, bkt);
449 | 	}
450 | }
451 | 
452 | /*
453 |  * We have no idea how large the queue we just processed was: it could have
454 |  * taken tens of seconds. So we must handle wraparound in the tlist array.
455 |  */
456 | static uint64_t run_ncrx_callbacks(struct ncrx_worker *cur, uint64_t lastrun)
457 | {
458 | 	uint64_t i, now = now_mono_ms();
459 | 
460 | 	if (now == lastrun)
461 | 		goto out;
462 | 
463 | 	fatal_on(now < lastrun, "Time went backwards\n");
464 | 
465 | 	/*
466 | 	 * It's possible we wrapped: in that case, we simply iterate over the
467 | 	 * entire wheel and drain each list until we hit a callback after now.
468 | 	 * Otherwise, we only iterate over the buckets that lie on [last,now].
469 | 	 */
470 | 	for (i = max(lastrun, now - NETCONS_RTO + 1); i <= now; i++)
471 | 		do_ncrx_callbacks(cur, &cur->tlist[i % NETCONS_RTO]);
472 | 
473 | out:
474 | 	return now;
475 | }
476 | 
477 | static void consume_msgbuf(struct ncrx_worker *cur, struct msg_buf *buf)
478 | {
479 | 	struct bucket *ncrx_bucket;
480 | 
481 | 	ncrx_bucket = hlookup(cur->ht, &buf->src.sin6_addr);
482 | 	if (!ncrx_bucket->ncrx) {
483 | 		ncrx_bucket->ncrx = ncrx_create(&ncrx_param);
484 | 		timerlist_init(&ncrx_bucket->timernode);
485 | 		memcpy(&ncrx_bucket->src, &buf->src.sin6_addr,
486 | 				sizeof(ncrx_bucket->src));
487 | 		cur->ht->load++;
488 | 	}
489 | 
490 | 	ncrx_bucket->last_seen = buf->rcv_time;
491 | 
492 | 	buf->buf[buf->rcv_bytes] = '\0';
493 | 	if (!ncrx_process(buf->buf, now_mono_ms(), buf->rcv_time,
494 | 			ncrx_bucket->ncrx)) {
495 | 		drain_bucket_ncrx(cur, ncrx_bucket);
496 | 		return;
497 | 	}
498 | 
499 | 	execute_output_pipeline(cur->thread_nr, &ncrx_bucket->src, buf, NULL);
500 | }
501 | 
502 | static struct msg_buf *grab_prequeue(struct ncrx_worker *cur)
503 | {
504 | 	struct msg_buf *ret;
505 | 
506 | 	assert_pthread_mutex_locked(&cur->queuelock);
507 | 	ret = cur->queue_head;
508 | 	cur->queue_head = NULL;
509 | 
510 | 	return ret;
511 | }
512 | 
/*
 * Entry point of a worker thread. @arg is the thread's struct ncrx_worker.
 *
 * The worker creates its own hashtable of per-source ncrx state and its own
 * timer wheel. It then loops until cur->stop is set: wait on cur->cond until
 * the next scheduled callback time, take everything off the queue, process
 * it, garbage collect, and run the callbacks that have become due.
 *
 * Always returns NULL. The number of hosts in the table at exit is left in
 * cur->hosts_seen.
 */
void *ncrx_worker_thread(void *arg)
{
	struct ncrx_worker *cur = arg;
	struct msg_buf *curbuf, *tmp;
	uint64_t lastrun = now_mono_ms();
	int nr_dequeued;

	cur->ht = create_hashtable(16, NULL);
	cur->tlist = create_timerlists();

	reset_waketime(cur);
	pthread_mutex_lock(&cur->queuelock);
	while (!cur->stop) {
		/*
		 * The return value is ignored: whether we timed out or were
		 * woken up, the work below is the same.
		 */
		pthread_cond_timedwait(&cur->cond, &cur->queuelock,
				next_waketime(cur));

		/* Callbacks scheduled below set the next wake time afresh */
		reset_waketime(cur);
morework:
		/* Take the whole queue, then process it without the lock */
		curbuf = grab_prequeue(cur);
		nr_dequeued = cur->nr_queued;
		cur->nr_queued = 0;
		pthread_mutex_unlock(&cur->queuelock);

		/*
		 * Make room up front for the case where every dequeued buffer
		 * comes from a host that is not in the table yet.
		 */
		maybe_resize_hashtable(cur, nr_dequeued);

		while ((tmp = curbuf)) {
			consume_msgbuf(cur, curbuf);
			curbuf = curbuf->next;
			free(tmp);

			cur->processed++;
		}

		if (!cur->stop) {
			maybe_garbage_collect(cur);
			lastrun = run_ncrx_callbacks(cur, lastrun);
		}

		/*
		 * If more buffers arrived while we were busy, handle them
		 * right away instead of going back to sleep.
		 */
		pthread_mutex_lock(&cur->queuelock);
		if (cur->queue_head)
			goto morework;
	}

	/*
	 * NOTE(review): queuelock is still held here and is not released
	 * before returning - confirm that whoever joins this thread never
	 * needs to take it afterwards.
	 */
	assert_pthread_mutex_locked(&cur->queuelock);
	fatal_on(cur->queue_head != NULL, "Worker queue not empty at exit\n");

	cur->hosts_seen = cur->ht->load;
	destroy_timerlists(cur->tlist);
	destroy_hashtable(cur->ht);
	return NULL;
}
564 | 


--------------------------------------------------------------------------------