├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .mdlrc ├── .packit.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── include ├── common.h ├── jhash.h ├── listener.h ├── log.h ├── msgbuf-struct.h ├── output.h ├── threads.h └── worker.h ├── listener.c ├── main.c ├── modules ├── Makefile ├── logger.cc └── printer.c ├── ncrx ├── Makefile ├── libncrx.c ├── ncrx-struct.h ├── ncrx.c ├── ncrx.h ├── nctx.c └── netcons-gen.py ├── output.c ├── threads.c ├── util ├── Makefile └── netconsblaster.c └── worker.c /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | on: 3 | push: 4 | branches: [main] 5 | pull_request: 6 | jobs: 7 | build: 8 | name: Build netconsd 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | include: 13 | - cc: gcc 14 | cxx: g++ 15 | - cc: clang 16 | cxx: clang++ 17 | env: 18 | CC: ${{ matrix.cc }} 19 | CXX: ${{ matrix.cxx }} 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v2 23 | - name: Build netconsd 24 | run: make 25 | - name: Build netconsblaster 26 | run: make -C util 27 | markdown: 28 | name: Markdown 29 | runs-on: ubuntu-latest 30 | steps: 31 | - name: Checkout repository 32 | uses: actions/checkout@v2 33 | - name: Lint Markdown 34 | uses: actionshub/markdownlint@2.0.2 35 | - name: Check links 36 | uses: gaurav-nelson/github-action-markdown-link-check@1.0.13 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.d 2 | *.o 3 | netconsd 4 | modules/*.so 5 | util/netconsblaster 6 | /netconsd-*.tar.gz 7 | /netconsd-*.src.rpm 8 | /netconsd.spec 9 | Cargo.lock 10 | /target 11 | libnetconsd.a 12 | -------------------------------------------------------------------------------- /.mdlrc: 
-------------------------------------------------------------------------------- 1 | rules '~MD013', '~MD014', '~MD029', '~MD034' 2 | -------------------------------------------------------------------------------- /.packit.yaml: -------------------------------------------------------------------------------- 1 | # See the documentation for more information: 2 | # https://packit.dev/docs/configuration/ 3 | 4 | specfile_path: netconsd.spec 5 | files_to_sync: 6 | - netconsd.spec 7 | - .packit.yaml 8 | 9 | upstream_package_name: netconsd 10 | downstream_package_name: netconsd 11 | actions: 12 | # Fetch the specfile from Rawhide, drop any patches and disable rpmautospec 13 | post-upstream-clone: "bash -c \"curl -s https://src.fedoraproject.org/rpms/netconsd/raw/main/f/netconsd.spec | sed -e '/^Patch[0-9]/d' -e '/^%autochangelog$/d' > netconsd.spec\"" 14 | 15 | srpm_build_deps: 16 | - bash 17 | - curl 18 | - sed 19 | 20 | jobs: 21 | - job: copr_build 22 | trigger: commit 23 | owner: "@meta" 24 | project: netconsd 25 | targets: 26 | - fedora-all-aarch64 27 | - fedora-all-i386 28 | - fedora-all-ppc64le 29 | - fedora-all-s390x 30 | - fedora-all-x86_64 31 | - fedora-eln-aarch64 32 | - fedora-eln-i386 33 | - fedora-eln-ppc64le 34 | - fedora-eln-s390x 35 | - fedora-eln-x86_64 36 | - epel-8-aarch64 37 | - epel-8-ppc64le 38 | - epel-8-s390x 39 | - epel-8-x86_64 40 | - epel-9-aarch64 41 | - epel-9-ppc64le 42 | - epel-9-s390x 43 | - epel-9-x86_64 44 | - job: copr_build 45 | trigger: pull_request 46 | owner: "@meta" 47 | project: netconsd 48 | targets: 49 | - fedora-all-aarch64 50 | - fedora-all-i386 51 | - fedora-all-ppc64le 52 | - fedora-all-s390x 53 | - fedora-all-x86_64 54 | - fedora-eln-aarch64 55 | - fedora-eln-i386 56 | - fedora-eln-ppc64le 57 | - fedora-eln-s390x 58 | - fedora-eln-x86_64 59 | - epel-8-aarch64 60 | - epel-8-ppc64le 61 | - epel-8-s390x 62 | - epel-8-x86_64 63 | - epel-9-aarch64 64 | - epel-9-ppc64le 65 | - epel-9-s390x 66 | - epel-9-x86_64 67 | 
-------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 
39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to netconsd 2 | 3 | We want to make contributing to this project as easy and transparent as 4 | possible. 5 | 6 | ## Our Development Process 7 | 8 | This repository is synced from an internal repository. We gladly accept 9 | pull requests and will deal with the merging appropriately. 10 | 11 | ## Contributor License Agreement ("CLA") 12 | 13 | In order to accept your pull request, we need you to submit a CLA. You only 14 | need to do this once to work on any of Facebook's open source projects. 15 | 16 | Complete your CLA here: 17 | 18 | ## Issues 19 | 20 | We use GitHub issues to track public bugs. Please ensure your description is 21 | clear and has sufficient instructions to be able to reproduce the issue. 22 | 23 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the 24 | safe disclosure of security bugs. In those cases, please go through the 25 | process outlined on that page and do not file a public issue. 26 | 27 | ## Sending a pull request 28 | 29 | Have a fix or feature? Awesome! When you send the pull request we suggest you 30 | include a build output. 31 | 32 | We will hold all contributions to the same quality and style standards as the 33 | existing code. 34 | 35 | ## License 36 | 37 | By contributing to this repository, you agree that your contributions will be 38 | licensed in accordance to the LICENSE document in the root of this repository. 
39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC ?= gcc 2 | 3 | LIBS = -lpthread 4 | CFLAGS ?= -O2 -fPIC 5 | CFLAGS += -D_GNU_SOURCE -fno-strict-aliasing -Wall -Wextra \ 6 | -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations \ 7 | -Wdeclaration-after-statement -Wno-missing-field-initializers \ 8 | -Wno-unused-parameter 9 | CPPFLAGS ?= 10 | INCLUDES = -Incrx 11 | 12 | UNAME := $(shell uname) 13 | ifneq ($(UNAME), OpenBSD) 14 | LIBS += -lrt -ldl 15 | endif 16 | 17 | debug debug32: CFLAGS += -O0 -gdwarf-4 -fno-omit-frame-pointer \ 18 | -fstack-protector-all -fsanitize=address \ 19 | -fsanitize=undefined 20 | debug debug32: LDFLAGS ?= -lasan -lubsan 21 | 22 | 32bit: CFLAGS += -m32 23 | 32bit: LDFLAGS ?= -m32 24 | 25 | disasm: CFLAGS += -fverbose-asm 26 | 27 | binary = netconsd 28 | lib = ncrx/libncrx.o 29 | liball = libnetconsd.a 30 | obj = threads.o listener.o worker.o output.o main.o 31 | rlibobj = threads.o listener.o worker.o output.o 32 | asm = $(obj:.o=.s) 33 | 34 | all: $(binary) mods 35 | rlib: $(liball) 36 | 32bit: $(binary) mods 37 | 38 | debug: all 39 | debug32: 32bit 40 | disasm: $(asm) 41 | 42 | -include $(obj:.o=.d) 43 | 44 | $(binary): $(lib) $(obj) 45 | $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(lib) $(obj) $(LIBS) -o $@ 46 | 47 | $(liball): $(rlibobj) $(lib) 48 | ar rc $@ $(rlibobj) $(lib) 49 | 50 | %.o: %.c 51 | $(CC) $< $(CPPFLAGS) $(CFLAGS) $(INCLUDES) -c -o $@ 52 | $(CC) -MM $< $(INCLUDES) > $(@:.o=.d) 53 | 54 | %.s: %.c 55 | $(CC) $< $(CPPFLAGS) $(CFLAGS) $(INCLUDES) -c -S -o $@ 56 | 57 | $(lib): 58 | $(MAKE) -e -C ncrx 59 | 60 | mods: 61 | $(MAKE) -e -C modules 62 | 63 | utils: 64 | $(MAKE) -e -C util 65 | 66 | clean: 67 | rm -f netconsd *.o *.d *.s 68 | rm -f modules/*.o modules/*.so 69 | rm -f ncrx/*.o ncrx/*.d 70 | rm -f util/netconsblaster 71 | rm -f libnetconsd.a 72 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Netconsd: The Netconsole Daemon 2 | 3 | [![Continuous Integration](https://github.com/facebook/netconsd/workflows/Continuous%20Integration/badge.svg?event=push)](https://github.com/facebook/netconsd/actions?query=workflow%3A%22Continuous+Integration%22) 4 | 5 | This is a daemon for receiving and processing logs from the Linux Kernel, as 6 | emitted over a network by the kernel's netconsole module. It supports both the 7 | old "legacy" text-only format, and the new extended format added in v4.4. 8 | 9 | The core of the daemon does nothing but process messages and drop them: in order 10 | to make the daemon useful, the user must supply one or more "output modules". 11 | These modules are shared object files which expose a small ABI that is called by 12 | netconsd with the content and metadata for netconsole messages it receives. 13 | 14 | This README explains how to build netconsd and use it with one of the existing 15 | output modules in the modules/ directory. The end discusses how to write your 16 | own custom output module. 17 | 18 | ## Building netconsd 19 | 20 | The default Makefile target intended for production use has no external 21 | dependencies besides glibc. To build it, just say `make` (or `gmake` on BSD): 22 | you'll end up with a single executable in this directory called `netconsd`, and 23 | a `*.so` file for every module in the `modules/` directory. 24 | 25 | The Makefile includes a few other handy targets: 26 | 27 | * `debug`: Adds the usual debug flags, and also enables the ASAN and 28 | UBSAN sanitizers. You'll need to install libasan/libubsan on 29 | your system to build this target and run the binaries. 30 | * `32bit`: Forces 32-bit compilation on x86_64 systems, for easily 31 | testing portability to 32-bit CPU architectures. 
You'll need 32 | to install 32-bit libraries if your distro doesn't have them. 33 | * `debug32`: Union of the `32bit` and `debug` targets. 34 | * `disasm`: Emits verbose annotated disassembly in `*.s` files. 35 | 36 | If you want to build the daemon with clang, just append `CC="clang"` to your 37 | make invocation. All the above targets should build with both clang and gcc. 38 | 39 | ## Running netconsd 40 | 41 | ### Setting up the server 42 | 43 | By default, netconsd will use 1 listener and 2 worker threads, and listen on 44 | port 1514 for messages. You can use `-l`, `-w`, and `-u` respectively to change 45 | the defaults. 46 | 47 | There's no universal wisdom about how many threads to use: just experiment with 48 | different numbers and use netconsblaster to load up the server. Both the blaster 49 | and the server will print how many packets they sent/processed. 50 | 51 | If you run out of memory and OOM, you need more workers; if you see messages 52 | being dropped, you need more listeners. The tuning here will obviously depend on 53 | what your output module does: make sure to pass it when you do your testing. 54 | 55 | For the simplest setup, just run: 56 | 57 | ``` 58 | $ make -s 59 | $ ./netconsd ./modules/printer.so 60 | ``` 61 | 62 | Netconsd will listen on `INADDR_ANY` and `IN6ADDR_ANY`, unless you pass a 63 | specific IPv4 or IPv6 address to listen on using the `-a` argument. 64 | 65 | Note that some systems (at least, OpenBSD) do not allow dual stack sockets at 66 | all, so as currently written netconsd is only capable of receiving IPv6 67 | netconsole packets on those systems. 68 | 69 | ### Setting up the client 70 | 71 | The netconsole module takes a parameter like this: 72 | 73 | ``` 74 | netconsole=[+][r]${sport}@${saddr}/${intf},${dport}@${daddr}/${dmac} 75 | ``` 76 | 77 | The fields are as follows: 78 | 79 | 1. `sport`: Source port for the netconsole UDP packets 80 | 2. `saddr`: Source address for the netconsole UDP packets 81 | 3. 
`intf`: The name of the interface to send the UDP packets from 82 | 4. `dport`: Destination port for the netconsole UDP packets 83 | 5. `daddr`: Destination address for the netconsole UDP packets 84 | 6. `dmac`: Destination L2 MAC address for the netconsole UDP packets 85 | 86 | We need (6) because of how low-level netconsole is: it can't consult the routing 87 | table to send the packet, so it must know a priori what MAC address to use in 88 | the Ethernet frame it builds. 89 | 90 | If you're talking to a server on the same L2 segment as the client, use the MAC 91 | address of that server. Otherwise, use the MAC address of your router. You can 92 | use the following quick shell one-liners to easily get the MAC of the router: 93 | 94 | * IPv6: `ip -6 neighbor show | grep router` 95 | * IPv4: `sudo arp -a | grep gateway` 96 | 97 | Here are a couple examples for the parameter above: 98 | 99 | ``` 100 | IPv6: netconsole=+r6666@2401:db00:11:801e:face:0:31:0/eth0,1514@2401:db00:11:d0be:face:0:1b:0/c0:8c:60:3d:0d:bc 101 | IPv4: netconsole=6666@192.168.0.22/eth0,1514@192.168.0.1/00:00:0c:9f:f1:90 102 | ``` 103 | 104 | Prepending `+` to the cmdline will cause kernels that support it to use extended 105 | netconsole, which you almost certainly want. Kernels too old to support extcon 106 | will silently ignore the `+`. 107 | 108 | Adding the `r` to the command line will cause netconsole to emit the kernel 109 | release version in the first field of the extended message. For that, you need 110 | to have extended log (extcon) enabled. 111 | 112 | Once you have your parameter constructed, just insert the module with it: 113 | 114 | ``` 115 | $ sudo modprobe netconsole netconsole=+r6666@2401:db00:11:801e:face:0:31:0/eth0,1514@2401:db00:11:d0be:face:0:1b:0/c0:8c:60:3d:0d:bc 116 | ``` 117 | 118 | You're good to go! 
119 | 120 | ### Testing on the client 121 | 122 | Now that everything is running, you can use `/dev/kmsg` to write some logs: 123 | 124 | ``` 125 | $ sudo bash -c 'echo "Hello world!" > /dev/kmsg' 126 | $ sudo bash -c 'echo "<0>OMG!" > /dev/kmsg' 127 | ``` 128 | 129 | The `<0>` tells the kernel what loglevel to use: 0 is `KERN_EMERG`, which ensures 130 | your message will actually get transmitted. 131 | 132 | ## Writing an output module 133 | 134 | ### Interface to netconsd 135 | 136 | Output modules are shared object files loaded with `dlopen()` at runtime by 137 | netconsd. Netconsd will look for three functions in your module: 138 | 139 | 1. `int netconsd_output_init(int worker_thread_count)` 140 | 2. `void netconsd_output_handler(int thread, struct in6_addr *src, struct msg_buf *buf, struct ncrx_msg *msg)` 141 | 3. `void netconsd_output_exit(void)` 142 | 143 | If (1) exists, it is called when your module is loaded: the argument tells you 144 | how many worker threads netconsd is going to call your module from. If you 145 | return non-zero from this function, netconsd will `abort()` and exit. 146 | 147 | If (3) exists, it is called when netconsd unloads your module. 148 | 149 | For every message it receives, netconsd will call (2) in your module. The code 150 | must be reentrant: `netconsd_output_handler()` will be called concurrently from 151 | all of the worker threads in netconsd. The `thread` argument tells you which 152 | worker is invoking the function, which makes it easy to have per-thread data. 153 | 154 | Netconsd uses a consistent hash to decide which worker to pass messages to, so 155 | messages from the same remote address will always be queued to the same thread. 156 | 157 | The `src` argument will always point to an `in6_addr` struct containing the source 158 | address of the netconsole packet. If the source was an IPv4 address, it will be 159 | formatted like `::FFFF:a.b.c.d` (see `man ipv6` for details). 
160 | 161 | If the message had extended metadata, `msg` will point to the `ncrx_msg` struct 162 | containing that metadata and `buf` will be `NULL`. Otherwise, `msg` will be `NULL` 163 | and `buf` will point to a `msg_buf` struct with the raw message text. 164 | 165 | Output modules must not modify the structures passed in. The memory backing all 166 | the pointers passed in will be freed immediately after the handler returns. 167 | 168 | ### Building the modules 169 | 170 | For modules written in C this is trivial: just compile with `-shared`. 171 | 172 | For modules written in C++ it can be a bit trickier: you will probably need to 173 | build with `-static-libstdc++` and/or `-static-libgcc` to make this work. 174 | 175 | See the code and Makefile in `modules/` for some examples of the above. 176 | 177 | ## Contributing 178 | 179 | See the CONTRIBUTING file for how to help out. 180 | 181 | ## License 182 | 183 | netconsd is BSD licensed; see the LICENSE file for more information. 184 | 185 | netconsd was originally written by Calvin Owens as part of 186 | [fbkutils](https://github.com/facebookarchive/fbkutils) in 2016, with later 187 | contributions by several other people. The ncrx library was originally written 188 | by Tejun Heo. This repository is a direct continuation of that codebase. 189 | -------------------------------------------------------------------------------- /include/common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the BSD-style license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | #ifndef __COMMON_H__ 9 | #define __COMMON_H__ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "log.h" 21 | #include "jhash.h" 22 | 23 | #define min(x, y) ({ \ 24 | typeof(x) _min1 = (x); \ 25 | typeof(y) _min2 = (y); \ 26 | (void) (&_min1 == &_min2); \ 27 | _min1 < _min2 ? _min1 : _min2; }) 28 | 29 | #define max(x, y) ({ \ 30 | typeof(x) _max1 = (x); \ 31 | typeof(y) _max2 = (y); \ 32 | (void) (&_max1 == &_max2); \ 33 | _max1 > _max2 ? _max1 : _max2; }) 34 | 35 | #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) 36 | 37 | #define container_of(ptr, type, member) ({ \ 38 | const typeof( ((type *)0)->member ) *__mptr = (ptr); \ 39 | (type *)( (char *)__mptr - __builtin_offsetof(type,member) );}) 40 | 41 | static inline void *zalloc(size_t n) 42 | { 43 | return calloc(1, n); 44 | } 45 | 46 | #define assert_pthread_mutex_locked(m) \ 47 | do { \ 48 | fatal_on(pthread_mutex_trylock(m) != EBUSY, "UNLOCKED!\n"); \ 49 | } while (0) 50 | 51 | static inline uint64_t now_ms(clockid_t clock) 52 | { 53 | struct timespec t; 54 | int ret; 55 | 56 | ret = clock_gettime(clock, &t); 57 | fatal_on(ret, "Oops, clock_gettime() barfed: %m (-%d)\n", errno); 58 | 59 | return t.tv_sec * 1000LL + t.tv_nsec / 1000000L; 60 | } 61 | 62 | static inline uint64_t now_mono_ms(void) 63 | { 64 | return now_ms(CLOCK_MONOTONIC); 65 | } 66 | 67 | static inline uint64_t now_real_ms(void) 68 | { 69 | return now_ms(CLOCK_REALTIME); 70 | } 71 | 72 | struct netconsd_params { 73 | int nr_workers; 74 | int nr_listeners; 75 | int mmsg_batch; 76 | unsigned int gc_int_ms; 77 | unsigned int gc_age_ms; 78 | struct sockaddr_in6 listen_addr; 79 | }; 80 | 81 | #endif /* __COMMON_H__ */ 82 | -------------------------------------------------------------------------------- /include/jhash.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta 
Platforms, Inc. and affiliates. 3 | * 4 | * Lifted from 4.4 Linux kernel source. Alterations for netconsd: 5 | * - Pulled in rol32() from linux/bitops.h 6 | * - Use stdint fixed-width types instead of kernel shorthand types 7 | * - Deleted unaligned jhash() because we don't use it and C++ hates it. 8 | */ 9 | 10 | #ifndef _LINUX_JHASH_H 11 | #define _LINUX_JHASH_H 12 | 13 | /* jhash.h: Jenkins hash support. 14 | * 15 | * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) 16 | * 17 | * http://burtleburtle.net/bob/hash/ 18 | * 19 | * These are the credits from Bob's sources: 20 | * 21 | * lookup3.c, by Bob Jenkins, May 2006, Public Domain. 22 | * 23 | * These are functions for producing 32-bit hashes for hash table lookup. 24 | * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() 25 | * are externally useful functions. Routines to test the hash are included 26 | * if SELF_TEST is defined. You can use this free for any purpose. It's in 27 | * the public domain. It has no warranty. 28 | * 29 | * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) 30 | * 31 | * I've modified Bob's hash to be useful in the Linux kernel, and 32 | * any bugs present are my fault. 33 | * Jozsef 34 | */ 35 | 36 | #include 37 | 38 | static inline uint32_t rol32(uint32_t word, unsigned int shift) 39 | { 40 | return (word << shift) | (word >> (32 - shift)); 41 | } 42 | 43 | /* Best hash sizes are of power of two */ 44 | #define jhash_size(n) ((uint32_t)1<<(n)) 45 | /* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */ 46 | #define jhash_mask(n) (jhash_size(n)-1) 47 | 48 | /* __jhash_mix -- mix 3 32-bit values reversibly. 
*/ 49 | #define __jhash_mix(a, b, c) \ 50 | { \ 51 | a -= c; a ^= rol32(c, 4); c += b; \ 52 | b -= a; b ^= rol32(a, 6); a += c; \ 53 | c -= b; c ^= rol32(b, 8); b += a; \ 54 | a -= c; a ^= rol32(c, 16); c += b; \ 55 | b -= a; b ^= rol32(a, 19); a += c; \ 56 | c -= b; c ^= rol32(b, 4); b += a; \ 57 | } 58 | 59 | /* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */ 60 | #define __jhash_final(a, b, c) \ 61 | { \ 62 | c ^= b; c -= rol32(b, 14); \ 63 | a ^= c; a -= rol32(c, 11); \ 64 | b ^= a; b -= rol32(a, 25); \ 65 | c ^= b; c -= rol32(b, 16); \ 66 | a ^= c; a -= rol32(c, 4); \ 67 | b ^= a; b -= rol32(a, 14); \ 68 | c ^= b; c -= rol32(b, 24); \ 69 | } 70 | 71 | /* 72 | * Arbitrary initial parameters 73 | */ 74 | #define JHASH_INITVAL 0xdeadbeef 75 | #define LISTEN_SEED 0xfaceb00c 76 | #define WORKER_SEED 0xb00cface 77 | 78 | /* jhash2 - hash an array of uint32_t's 79 | * @k: the key which must be an array of uint32_t's 80 | * @length: the number of uint32_t's in the key 81 | * @initval: the previous hash, or an arbitray value 82 | * 83 | * Returns the hash value of the key. 
84 | */ 85 | static inline __attribute__((pure)) uint32_t jhash2(const uint32_t *k, 86 | uint32_t length, uint32_t initval) 87 | { 88 | uint32_t a, b, c; 89 | 90 | /* Set up the internal state */ 91 | a = b = c = JHASH_INITVAL + (length<<2) + initval; 92 | 93 | /* Handle most of the key */ 94 | while (length > 3) { 95 | a += k[0]; 96 | b += k[1]; 97 | c += k[2]; 98 | __jhash_mix(a, b, c); 99 | length -= 3; 100 | k += 3; 101 | } 102 | 103 | /* Handle the last 3 uint32_t's: all the case statements fall through */ 104 | switch (length) { 105 | case 3: c += k[2]; __attribute__((fallthrough)); 106 | case 2: b += k[1]; __attribute__((fallthrough)); 107 | case 1: a += k[0]; 108 | __jhash_final(a, b, c); 109 | break; 110 | case 0: /* Nothing left to add */ 111 | break; 112 | } 113 | 114 | return c; 115 | } 116 | 117 | #endif /* _LINUX_JHASH_H */ 118 | -------------------------------------------------------------------------------- /include/listener.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the BSD-style license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | #ifndef __LISTENER_H__ 9 | #define __LISTENER_H__ 10 | 11 | #include "threads.h" 12 | #include 13 | 14 | #define RCVBUF_SIZE 1024 15 | 16 | struct ncrx_worker; 17 | 18 | struct ncrx_prequeue { 19 | struct msg_buf *queue_head; 20 | struct msg_buf *queue_tail; 21 | int count; 22 | }; 23 | 24 | struct ncrx_listener { 25 | pthread_t id; 26 | int thread_nr; 27 | struct ncrx_prequeue *prequeues; 28 | struct ncrx_worker *workers; 29 | int nr_workers; 30 | int batch; 31 | uint64_t processed; 32 | struct sockaddr_in6 *address; 33 | 34 | /* 35 | * Flags 36 | */ 37 | unsigned stop:1; 38 | }; 39 | 40 | void *udp_listener_thread(void *arg); 41 | 42 | #endif /* __LISTENER_H__ */ 43 | -------------------------------------------------------------------------------- /include/log.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the BSD-style license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | #ifndef __LOG_H__ 8 | #define __LOG_H__ 9 | 10 | #include 11 | #include 12 | 13 | #define LOGPFX "[fb-netconsd] " 14 | 15 | #define S(x) #x 16 | #define S_(x) S(x) 17 | #define S__LINE__ S_(__LINE__) 18 | 19 | #define __log(pfx, ...) \ 20 | do { \ 21 | printf(LOGPFX __FILE__ ":" S__LINE__ ": " pfx __VA_ARGS__); \ 22 | fflush(stdout); \ 23 | } while (0) 24 | 25 | #define fatal(...) \ 26 | do { \ 27 | __log("FATAL: ", __VA_ARGS__); \ 28 | abort(); \ 29 | } while (0) 30 | 31 | #define warn(...) \ 32 | do { \ 33 | __log("WARNING: ", __VA_ARGS__); \ 34 | } while (0) 35 | 36 | #define log(...) \ 37 | do { \ 38 | __log("INFO: ", __VA_ARGS__); \ 39 | } while (0) 40 | 41 | #ifdef DEBUG 42 | #define debug(...) \ 43 | do { \ 44 | __log("DEBUG: ", __VA_ARGS__); \ 45 | } while (0) 46 | #else 47 | #define debug(...) do {} while (0) 48 | #endif 49 | 50 | #define fatal_on(cond, ...) 
\ 51 | do { \ 52 | if (__builtin_expect(cond, 0)) { \ 53 | fatal(__VA_ARGS__); \ 54 | } \ 55 | } while (0) 56 | 57 | #define log_once(...) \ 58 | do { \ 59 | static int _t; \ 60 | if (__builtin_expect(!_t, 0)) { \ 61 | log(__VA_ARGS__); \ 62 | _t = -1; \ 63 | } \ 64 | } while (0) 65 | /* Log on every n-th execution of this call site; _t is a per-call-site counter starting at 1. */ 66 | #define log_every(n, ...) \ 67 | do { \ 68 | static int _t = 1; \ 69 | if (__builtin_expect(!(_t % (n)), 0)) \ 70 | log(__VA_ARGS__); \ 71 | _t++; \ 72 | } while (0) 73 | 74 | #endif /* __LOG_H__ */ 75 | -------------------------------------------------------------------------------- /include/msgbuf-struct.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the BSD-style license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #ifndef __MSGBUF_STRUCT_H__ 9 | #define __MSGBUF_STRUCT_H__ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #ifdef __cplusplus 17 | #define __cpp extern "C" 18 | #else 19 | #define __cpp 20 | #endif 21 | 22 | struct ncrx_msg; 23 | 24 | struct msg_buf { 25 | struct msg_buf *next; 26 | 27 | struct iovec iovec; 28 | struct sockaddr_in6 src; 29 | uint64_t rcv_time; 30 | int rcv_flags; 31 | int rcv_bytes; 32 | 33 | char buf[]; 34 | }; 35 | 36 | __cpp int netconsd_output_init(int nr_workers); 37 | __cpp void netconsd_output_exit(void); 38 | __cpp void netconsd_output_handler(int t, struct in6_addr *src, 39 | struct msg_buf *b, struct ncrx_msg *m); 40 | 41 | #endif /* __MSGBUF_STRUCT_H__ */ 42 | -------------------------------------------------------------------------------- /include/output.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the BSD-style license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | #ifndef __OUTPUT_H__ 9 | #define __OUTPUT_H__ 10 | 11 | #include 12 | 13 | #include "msgbuf-struct.h" 14 | 15 | #define MAXOUTS 32 16 | 17 | int register_output_module(char *path, int nr_workers); 18 | void destroy_output_modules(void); 19 | 20 | void execute_output_pipeline(int thread_nr, struct in6_addr *src, 21 | struct msg_buf *buf, struct ncrx_msg *msg); 22 | 23 | #endif /* __OUTPUT_H__ */ 24 | -------------------------------------------------------------------------------- /include/threads.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the BSD-style license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #ifndef __NCRX_THREADS_H__ 9 | #define __NCRX_THREADS_H__ 10 | 11 | #include "msgbuf-struct.h" 12 | #include "common.h" 13 | 14 | struct tctl; 15 | struct ncrx_listener; 16 | 17 | void enqueue_and_wake_all(struct ncrx_listener *listener); 18 | struct tctl *create_threads(struct netconsd_params *p); 19 | void destroy_threads(struct tctl *ctl); 20 | 21 | #endif /* __NCRX_THREADS_H__ */ 22 | -------------------------------------------------------------------------------- /include/worker.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the BSD-style license found in the 5 | * LICENSE file in the root directory of this source tree. 
/*
 * Decide what to do after recvmmsg() delivered no packets.
 *
 * @err: the error code to act on, normally the errno value the caller
 *       captured right after the call; 0 means "no error was reported".
 *
 * Returns only for EINTR.  Every other value ends in fatal(), which logs the
 * message and abort()s.  The report is built from @err rather than from the
 * global errno, so it always describes the code that was actually examined.
 */
static void handle_listen_error(int err)
{
	switch (err) {
	case EINTR:
		/*
		 * The fact that we got an error return means that recvmmsg()
		 * hadn't actually done anything, so we can just loop back over
		 * the call no problem.
		 */
		return;
	case 0:
		fatal("Unexpected EOF from recvmmsg()\n");
	default:
		fatal("Unexpected listen error: %s (-%d)\n", strerror(err), err);
	}
}
free_mmsghdr_vec(struct mmsghdr *vec, int nr) 103 | { 104 | struct msg_buf *cur; 105 | int i; 106 | 107 | for (i = 0; i < nr; i++) { 108 | cur = msgbuf_from_iovec(vec[i].msg_hdr.msg_iov); 109 | free(cur); 110 | } 111 | 112 | free(vec); 113 | } 114 | 115 | static int get_listen_socket(struct sockaddr_in6 *bindaddr) 116 | { 117 | int fd, ret, optval = 1; 118 | 119 | fd = socket(AF_INET6, SOCK_DGRAM, 0); 120 | if (fd == -1) 121 | fatal("Couldn't get socket: %m\n"); 122 | 123 | ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &optval, sizeof(optval)); 124 | if (ret == -1) 125 | fatal("Couldn't set SO_REUSEPORT on socket: %m\n"); 126 | 127 | ret = bind(fd, (const struct sockaddr *)bindaddr, sizeof(*bindaddr)); 128 | if (ret == -1) 129 | fatal("Couldn't bind: %m\n"); 130 | 131 | return fd; 132 | } 133 | 134 | void *udp_listener_thread(void *arg) 135 | { 136 | int fd, nr_recv, i; 137 | uint64_t now; 138 | struct ncrx_listener *us = arg; 139 | struct mmsghdr *vec; 140 | struct msg_buf *cur; 141 | 142 | fd = get_listen_socket(us->address); 143 | vec = alloc_mmsghdr_vec(us->batch, RCVBUF_SIZE); 144 | 145 | while (!us->stop) { 146 | nr_recv = recvmmsg(fd, vec, us->batch, MSG_WAITFORONE, NULL); 147 | if (nr_recv <= 0) { 148 | handle_listen_error(errno); 149 | continue; 150 | } 151 | 152 | debug("recvmmsg() got %d packets\n", nr_recv); 153 | 154 | now = now_real_ms(); 155 | for (i = 0; i < nr_recv; i++) { 156 | cur = msgbuf_from_iovec(vec[i].msg_hdr.msg_iov); 157 | 158 | cur->rcv_flags = vec[i].msg_hdr.msg_flags; 159 | cur->rcv_bytes = vec[i].msg_len; 160 | cur->rcv_time = now; 161 | 162 | prequeue_msgbuf(us, cur); 163 | us->processed++; 164 | } 165 | 166 | enqueue_and_wake_all(us); 167 | reinit_mmsghdr_vec(vec, nr_recv, RCVBUF_SIZE); 168 | } 169 | 170 | free_mmsghdr_vec(vec, us->batch); 171 | 172 | return NULL; 173 | } 174 | -------------------------------------------------------------------------------- /main.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the BSD-style license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "include/common.h" 14 | #include "include/output.h" 15 | #include "include/threads.h" 16 | 17 | static void parse_arguments(int argc, char **argv, struct netconsd_params *p) 18 | { 19 | int i; 20 | char *tmp; 21 | static const char *optstr = "hw:l:b:a:u:g:"; 22 | static const struct option optlong[] = { 23 | { 24 | .name = "help", 25 | .has_arg = no_argument, 26 | .val = 'h', 27 | }, 28 | { 29 | .name = NULL, 30 | }, 31 | }; 32 | 33 | while (1) { 34 | i = getopt_long(argc, argv, optstr, optlong, NULL); 35 | 36 | switch (i) { 37 | case 'w': 38 | p->nr_workers = atoi(optarg); 39 | break; 40 | case 'l': 41 | p->nr_listeners = atoi(optarg); 42 | break; 43 | case 'b': 44 | p->mmsg_batch = atoi(optarg); 45 | break; 46 | case 'a': 47 | if (!inet_pton(AF_INET6, optarg, &p->listen_addr.sin6_addr)) { 48 | char v4[sizeof("::ffff:XXX.XXX.XXX.XXX")]; 49 | snprintf(v4, sizeof(v4), "::ffff:%s", optarg); 50 | if (!inet_pton(AF_INET6, v4, &p->listen_addr.sin6_addr)) 51 | fatal("invalid listen address\n"); 52 | } 53 | 54 | debug("listening for address %s\n", optarg); 55 | break; 56 | case 'u': 57 | p->listen_addr.sin6_port = htons(atoi(optarg)); 58 | break; 59 | case 'g': 60 | tmp = index(optarg, '/'); 61 | if (!tmp) 62 | fatal("'-g' expects 'INTERVAL/AGE' in ms\n"); 63 | 64 | p->gc_int_ms = atoi(optarg); 65 | p->gc_age_ms = atoi(tmp + 1); 66 | 67 | if (p->gc_age_ms < p->gc_int_ms) 68 | fatal("GC age must be >= GC interval\n"); 69 | 70 | break; 71 | case -1: 72 | goto done; 73 | case 'h': 74 | printf("Usage: %s [-w workers] [-l listeners] " 75 | "[-b mmsg_batch] [-a udp_listen_addr] [-u udp_listen_port] " 76 | "[-g 
'${interval}/${age}'] [output module path] " 77 | "[another output module path...]\n", argv[0]); 78 | exit(0); 79 | default: 80 | exit(1); 81 | } 82 | } 83 | 84 | done: 85 | 86 | /* 87 | * Register output modules 88 | */ 89 | if (optind == argc) 90 | warn("You passed no output modules, which is sort of silly\n"); 91 | 92 | if (argc - optind > MAXOUTS) 93 | fatal("Too many output mods: %d>%d\n", argc - optind, MAXOUTS); 94 | 95 | for (i = optind; i < argc; i++) 96 | if (register_output_module(argv[i], p->nr_workers)) 97 | fatal("Can't register '%s'\n", argv[i]); 98 | } 99 | 100 | /* 101 | * This exists to kick the blocking recvmmsg() call in the listener threads, so 102 | * they get -EINTR, notice the stop flag, and terminate. 103 | * 104 | * See also: stop_and_wait_for_listeners() in threads.c 105 | */ 106 | static void interrupter_handler(int sig) 107 | { 108 | return; 109 | } 110 | 111 | /* 112 | * Initialize the set of signals for which we try to terminate gracefully. 113 | */ 114 | static void init_sigset(sigset_t *set) 115 | { 116 | sigemptyset(set); 117 | sigaddset(set, SIGTERM); 118 | sigaddset(set, SIGINT); 119 | sigaddset(set, SIGHUP); 120 | } 121 | 122 | static void init_sighandlers(void) 123 | { 124 | struct sigaction ignorer = { 125 | .sa_handler = SIG_IGN, 126 | }; 127 | struct sigaction interrupter = { 128 | .sa_handler = interrupter_handler, 129 | .sa_flags = SA_NODEFER, 130 | }; 131 | 132 | sigaction(SIGUSR1, &interrupter, NULL); 133 | sigaction(SIGPIPE, &ignorer, NULL); 134 | } 135 | 136 | int main(int argc, char **argv) 137 | { 138 | int num; 139 | sigset_t set; 140 | struct tctl *ctl; 141 | struct netconsd_params params = { 142 | .nr_workers = 2, 143 | .nr_listeners = 1, 144 | .mmsg_batch = 512, 145 | .gc_int_ms = 0, 146 | .gc_age_ms = 0, 147 | .listen_addr = { 148 | .sin6_family = AF_INET6, 149 | .sin6_addr = IN6ADDR_ANY_INIT, 150 | .sin6_port = htons(1514), 151 | } 152 | }; 153 | 154 | parse_arguments(argc, argv, ¶ms); 155 | 156 | 
init_sighandlers(); 157 | init_sigset(&set); 158 | sigprocmask(SIG_BLOCK, &set, NULL); 159 | 160 | ctl = create_threads(¶ms); 161 | sigwait(&set, &num); 162 | 163 | log("Signal: '%s', terminating\n", strsignal(num)); 164 | destroy_threads(ctl); 165 | destroy_output_modules(); 166 | 167 | return 0; 168 | } 169 | -------------------------------------------------------------------------------- /modules/Makefile: -------------------------------------------------------------------------------- 1 | CC ?= gcc 2 | CXX ?= g++ 3 | CPPFLAGS ?= 4 | LDFLAGS ?= 5 | 6 | override CFLAGS += -fPIC 7 | override CXXFLAGS += -std=c++11 -fPIC 8 | override LDFLAGS += -shared 9 | INCLUDES = -I../ncrx -I../include 10 | 11 | mods = printer.so logger.so 12 | 13 | all: $(mods) 14 | 15 | %.so: %.c 16 | $(CC) $< $(CPPFLAGS) $(CFLAGS) $(INCLUDES) -c -o $(<:.c=.o) 17 | $(CC) $(<:.c=.o) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ 18 | 19 | %.so: %.cc 20 | $(CXX) $< $(CPPFLAGS) $(CXXFLAGS) $(INCLUDES) -c -o $(<:.cc=.o) 21 | $(CXX) $(<:.cc=.o) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ 22 | 23 | clean: 24 | rm -f *.o *.so 25 | -------------------------------------------------------------------------------- /modules/logger.cc: -------------------------------------------------------------------------------- 1 | /* logger.cc: Very simple example C++ netconsd module 2 | * 3 | * Copyright (c) Meta Platforms, Inc. and affiliates. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | /* 31 | * The below allows us to index an unordered_map by an IP address. 
/*
 * Basic struct to hold the hostname and the FD for its logfile.
 *
 * Constructing a logtarget has side effects: it performs a reverse lookup and
 * creates/truncates a file; destroying it closes that file.
 */
struct logtarget {
	/* name the logfile is opened under; filled in by the constructor */
	char hostname[INET6_ADDRSTRLEN + 1];
	/* descriptor of the opened logfile */
	int fd;

	/*
	 * Resolve the hostname, and open() an appropriately named file to
	 * write the logs into.
	 *
	 * @src: IPv6 address of the remote host.
	 *
	 * Name selection falls back in three steps: the reverse-resolved
	 * name, else the textual form of the address, else the literal
	 * "unknown".  The file is opened relative to the current working
	 * directory and an existing file is truncated.  Failure to open it
	 * aborts the process.
	 */
	logtarget(struct in6_addr *src)
	{
		int ret;
		struct sockaddr_in6 sa = {
			.sin6_family = AF_INET6,
			.sin6_port = 0,
		};

		memcpy(&sa.sin6_addr, src, sizeof(*src));
		/* NI_NAMEREQD: fail rather than return a numeric string */
		ret = getnameinfo((const struct sockaddr *)&sa, sizeof(sa),
				hostname, sizeof(hostname) - 1, NULL, 0, NI_NAMEREQD);
		if (ret) {
			const char *ptr;
			fprintf(stderr, "getnameinfo failed: %s\n", gai_strerror(ret));
			/* no usable name: use the printable address instead */
			ptr = inet_ntop(AF_INET6, src, hostname, INET6_ADDRSTRLEN);
			if (ptr == NULL) {
				fprintf(stderr, "inet_ntop failed: %s\n", strerror(errno));
				/* 8 == strlen("unknown") + 1 */
				snprintf(hostname, 8, "unknown");
			}
		}

		/*
		 * NOTE(review): the resolved name is used verbatim as a path.
		 * It comes from DNS, so confirm it cannot contain '/' or
		 * otherwise escape the intended log directory.
		 */
		ret = open(hostname, O_TRUNC|O_WRONLY|O_CREAT, 0644);
		if (ret == -1) {
			fprintf(stderr, "FATAL: open() failed: %m\n");
			abort();
		}

		fd = ret;
	}

	/*
	 * Close the file
	 */
	~logtarget(void)
	{
		close(fd);
	}
};
110 | */ 111 | static struct logtarget& get_target(int thread_nr, struct in6_addr *src) 112 | { 113 | auto itr = maps[thread_nr].find(*src); 114 | if (itr == maps[thread_nr].end()) 115 | return maps[thread_nr].emplace(*src, src).first->second; 116 | 117 | return itr->second; 118 | } 119 | 120 | /* 121 | * Actually write the line to the file 122 | */ 123 | static void write_log(struct logtarget& tgt, struct msg_buf *buf, 124 | struct ncrx_msg *msg) 125 | { 126 | /* legacy non-extended netcons message */ 127 | if (!msg) { 128 | dprintf(tgt.fd, "%s\n", buf->buf); 129 | return; 130 | } 131 | 132 | /* extended netcons msg with metadata */ 133 | if (std::strlen(msg->version) > 1) 134 | dprintf(tgt.fd, "%s ", msg->version); 135 | dprintf(tgt.fd, "%06" PRIu64 " ", msg->seq); 136 | dprintf(tgt.fd, "%014" PRIu64 " ", msg->ts_usec); 137 | dprintf(tgt.fd, "%d ", msg->facility); 138 | dprintf(tgt.fd, "%d ", msg->level); 139 | if (msg->cont_start) 140 | dprintf(tgt.fd, "[CONT START] "); 141 | if (msg->cont) 142 | dprintf(tgt.fd, "[CONT] "); 143 | if (msg->oos) 144 | dprintf(tgt.fd, "[OOS] "); 145 | if (msg->seq_reset) 146 | dprintf(tgt.fd, "[SEQ RESET] "); 147 | dprintf(tgt.fd, "%s\n", msg->text); 148 | } 149 | 150 | extern "C" int netconsd_output_init(int nr) 151 | { 152 | maps = new std::unordered_map[nr]; 153 | return 0; 154 | } 155 | 156 | extern "C" void netconsd_output_exit(void) 157 | { 158 | delete[] maps; 159 | } 160 | 161 | /* 162 | * This is the actual function called by netconsd. 
/*
 * Module init hook: announce how many worker threads netconsd was started
 * with.  The banner ends in a newline so that it forms a line of its own
 * instead of being glued to the first message printed afterwards.
 *
 * Returns 0 (success).
 */
int netconsd_output_init(int nr_workers)
{
	printf("From init hook: %d worker threads\n", nr_workers);
	return 0;
}
"[SEQ RESET] " : "", 46 | msg->text); 47 | } 48 | -------------------------------------------------------------------------------- /ncrx/Makefile: -------------------------------------------------------------------------------- 1 | CC ?= gcc 2 | 3 | CFLAGS ?= -O2 -fPIC 4 | CFLAGS += -D_GNU_SOURCE -fno-strict-aliasing -Wall -Wextra \ 5 | -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations \ 6 | -Wdeclaration-after-statement -Wno-missing-field-initializers \ 7 | -Wno-unused-function -Wno-unused-parameter 8 | CPPFLAGS ?= 9 | 10 | obj = libncrx.o 11 | 12 | all: $(obj) 13 | 14 | %.o: %.c 15 | $(CC) $< $(CPPFLAGS) $(CFLAGS) $(INCLUDES) -c -o $@ 16 | 17 | clean: 18 | rm -f *.o 19 | -------------------------------------------------------------------------------- /ncrx/libncrx.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ncrx - extended netconsole receiver library 3 | * 4 | * Copyright (c) Meta Platforms, Inc. and affiliates. 5 | * 6 | * This source code is licensed under the BSD-style license found in the 7 | * LICENSE file in the root directory of this source tree. 
8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "ncrx.h" 19 | 20 | /* oos history is tracked with a uint32_t */ 21 | #define NCRX_OOS_MAX 32 22 | 23 | struct ncrx_msg_list { 24 | struct ncrx_list head; 25 | int nr; /* number of msgs on the list */ 26 | }; 27 | 28 | struct ncrx_slot { 29 | struct ncrx_msg *msg; 30 | uint64_t timestamp; /* last rx on this slot */ 31 | uint64_t retx_timestamp; /* last retransmission */ 32 | struct ncrx_list hole_node; /* anchored @ ncrx->hole_list */ 33 | }; 34 | 35 | struct ncrx { 36 | struct ncrx_param p; 37 | 38 | uint64_t now_mono; /* latest time in msecs */ 39 | 40 | int head; /* next slot to use */ 41 | int tail; /* last slot in use */ 42 | uint64_t head_seq; /* next expected seq, unset=0 */ 43 | struct ncrx_slot *slots; /* msg slots */ 44 | struct ncrx_list hole_list; /* missing or !complete slots */ 45 | 46 | uint32_t oos_history; /* bit history of oos msgs */ 47 | struct ncrx_msg_list oos_list; /* buffered oos msgs */ 48 | 49 | struct ncrx_msg_list retired_list; /* msgs to be fetched by user */ 50 | 51 | uint64_t acked_seq; /* last seq acked, unset=max */ 52 | uint64_t acked_at; /* and when */ 53 | 54 | /* response buffer for ncrx_response() */ 55 | char resp_buf[NCRX_PKT_MAX + 1]; 56 | int resp_len; 57 | }; 58 | 59 | static const struct ncrx_param ncrx_dfl_param = { 60 | .nr_slots = NCRX_DFL_NR_SLOTS, 61 | 62 | .ack_intv = NCRX_DFL_ACK_INTV, 63 | .retx_intv = NCRX_DFL_RETX_INTV, 64 | .retx_stride = NCRX_DFL_RETX_STRIDE, 65 | .msg_timeout = NCRX_DFL_MSG_TIMEOUT, 66 | 67 | .oos_thr = NCRX_DFL_OOS_THR, 68 | .oos_intv = NCRX_DFL_OOS_INTV, 69 | .oos_timeout = NCRX_DFL_OOS_TIMEOUT, 70 | }; 71 | 72 | /* utilities mostly stolen from kernel */ 73 | #define min(x, y) ({ \ 74 | typeof(x) _min1 = (x); \ 75 | typeof(y) _min2 = (y); \ 76 | (void) (&_min1 == &_min2); \ 77 | _min1 < _min2 ? 
/*
 * Population count: number of set bits in @w.
 *
 * Adjacent bit groups are summed in parallel (pairs, then nibbles, then
 * bytes); the final multiply adds the four byte counts into the top byte.
 */
static unsigned int hweight32(uint32_t w)
{
	const uint32_t pairs = w - ((w >> 1) & 0x55555555);
	const uint32_t nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
	const uint32_t bytes = (nibbles + (nibbles >> 4)) & 0x0f0f0f0f;
	const uint32_t summed = bytes * 0x01010101;

	return summed >> 24;
}
| struct ncrx_list *prev = list->prev; 144 | 145 | assert(node->next == node && node->prev == node); 146 | 147 | node->next = list; 148 | node->prev = prev; 149 | prev->next = node; 150 | list->prev = node; 151 | } 152 | 153 | static void msg_list_del(struct ncrx_msg *msg, struct ncrx_msg_list *list) 154 | { 155 | list_del(&msg->node); 156 | list->nr--; 157 | 158 | if (!list->nr) 159 | assert(list->head.next == &list->head && 160 | list->head.prev == &list->head); 161 | } 162 | 163 | static void msg_list_append(struct ncrx_msg *msg, struct ncrx_msg_list *list) 164 | { 165 | list_append(&msg->node, &list->head); 166 | list->nr++; 167 | } 168 | 169 | static struct ncrx_msg *msg_list_peek(struct ncrx_msg_list *list) 170 | { 171 | if (list_empty(&list->head)) 172 | return NULL; 173 | return node_to_msg(list->head.next); 174 | } 175 | 176 | static struct ncrx_msg *msg_list_pop(struct ncrx_msg_list *list) 177 | { 178 | struct ncrx_msg *msg; 179 | 180 | msg = msg_list_peek(list); 181 | if (msg) 182 | msg_list_del(msg, list); 183 | return msg; 184 | } 185 | 186 | /* 187 | * Check if we have a kernel version in the very first field 188 | */ 189 | static int release_prepended(char *ptr) 190 | { 191 | char *dot_pos, *comma_pos; 192 | 193 | if (!ptr) 194 | return 0; 195 | 196 | dot_pos = memchr(ptr, '.', NCRX_KVERSION_MAX_LEN); 197 | comma_pos = memchr(ptr, ',', NCRX_KVERSION_MAX_LEN); 198 | 199 | if (!dot_pos || !comma_pos) 200 | return 0; 201 | 202 | if (dot_pos < comma_pos) 203 | return 1; 204 | 205 | return 0; 206 | } 207 | /* 208 | * Parse @payload into @msg. The data is not copied into @msg's buffer. 209 | * @msg->text and ->dict are updated to point into @payload instead. 
/*
 * Parse @payload into @msg.  The data is not copied into @msg's buffer;
 * @msg->text is set to point into @payload instead.
 *
 * @payload: NUL-terminated packet of the form "<header>;<text>".
 * @msg: filled in from scratch (it is zeroed first).
 *
 * Returns 0 on success.  Returns -1 with errno set to EINVAL when there is
 * no ';', the header does not fit the local buffer, a numeric field has
 * trailing garbage, or an "ncfrag"/"ncemg" key is malformed or repeated.
 */
static int parse_packet(const char *payload, struct ncrx_msg *msg)
{
	char buf[1024];
	char *p, *tok;
	int idx;
	bool is_frag_seen = false, is_emg_seen = false;

	memset(msg, 0, sizeof(*msg));

	/* split at the first ';': header is copied to buf, text stays in place */
	p = strchr(payload, ';');
	if (!p || p - payload >= (signed)sizeof(buf))
		goto einval;
	memcpy(buf, payload, p - payload);
	buf[p - payload] = '\0';

	msg->text = p + 1;
	msg->text_len = strlen(msg->text);
	/* only the recorded length is clamped; the text itself is untouched */
	if (msg->text_len > NCRX_LINE_MAX)
		msg->text_len = NCRX_LINE_MAX;

	/* [release,]<prio>,<seq>,<ts_usec>,<contflag>[,KEY=VAL]* */
	p = buf;
	/* without a leading release field, start directly at the prio field */
	if (release_prepended(p)) {
		idx = 0;
	} else {
		idx = 1;
	}
	while ((tok = strsep(&p, ","))) {
		char *endp, *key, *val;
		unsigned long long v;

		/* positional fields first; each one "continue"s to the next token */
		switch (idx++) {
		case 0:
			if (!tok)
				goto einval;
			/* msg was zeroed above, so version stays NUL-terminated */
			strncpy(msg->version, tok, NCRX_KVERSION_MAX_LEN - 1);
			continue;
		case 1:
			v = strtoul(tok, &endp, 0);
			if (*endp != '\0' || v > UINT8_MAX)
				goto einval;
			/* low 3 bits are the level, the rest is the facility */
			msg->facility = v >> 3;
			msg->level = v & ((1 << 3) - 1);
			continue;
		case 2:
			v = strtoull(tok, &endp, 0);
			if (*endp != '\0')
				goto einval;
			msg->seq = v;
			continue;
		case 3:
			v = strtoull(tok, &endp, 0);
			if (*endp != '\0')
				goto einval;
			msg->ts_usec = v;
			continue;
		case 4:
			/* 'c' marks a continuation start, '+' a continuation */
			if (tok[0] == 'c')
				msg->cont_start = 1;
			else if (tok[0] == '+')
				msg->cont = 1;
			continue;
		}

		/* everything after the positional fields is KEY=VAL */
		val = tok;
		key = strsep(&val, "=");
		/* a token without '=' is ignored */
		if (!val)
			continue;
		if (!strcmp(key, "ncfrag")) {
			unsigned nf_off, nf_len;

			if (is_frag_seen)
				goto einval;
			/* value is "<offset>/<total length>" */
			if (sscanf(val, "%u/%u", &nf_off, &nf_len) != 2)
				goto einval;
			/* the fragment must be non-empty and lie inside the full message */
			if (!msg->text_len ||
			    nf_len >= NCRX_LINE_MAX ||
			    nf_off >= nf_len ||
			    nf_off + msg->text_len > nf_len)
				goto einval;

			/*
			 * From here on text_len is the length of the complete
			 * message; ncfrag_len is what this packet carries and
			 * ncfrag_left what is still missing.
			 */
			msg->ncfrag_off = nf_off;
			msg->ncfrag_len = msg->text_len;
			msg->ncfrag_left = nf_len - msg->ncfrag_len;
			msg->text_len = nf_len;
			is_frag_seen = true;
		} else if (!strcmp(key, "ncemg")) {
			if (is_emg_seen)
				goto einval;

			v = strtoul(val, &endp, 0);
			if (*endp != '\0')
				goto einval;
			msg->emg = v;
			is_emg_seen = true;
		}
		/* unknown keys are silently skipped */
	}
	return 0;
einval:
	errno = EINVAL;
	return -1;
}
*/ 347 | static int slot_is_free(struct ncrx_slot *slot) 348 | { 349 | return !slot->msg && list_empty(&slot->hole_node); 350 | } 351 | 352 | /* @slot may have just been completed, if so, remove it from hole_list */ 353 | static void slot_maybe_complete(struct ncrx_slot *slot) 354 | { 355 | struct ncrx_msg *msg = slot->msg; 356 | 357 | if (!msg || msg->ncfrag_left || list_empty(&slot->hole_node)) 358 | return; 359 | 360 | list_del(&slot->hole_node); 361 | } 362 | 363 | /* retire the last queued slot whether complete or not */ 364 | static void retire_tail(struct ncrx *ncrx) 365 | { 366 | int ntail = (ncrx->tail + 1) % ncrx->p.nr_slots; 367 | struct ncrx_slot *slot = &ncrx->slots[ncrx->tail]; 368 | struct ncrx_slot *nslot = &ncrx->slots[ntail]; 369 | 370 | if (slot->msg) { 371 | msg_list_append(slot->msg, &ncrx->retired_list); 372 | slot->msg = NULL; 373 | } 374 | 375 | list_del(&slot->hole_node); /* free slot is never a hole */ 376 | ncrx->tail = ntail; 377 | /* 378 | * Activities of past msgs are considered activities for newer ones 379 | * too. This prevents oos interval verdicts from flipping as 380 | * sequence progresses. 381 | */ 382 | nslot->timestamp = max(slot->timestamp, nslot->timestamp); 383 | } 384 | 385 | /* make room for message with seq ncrx->head_seq + @delta */ 386 | static void make_room(struct ncrx *ncrx, int delta) 387 | { 388 | int i; 389 | 390 | /* head_seq is for the next msg, need to advance for 0 @delta too */ 391 | for (i = 0; i <= delta; i++) { 392 | struct ncrx_slot *slot; 393 | int max_busy = ncrx->p.nr_slots - ncrx->p.retx_stride; 394 | 395 | /* a new slot is considered hole until it gets completed */ 396 | slot = &ncrx->slots[ncrx->head]; 397 | assert(slot_is_free(slot)); 398 | list_append(&slot->hole_node, &ncrx->hole_list); 399 | slot->timestamp = ncrx->now_mono; 400 | slot->retx_timestamp = 0; 401 | 402 | /* 403 | * Wind the ring buffer and push out if overflowed. 
/*
 * Get slot for @tmsg.  On success, returns pointer to the slot which may
 * be free or occupied with partial or complete message.  Returns NULL with
 * errno set to ERANGE if oos, NULL / ENOENT if already retired.
 *
 * Side effects: starts a new sequence stream when @ncrx->head_seq is unset
 * (marking @tmsg with seq_reset), advances the ring via make_room() when
 * @tmsg is at or ahead of head_seq, and refreshes the slot's timestamp.
 */
static struct ncrx_slot *get_seq_slot(struct ncrx_msg *tmsg, struct ncrx *ncrx)
{
	struct ncrx_slot *slot;
	int64_t delta;
	int idx;

	/* new seq stream */
	if (!ncrx->head_seq) {
		ncrx->head_seq = tmsg->seq;
		ncrx->acked_seq = UINT64_MAX;
		tmsg->seq_reset = 1;
	}

	/* signed distance from the next expected seq; negative means older */
	delta = tmsg->seq - ncrx->head_seq;

	/*
	 * Consider oos if outside reorder window or if the slot is
	 * complete and the last activity on it was more than oos_intv ago.
	 * Emergency messages are never considered oos as they don't follow
	 * the usual transmission pattern and may repeat indefinitely.
	 */
	if (-delta > ncrx->p.nr_slots || delta > ncrx->p.nr_slots) {
		errno = ERANGE;
		return NULL;
	}

	idx = seq_delta_idx(ncrx, delta);
	slot = &ncrx->slots[idx];

	/* older than anything still queued: the seq was retired earlier */
	if (-delta > nr_queued(ncrx)) {
		int is_free = slot_is_free(slot);

		if (!tmsg->emg &&
		    (!is_free ||
		     slot->timestamp + ncrx->p.oos_intv < ncrx->now_mono)) {
			errno = ERANGE;
			return NULL;
		}

		/* treat as a late duplicate; note the activity on a free slot */
		if (is_free)
			slot->timestamp = ncrx->now_mono;
		errno = ENOENT;
		return NULL;
	}

	/* for a negative delta make_room()'s loop does not run at all */
	make_room(ncrx, delta);
	slot->timestamp = ncrx->now_mono;

	return slot;
}
501 | */ 502 | static int queue_oos_msg(struct ncrx_msg *tmsg, struct ncrx *ncrx) 503 | { 504 | struct ncrx_slot *slot; 505 | struct ncrx_msg *msg, *nmsg, *first; 506 | 507 | msg = copy_msg(tmsg); 508 | if (!msg) 509 | return -1; 510 | 511 | msg_list_append(msg, &ncrx->oos_list); 512 | 513 | /* 514 | * Shifted left automatically on each new msg. Set oos and see if 515 | * there have been too many oos among the last 32 messages. 516 | */ 517 | ncrx->oos_history |= 1; 518 | if ((signed)hweight32(ncrx->oos_history) < ncrx->p.oos_thr) { 519 | /* nope, handle oos overflow and handle */ 520 | if (ncrx->oos_list.nr > NCRX_OOS_MAX) { 521 | msg = msg_list_pop(&ncrx->oos_list); 522 | if (msg) { 523 | msg->oos = 1; 524 | msg_list_append(msg, &ncrx->retired_list); 525 | } 526 | } 527 | return 0; 528 | } 529 | 530 | /* 531 | * The current sequence stream seems no good. Let's reset by 532 | * retiring all pending, picking the oos msg with the lowest seq, 533 | * queueing it to reset the seq and then queueing all other oos 534 | * msgs. If a msg is still oos after reset, just retire it. 535 | */ 536 | while (ncrx->tail != ncrx->head) 537 | retire_tail(ncrx); 538 | 539 | ncrx->head_seq = 0; 540 | ncrx->acked_seq = UINT64_MAX; 541 | 542 | first = node_to_msg(ncrx->oos_list.head.next); 543 | msg_list_for_each(msg, nmsg, &ncrx->oos_list) 544 | first = msg->seq < first->seq ? 
msg : first; 545 | 546 | msg_list_del(first, &ncrx->oos_list); 547 | slot = get_seq_slot(first, ncrx); 548 | slot->msg = first; 549 | slot_maybe_complete(slot); 550 | 551 | while ((msg = msg_list_pop(&ncrx->oos_list))) { 552 | slot = get_seq_slot(msg, ncrx); 553 | if (slot) { 554 | slot->msg = msg; 555 | slot_maybe_complete(slot); 556 | } else { 557 | msg->oos = 1; 558 | msg_list_append(msg, &ncrx->retired_list); 559 | } 560 | } 561 | 562 | return 0; 563 | } 564 | 565 | /* @payload has just been received, parse and queue it */ 566 | static int ncrx_queue_payload(const char *payload, struct ncrx *ncrx, 567 | uint64_t now_real) 568 | { 569 | struct ncrx_msg tmsg = {}; 570 | struct ncrx_slot *slot; 571 | int new_msg = 0; 572 | 573 | if (parse_packet(payload, &tmsg)) 574 | return -1; 575 | 576 | tmsg.rx_at_mono = ncrx->now_mono; 577 | tmsg.rx_at_real = now_real; 578 | ncrx->oos_history <<= 1; 579 | 580 | /* ack immediately if logging source is doing emergency transmissions */ 581 | if (tmsg.emg) { 582 | ncrx->acked_seq = UINT64_MAX; 583 | ncrx->acked_at = 0; 584 | } 585 | 586 | /* get the matching slot and allocate a new message if empty */ 587 | slot = get_seq_slot(&tmsg, ncrx); 588 | if (slot && !slot->msg) { 589 | slot->msg = copy_msg(&tmsg); 590 | new_msg = 1; 591 | } 592 | if (!slot || !slot->msg) { 593 | if (errno == ENOENT) 594 | return 0; 595 | if (errno == ERANGE) 596 | return queue_oos_msg(&tmsg, ncrx); 597 | return -1; 598 | } 599 | 600 | if (!new_msg && slot->msg->ncfrag_left) { 601 | struct ncrx_msg *msg = slot->msg; 602 | int off = tmsg.ncfrag_off; 603 | int i; 604 | 605 | /* 606 | * we're merging a text fragment into the message text buffer. 607 | * the checks done here ensure that the received fragment values 608 | * are within bounds of the message text buffer. 
609 | */ 610 | if (off >= msg->text_len || 611 | off + tmsg.ncfrag_len > msg->text_len) { 612 | return -1; 613 | } 614 | 615 | for (i = 0; i < tmsg.ncfrag_len; i++) { 616 | if (msg->text[off + i]) 617 | continue; 618 | msg->text[off + i] = tmsg.text[i]; 619 | msg->ncfrag_left--; 620 | } 621 | } 622 | 623 | slot_maybe_complete(slot); 624 | 625 | return 0; 626 | } 627 | 628 | /* 629 | * Build ncrx_response() output. Ack for the last retired msg is always 630 | * added. If @slot is non-NULL, re-transmission for it is also added. 631 | */ 632 | static void ncrx_build_resp(struct ncrx_slot *slot, struct ncrx *ncrx) 633 | { 634 | /* no msg received? */ 635 | if (!ncrx->head_seq) 636 | return; 637 | 638 | /* "ncrx" */ 639 | if (!ncrx->resp_len) { 640 | ncrx->acked_seq = tail_seq(ncrx) - 1; 641 | ncrx->acked_at = ncrx->now_mono; 642 | 643 | ncrx->resp_len = snprintf(ncrx->resp_buf, NCRX_PKT_MAX, 644 | "ncrx%"PRIu64, ncrx->acked_seq); 645 | } 646 | 647 | /* " ..." truncated to NCRX_PKT_MAX */ 648 | if (slot) { 649 | int idx = slot - ncrx->slots; 650 | int len; 651 | 652 | len = snprintf(ncrx->resp_buf + ncrx->resp_len, 653 | NCRX_PKT_MAX - ncrx->resp_len, " %"PRIu64, 654 | ncrx->head_seq - slot_dist(idx, ncrx)); 655 | if (ncrx->resp_len + len <= NCRX_PKT_MAX) { 656 | ncrx->resp_len += len; 657 | ncrx->resp_buf[ncrx->resp_len] = '\0'; 658 | } 659 | } 660 | } 661 | 662 | int ncrx_process(const char *payload, uint64_t now_mono, uint64_t now_real, 663 | struct ncrx *ncrx) 664 | { 665 | struct ncrx_slot *slot, *tmp_slot; 666 | struct ncrx_msg *msg; 667 | uint64_t old_head_seq = ncrx->head_seq; 668 | int dist_retx, ret = 0; 669 | 670 | if (now_mono < ncrx->now_mono) 671 | fprintf(stderr, "ncrx: time regressed %"PRIu64"->%"PRIu64"\n", 672 | ncrx->now_mono, now_mono); 673 | 674 | ncrx->now_mono = now_mono; 675 | ncrx->resp_len = 0; 676 | 677 | /* 678 | * If fully acked, keep last ack timestamp current so that new 679 | * messages arriving doesn't trigger ack timeout immediately. 
680 | */ 681 | if (ncrx->acked_seq == tail_seq(ncrx) - 1) 682 | ncrx->acked_at = now_mono; 683 | 684 | /* parse and queue @payload */ 685 | if (payload) 686 | ret = ncrx_queue_payload(payload, ncrx, now_real); 687 | 688 | /* retire complete & timed-out msgs from tail */ 689 | while (ncrx->tail != ncrx->head) { 690 | slot = &ncrx->slots[ncrx->tail]; 691 | 692 | if ((!slot->msg || !list_empty(&slot->hole_node)) && 693 | slot->timestamp + ncrx->p.msg_timeout > now_mono) 694 | break; 695 | retire_tail(ncrx); 696 | } 697 | 698 | /* retire timed-out oos msgs */ 699 | while ((msg = msg_list_peek(&ncrx->oos_list))) { 700 | if (msg->rx_at_mono + ncrx->p.oos_timeout > now_mono) 701 | break; 702 | msg->oos = 1; 703 | msg_list_del(msg, &ncrx->oos_list); 704 | msg_list_append(msg, &ncrx->retired_list); 705 | } 706 | 707 | /* if enabled, ack pending and timeout expired? */ 708 | if (ncrx->p.ack_intv && ncrx->acked_seq != tail_seq(ncrx) - 1 && 709 | ncrx->acked_at + ncrx->p.ack_intv < now_mono) 710 | ncrx_build_resp(NULL, ncrx); 711 | 712 | /* head passed one or more re-transmission boundaries? */ 713 | dist_retx = old_head_seq / ncrx->p.retx_stride != 714 | ncrx->head_seq / ncrx->p.retx_stride; 715 | 716 | hole_list_for_each(slot, tmp_slot, &ncrx->hole_list) { 717 | int retx = 0; 718 | 719 | /* 720 | * If so, request re-tx of holes further away than stride. 721 | * This ensures that a missing seq is requested at least 722 | * certain number of times regardless of incoming rate. 
723 | */ 724 | if (dist_retx && 725 | slot_dist(slot - ncrx->slots, ncrx) > ncrx->p.retx_stride) 726 | retx = 1; 727 | 728 | /* request re-tx every retx_intv */ 729 | if (now_mono - max(slot->timestamp, slot->retx_timestamp) >= 730 | (unsigned)ncrx->p.retx_intv) { 731 | slot->retx_timestamp = now_mono; 732 | retx = 1; 733 | } 734 | 735 | if (retx) 736 | ncrx_build_resp(slot, ncrx); 737 | } 738 | 739 | return ret; 740 | } 741 | 742 | const char *ncrx_response(struct ncrx *ncrx, int *lenp) 743 | { 744 | if (lenp) 745 | *lenp = ncrx->resp_len; 746 | if (ncrx->resp_len) 747 | return ncrx->resp_buf; 748 | return NULL; 749 | } 750 | 751 | /* parse out the dictionary in a complete message, if it exists */ 752 | static void terminate_msg_and_dict(struct ncrx_msg *msg) 753 | { 754 | msg->dict = strchr(msg->text, '\n'); 755 | if (msg->dict) { 756 | int len = msg->text_len; 757 | msg->text_len = msg->dict - msg->text; 758 | msg->text[msg->text_len] = '\0'; 759 | msg->dict_len = len - msg->text_len - 1; 760 | msg->dict++; 761 | } 762 | } 763 | 764 | struct ncrx_msg *ncrx_next_msg(struct ncrx *ncrx) 765 | { 766 | struct ncrx_msg *msg = msg_list_pop(&ncrx->retired_list); 767 | 768 | if (msg) 769 | terminate_msg_and_dict(msg); 770 | 771 | return msg; 772 | } 773 | 774 | uint64_t ncrx_invoke_process_at(struct ncrx *ncrx) 775 | { 776 | uint64_t when = UINT64_MAX; 777 | struct ncrx_msg *msg; 778 | 779 | /* ack enabled and pending? */ 780 | if (ncrx->p.ack_intv && ncrx->head_seq && 781 | ncrx->acked_seq != tail_seq(ncrx) - 1) 782 | when = min(when, ncrx->acked_at + ncrx->p.ack_intv); 783 | 784 | /* 785 | * Holes to request for retransmission? msg_timeout is the same 786 | * condition but way longer. Checking on retx_intv is enough. 
787 | */ 788 | if (!list_empty(&ncrx->hole_list)) 789 | when = min(when, ncrx->now_mono + ncrx->p.retx_intv); 790 | 791 | /* oos timeout */ 792 | if ((msg = msg_list_peek(&ncrx->oos_list))) 793 | when = min(when, msg->rx_at_mono + ncrx->p.oos_timeout); 794 | 795 | /* min 10ms intv to avoid busy loop in case something goes bonkers */ 796 | return max(when, ncrx->now_mono + 10); 797 | } 798 | 799 | struct ncrx *ncrx_create(const struct ncrx_param *param) 800 | { 801 | const struct ncrx_param *dfl = &ncrx_dfl_param; 802 | struct ncrx_param *p; 803 | struct ncrx *ncrx; 804 | int i; 805 | 806 | ncrx = calloc(1, sizeof(*ncrx)); 807 | if (!ncrx) 808 | return NULL; 809 | 810 | p = &ncrx->p; 811 | if (param) { 812 | p->nr_slots = param->nr_slots ?: dfl->nr_slots; 813 | 814 | p->ack_intv = param->ack_intv ?: dfl->ack_intv; 815 | p->retx_intv = param->retx_intv ?: dfl->retx_intv; 816 | p->retx_stride = param->retx_stride ?: dfl->retx_stride; 817 | p->msg_timeout = param->msg_timeout ?: dfl->msg_timeout; 818 | 819 | p->oos_thr = param->oos_thr ?: dfl->oos_thr; 820 | p->oos_intv = param->oos_intv ?: dfl->oos_intv; 821 | p->oos_timeout = param->oos_timeout ?: dfl->oos_timeout; 822 | } else { 823 | *p = *dfl; 824 | } 825 | 826 | ncrx->acked_seq = UINT64_MAX; 827 | init_list(&ncrx->hole_list); 828 | init_list(&ncrx->oos_list.head); 829 | init_list(&ncrx->retired_list.head); 830 | 831 | ncrx->slots = calloc(ncrx->p.nr_slots, sizeof(ncrx->slots[0])); 832 | if (!ncrx->slots) { 833 | free(ncrx); 834 | return NULL; 835 | } 836 | 837 | for (i = 0; i < ncrx->p.nr_slots; i++) 838 | init_list(&ncrx->slots[i].hole_node); 839 | 840 | return ncrx; 841 | } 842 | 843 | void ncrx_destroy(struct ncrx *ncrx) 844 | { 845 | struct ncrx_msg *msg; 846 | int i; 847 | 848 | for (i = 0; i < ncrx->p.nr_slots; i++) 849 | free(ncrx->slots[i].msg); 850 | 851 | while ((msg = msg_list_pop(&ncrx->oos_list))) 852 | free(msg); 853 | 854 | while ((msg = msg_list_pop(&ncrx->retired_list))) 855 | free(msg); 856 | 
857 | free(ncrx->slots); 858 | free(ncrx); 859 | } 860 | -------------------------------------------------------------------------------- /ncrx/ncrx-struct.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the BSD-style license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #ifndef __NETCONSOLE_NCRX_STRUCT__ 9 | #define __NETCONSOLE_NCRX_STRUCT__ 10 | 11 | struct ncrx_list { 12 | struct ncrx_list *next; 13 | struct ncrx_list *prev; 14 | }; 15 | 16 | #define NCRX_KVERSION_MAX_LEN 64 17 | 18 | /* 19 | * ncrx_msg represents a single log message and what gets returned from 20 | * ncrx_next_msg(). Most of the public fields are self-explanatory except 21 | * for the followings. 22 | * 23 | * oos 24 | * The message's sequence number doesn't match up with the current 25 | * message stream. Could be from a foreign source or corrupt. Ignore 26 | * when counting missing messages. 27 | * 28 | * seq_reset 29 | * The sequence number stream has jumped. This usually happens when 30 | * the log source reboots. The first message returned after ncrx 31 | * initialization always has this flag set. 
32 | */ 33 | struct ncrx_msg { 34 | /* public fields */ 35 | uint64_t seq; /* printk sequence number */ 36 | uint64_t ts_usec; /* printk timestamp in usec */ 37 | char *text; /* message body */ 38 | char *dict; /* optional dictionary */ 39 | int text_len; /* message body length */ 40 | int dict_len; /* dictionary length */ 41 | 42 | uint8_t facility; /* log facility */ 43 | uint8_t level; /* printk level */ 44 | unsigned cont_start:1; /* first of continued msgs */ 45 | unsigned cont:1; /* continuation of prev msg */ 46 | unsigned oos:1; /* sequence out-of-order */ 47 | unsigned seq_reset:1; /* sequence reset */ 48 | 49 | /* private fields */ 50 | struct ncrx_list node; 51 | uint64_t rx_at_mono; /* monotonic rx time in msec */ 52 | uint64_t rx_at_real; /* real rx time in msec */ 53 | int ncfrag_off; /* netconsole frag offset */ 54 | int ncfrag_len; /* netconsole frag len */ 55 | int ncfrag_left; /* number of missing bytes */ 56 | 57 | /* kernel release version */ 58 | char version[NCRX_KVERSION_MAX_LEN]; 59 | unsigned emg:1; /* emergency transmission */ 60 | 61 | char buf[]; 62 | }; 63 | 64 | #endif /* __NETCONSOLE_NCRX_STRUCT__ */ 65 | -------------------------------------------------------------------------------- /ncrx/ncrx.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ncrx - simple extended netconsole receiver 3 | * 4 | * Copyright (c) Meta Platforms, Inc. and affiliates. 5 | * 6 | * This source code is licensed under the BSD-style license found in the 7 | * LICENSE file in the root directory of this source tree. 
8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "ncrx.h" 21 | 22 | union sockaddr_in46 { 23 | struct sockaddr addr; 24 | struct sockaddr_in6 in6; 25 | struct sockaddr_in in4; 26 | }; 27 | 28 | int main(int argc, char **argv) 29 | { 30 | char buf[NCRX_LINE_MAX + 1]; 31 | struct ncrx_param param = { .ack_intv = 1000 }; 32 | struct ncrx *ncrx; 33 | struct sockaddr_in6 laddr = { }; 34 | uint64_t next_seq = 0, next_at = UINT64_MAX, now; 35 | int prev_cont = 0; 36 | int fd; 37 | 38 | if (argc != 2) { 39 | fprintf(stderr, "Usage: ncrx PORT\n"); 40 | return 1; 41 | } 42 | 43 | fd = socket(AF_INET6, SOCK_DGRAM, 0); 44 | if (fd < 0) { 45 | perror("socket"); 46 | return 1; 47 | } 48 | 49 | laddr.sin6_family = AF_INET6; 50 | laddr.sin6_addr = in6addr_any; 51 | laddr.sin6_port = htons(atoi(argv[1])); 52 | 53 | if (bind(fd, (struct sockaddr *)&laddr, sizeof(laddr)) < 0) { 54 | perror("bind"); 55 | return 1; 56 | } 57 | 58 | ncrx = ncrx_create(¶m); 59 | if (!ncrx) { 60 | perror("ncrx_create"); 61 | return 1; 62 | } 63 | 64 | while (1) { 65 | struct pollfd pfd = { .fd = fd, .events = POLLIN }; 66 | union sockaddr_in46 raddr; 67 | struct ncrx_msg *msg; 68 | struct timespec ts; 69 | socklen_t raddr_len = sizeof(raddr); 70 | char *payload = NULL; 71 | const char *resp; 72 | int timeout; 73 | int len; 74 | 75 | /* determine sleep interval and poll */ 76 | timeout = -1; 77 | if (next_at != UINT64_MAX) { 78 | timeout = 0; 79 | if (next_at > now) 80 | timeout = next_at - now; 81 | } 82 | 83 | if (poll(&pfd, 1, timeout) < 0) { 84 | perror("poll"); 85 | return 1; 86 | } 87 | 88 | /* receive message */ 89 | len = recvfrom(fd, buf, sizeof(buf) - 1, MSG_DONTWAIT, 90 | (struct sockaddr *)&raddr, &raddr_len); 91 | 92 | payload = NULL; 93 | if (len >= 0) { 94 | buf[len] = '\0'; 95 | payload = buf; 96 | } else if (errno != EAGAIN) { 97 | perror("recv"); 98 | return 1; 99 | } 100 | 101 | /* 
determine the current time */ 102 | if (clock_gettime(CLOCK_MONOTONIC, &ts)) { 103 | perror("clock_gettime"); 104 | return 1; 105 | } 106 | now = ts.tv_sec * 1000 + ts.tv_nsec / 1000000; 107 | 108 | /* process the payload and perform rx operations */ 109 | if (ncrx_process(payload, now, 0, ncrx) && errno != ENOENT) { 110 | if (errno == EINVAL) { 111 | while (len && isspace(payload[len - 1])) 112 | payload[--len] = '\0'; 113 | printf("[%12s] %s\n", "INVAL", payload); 114 | } else { 115 | perror("ncrx_process"); 116 | } 117 | } 118 | 119 | resp = ncrx_response(ncrx, &len); 120 | if (resp && sendto(fd, resp, len, 0, 121 | (struct sockaddr *)&raddr, raddr_len) < 0) 122 | perror("sendto"); 123 | 124 | while ((msg = ncrx_next_msg(ncrx))) { 125 | const char *pnl = prev_cont ? "\n" : ""; 126 | 127 | if (msg->oos) { 128 | printf("%s[%12s] %s\n", pnl, "OOS", msg->text); 129 | prev_cont = 0; 130 | continue; 131 | } 132 | if (msg->seq_reset) { 133 | printf("%s[%12s] seq=%"PRIu64"\n", 134 | pnl, "SEQ RESET", msg->seq); 135 | next_seq = msg->seq; 136 | } 137 | if (msg->seq != next_seq) { 138 | printf("%s[%12s] %"PRIu64" messages skipped\n", 139 | pnl, "SEQ SKIPPED", msg->seq - next_seq); 140 | } 141 | 142 | next_seq = msg->seq + 1; 143 | 144 | if (!msg->cont || !prev_cont) 145 | printf("%s[%5"PRIu64".%06"PRIu64"] ", pnl, 146 | msg->ts_usec / 1000000, 147 | msg->ts_usec % 1000000); 148 | 149 | printf("%s", msg->text); 150 | 151 | prev_cont = msg->cont_start || msg->cont; 152 | if (!prev_cont) 153 | printf("\n"); 154 | } 155 | 156 | next_at = ncrx_invoke_process_at(ncrx); 157 | } 158 | 159 | return 0; 160 | } 161 | -------------------------------------------------------------------------------- /ncrx/ncrx.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ncrx - extended netconsole receiver library 3 | * 4 | * Copyright (c) Meta Platforms, Inc. and affiliates. 
5 | * 6 | * This source code is licensed under the BSD-style license found in the 7 | * LICENSE file in the root directory of this source tree. 8 | */ 9 | 10 | #ifndef __NETCONSOLE_NCRX__ 11 | #define __NETCONSOLE_NCRX__ 12 | 13 | #include 14 | 15 | #define NCRX_LINE_MAX 8192 16 | 17 | /* max payload len for responses, this is what netconsole uses on tx side */ 18 | #define NCRX_PKT_MAX 1000 19 | 20 | #include "ncrx-struct.h" 21 | 22 | /* 23 | * ncrx parameters. Specify NULL to use defaults for all. Specify 0 to use 24 | * default for individual parameters. All time periods are in millisecs. 25 | * 26 | * nr_slots 27 | * The number of reorder slots. This bounds the maximum memory which 28 | * may be consumed by the ncrx instance. Lowering this number 29 | * increases the chance of the ordering window passing by a missing 30 | * message before it can be obtained leading to missed messages. 31 | * 32 | * ack_intv 33 | * A received message is acked after this period. Transmission side 34 | * ack timeout is 10s and this should be shorter than that. 35 | * 36 | * retx_intv 37 | * Retransmission request is sent and repeated every this period. 38 | * 39 | * retx_stride 40 | * A missing message generates retransmission request whenever it gets 41 | * pushed back this number of slots by newly arriving message. 42 | * 43 | * msg_timeout 44 | * A missing message expires after this period and the sequence number 45 | * will be skipped in the output. 46 | * 47 | * oos_thr 48 | * Among last 32 message, if more than this number of messages are 49 | * out-of-order, the message stream is reset. 50 | * 51 | * oos_intv 52 | * A message is considered out-of-sequence only if the last message 53 | * received with the sequence number is older than this. 54 | * 55 | * oos_timeout 56 | * If sequence is not reset in this period after reception of an 57 | * out-of-order message, the message is output. 
58 | */ 59 | struct ncrx_param { 60 | int nr_slots; 61 | 62 | int ack_intv; 63 | int retx_intv; 64 | int retx_stride; 65 | int msg_timeout; 66 | 67 | int oos_thr; 68 | int oos_intv; 69 | int oos_timeout; 70 | }; 71 | 72 | /* default params */ 73 | #define NCRX_DFL_NR_SLOTS 8192 74 | 75 | #define NCRX_DFL_ACK_INTV 0 /* disable ack logic by default */ 76 | 77 | #define NCRX_DFL_RETX_INTV 1000 78 | #define NCRX_DFL_RETX_STRIDE 256 79 | #define NCRX_DFL_MSG_TIMEOUT 30000 80 | 81 | #define NCRX_DFL_OOS_THR (32 * 3 / 5) /* 19 */ 82 | #define NCRX_DFL_OOS_INTV 5000 83 | #define NCRX_DFL_OOS_TIMEOUT NCRX_DFL_MSG_TIMEOUT 84 | 85 | /* 86 | * A ncrx instance is created by ncrx_create() and destroyed by 87 | * ncrx_destroy(). All accesses to a given instance must be serialized; 88 | * however, a process may create any number of instances and use them 89 | * concurrently. 90 | */ 91 | struct ncrx; 92 | 93 | struct ncrx *ncrx_create(const struct ncrx_param *param); 94 | void ncrx_destroy(struct ncrx *ncrx); 95 | 96 | /* 97 | * A ncrx instance doesn't do any IO or blocking. It's just a state 98 | * machine that the user can feed data into and get the results out of. 99 | * 100 | * ncrx_process() 101 | * Process @payload of a packet. @now_mono is the current time in msecs. 102 | * The origin doesn't matter as long as it's monotonously increasing. 103 | * @payload may be NULL. See ncrx_invoke_process_at(). 104 | * 105 | * @now_real is an optional timestamp which will be stored at rx_at_real 106 | * in the resulting ncrx_msg struct. The library does not use this value 107 | * at all, so it can be zero. 108 | * 109 | * Returns 0 on success. 1 on failure with errno set. EINVAL 110 | * indicates that @payload is not a valid extended netconsole message. 111 | * 112 | * ncrx_response() 113 | * The response to send to log source. 
If the user calls this
 *	function after each ncrx_process() invocation and sends back the
 *	output, re- and emergency transmissions are activated, increasing
 *	the reliability especially if the network is flaky.  If not, ncrx
 *	will passively reorder and assemble messages.
 *
 *	Returns pointer to '\0' terminated response string or NULL if
 *	there's nothing to send back.  If @lenp is not NULL, *@lenp is set
 *	to the length of the response string.
 *
 * ncrx_next_msg()
 *	Fetches the next completed message.  Call repeatedly until NULL is
 *	returned after each ncrx_process() invocation.  Each message should
 *	be free()'d by the user after consumption.
 *
 * ncrx_invoke_process_at()
 *	Message processing is timing dependent and ncrx often needs to take
 *	actions after a certain time period even when there haven't been any
 *	new packets.  This function indicates when the caller should invoke
 *	ncrx_process() at the latest.
 *
 *	The returned time is an absolute time on the same clock as the
 *	@now_mono previously provided to ncrx_process().  e.g. if
 *	ncrx_process() needs to be invoked 4 seconds after the last
 *	invocation where @now_mono was 60000, this function will return
 *	64000.  Returns UINT64_MAX if there's no pending timing dependent
 *	operation.
 *
 * See ncrx/ncrx.c for a simple example.
141 | */ 142 | int ncrx_process(const char *payload, uint64_t now_mono, uint64_t now_real, 143 | struct ncrx *ncrx); 144 | const char *ncrx_response(struct ncrx *ncrx, int *lenp); 145 | struct ncrx_msg *ncrx_next_msg(struct ncrx *ncrx); 146 | uint64_t ncrx_invoke_process_at(struct ncrx *ncrx); 147 | 148 | #endif /* __NETCONSOLE_NCRX__ */ 149 | -------------------------------------------------------------------------------- /ncrx/nctx.c: -------------------------------------------------------------------------------- 1 | /* 2 | * nctx - extended netconsole sender 3 | * 4 | * Copyright (c) Meta Platforms, Inc. and affiliates. 5 | * 6 | * This source code is licensed under the BSD-style license found in the 7 | * LICENSE file in the root directory of this source tree. 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "ncrx.h" 24 | 25 | /* in msecs */ 26 | #define ACK_TIMEOUT 10000 27 | #define EMG_TX_MAX_INTV 1000 28 | #define EMG_TX_MIN_INTV 100 29 | 30 | union sockaddr_in46 { 31 | struct sockaddr addr; 32 | struct sockaddr_in6 in6; 33 | struct sockaddr_in in4; 34 | }; 35 | 36 | struct kmsg_slot { 37 | char *msg; 38 | uint64_t ts; 39 | }; 40 | 41 | struct kmsg_ring { 42 | int head; 43 | int tail; 44 | int nr_slots; 45 | uint64_t head_seq; 46 | union sockaddr_in46 raddr; 47 | int raddr_len; 48 | int emg_tx_intv; 49 | uint64_t emg_tx_seq; 50 | uint64_t emg_tx_ts; 51 | struct kmsg_slot *slots; 52 | }; 53 | 54 | /* relative time in msecs */ 55 | static uint64_t current_msec(void) 56 | { 57 | struct timespec ts; 58 | 59 | if (clock_gettime(CLOCK_MONOTONIC, &ts)) { 60 | perror("clock_gettime"); 61 | exit(1); 62 | } 63 | return ts.tv_sec * 1000 + ts.tv_nsec / 1000000; 64 | } 65 | 66 | static int kmsg_ring_init(struct kmsg_ring *ring, int nr_slots) 67 | { 68 | memset(ring, 0, sizeof(*ring)); 69 | 70 | ring->slots = 
malloc(sizeof(ring->slots[0]) * nr_slots); 71 | if (!ring->slots) 72 | return -1; 73 | 74 | ring->nr_slots = nr_slots; 75 | return 0; 76 | } 77 | 78 | /* advance @ring's head by one, if head catches up with tail, clip it */ 79 | static void kmsg_ring_advance(struct kmsg_ring *ring) 80 | { 81 | struct kmsg_slot *slot; 82 | 83 | ring->head_seq++; 84 | ring->head = (ring->head + 1) % ring->nr_slots; 85 | slot = &ring->slots[ring->head]; 86 | 87 | if (ring->tail == ring->head) { 88 | free(slot->msg); 89 | memset(slot, 0, sizeof(*slot)); 90 | ring->tail = (ring->tail + 1) % ring->nr_slots; 91 | } 92 | } 93 | 94 | /* fill @ring with kmsgs from @devkmsg, returns 0 on success, -1 on failure */ 95 | static int kmsg_ring_fill(struct kmsg_ring *ring, int devkmsg) 96 | { 97 | char buf[NCRX_LINE_MAX]; 98 | struct kmsg_slot *slot; 99 | int level; 100 | uint64_t seq; 101 | ssize_t len; 102 | 103 | next_line: 104 | do { 105 | len = read(devkmsg, buf, sizeof(buf) - 1); 106 | /* 107 | * EPIPE indicates skipped messages. kmsgs are always 108 | * stored according to their sequence numbers, so we don't 109 | * need to do anything special on EPIPE. Keep reading. 
110 | */ 111 | } while (len < 0 && errno == EPIPE); 112 | 113 | if (len < 0) { 114 | if (errno == EAGAIN) 115 | return 0; 116 | return -1; 117 | } 118 | 119 | /* read seq and see if it makes sense */ 120 | buf[len] = '\0'; 121 | if (sscanf(buf, "%d,%"SCNu64",", &level, &seq) != 2 || 122 | seq < ring->head_seq) { 123 | fprintf(stderr, "Warning: malformed kmsg \"%s\"\n", buf); 124 | goto next_line; 125 | } 126 | 127 | /* wind ring till head is at the right slot and store */ 128 | while (ring->head_seq < seq) 129 | kmsg_ring_advance(ring); 130 | 131 | slot = &ring->slots[ring->head]; 132 | slot->msg = strdup(buf); 133 | if (!slot->msg) 134 | return -1; 135 | 136 | slot->ts = current_msec(); 137 | kmsg_ring_advance(ring); 138 | goto next_line; 139 | } 140 | 141 | /* sequence number of the oldest occupied slot in @ring */ 142 | static uint64_t kmsg_ring_tail_seq(struct kmsg_ring *ring) 143 | { 144 | int nr; 145 | 146 | nr = ring->head - ring->tail; 147 | if (nr < 0) 148 | nr += ring->nr_slots; 149 | return ring->head_seq - nr; 150 | } 151 | 152 | /* peek kmsg matching @seq, NULL if not found */ 153 | static char *kmsg_ring_peek(struct kmsg_ring *ring, uint64_t seq) 154 | { 155 | int idx; 156 | 157 | if (seq < kmsg_ring_tail_seq(ring) || seq >= ring->head_seq) 158 | return NULL; 159 | 160 | idx = ring->head - (int)(ring->head_seq - seq); 161 | if (idx < 0) 162 | idx += ring->nr_slots; 163 | 164 | return ring->slots[idx].msg; 165 | } 166 | 167 | /* free slots upto @upto_seq, tail_seq is @upto_seq + 1 afterwards */ 168 | static void kmsg_ring_consume(struct kmsg_ring *ring, uint64_t upto_seq) 169 | { 170 | uint64_t tail_seq = kmsg_ring_tail_seq(ring); 171 | int tail = ring->tail; 172 | 173 | if (!ring->head_seq || upto_seq < tail_seq) 174 | return; 175 | 176 | if (upto_seq >= ring->head_seq) 177 | upto_seq = ring->head_seq - 1; 178 | 179 | while (tail_seq <= upto_seq) { 180 | struct kmsg_slot *slot = &ring->slots[ring->head]; 181 | 182 | free(slot->msg); 183 | memset(slot, 
0, sizeof(*slot)); 184 | tail_seq++; 185 | tail = (tail + 1) % ring->nr_slots; 186 | 187 | /* made progress, reset emergency tx */ 188 | ring->emg_tx_intv = 0; 189 | } 190 | 191 | ring->tail = tail; 192 | } 193 | 194 | /* 195 | * Send @msg to @addr via @sock. If @msg is too long, split into 196 | * NCRX_PKT_MAX byte chunks with ncfrag header added. If @is_emg_tx is 197 | * set, add ncemg header. 198 | */ 199 | static void send_kmsg(int sock, char *msg, int is_emg_tx, 200 | struct sockaddr *addr, int addr_len) 201 | { 202 | char buf[NCRX_PKT_MAX + 1]; 203 | const int max_extra_len = sizeof(",ncemg=1,ncfrag=0000/0000"); 204 | const char *header, *body; 205 | int msg_len = strlen(msg); 206 | int header_len = msg_len, body_len = 0; 207 | int chunk_len, nr_chunks, i; 208 | 209 | if (!is_emg_tx && msg_len <= NCRX_PKT_MAX) { 210 | sendto(sock, msg, msg_len, 0, addr, addr_len); 211 | return; 212 | } 213 | 214 | /* need to insert extra header fields, detect header and body */ 215 | header = msg; 216 | body = memchr(msg, ';', msg_len); 217 | if (body) { 218 | header_len = body - header; 219 | body_len = msg_len - header_len - 1; 220 | body++; 221 | } 222 | 223 | chunk_len = NCRX_PKT_MAX - header_len - max_extra_len; 224 | if (chunk_len <= 0) { 225 | fprintf(stderr, "Error: invalid chunk_len %d in send_kmsg()\n", 226 | chunk_len); 227 | return; 228 | } 229 | 230 | /* 231 | * Transfer possibly multiple chunks with extra header fields. 232 | * 233 | * For emergency transfers due to missing acks, add "emg=1". 234 | * 235 | * If @msg needs to be split to fit NCRX_PKT_MAX, add 236 | * "ncfrag=/" to identify each chunk. 
237 | */ 238 | memcpy(buf, header, header_len); 239 | nr_chunks = (body_len + chunk_len - 1) / chunk_len; 240 | 241 | for (i = 0; i < nr_chunks; i++) { 242 | int offset = i * chunk_len; 243 | int this_header = header_len; 244 | int this_chunk; 245 | 246 | this_chunk = body_len - offset; 247 | if (this_chunk > chunk_len) 248 | this_chunk = chunk_len; 249 | 250 | if (is_emg_tx && this_header < sizeof(buf)) 251 | this_header += snprintf(buf + this_header, 252 | sizeof(buf) - this_header, 253 | ",ncemg=1"); 254 | if (nr_chunks > 1 && this_header < sizeof(buf)) 255 | this_header += snprintf(buf + this_header, 256 | sizeof(buf) - this_header, 257 | ",ncfrag=%d/%d", 258 | offset, body_len); 259 | if (this_header < sizeof(buf)) 260 | this_header += snprintf(buf + this_header, 261 | sizeof(buf) - this_header, ";"); 262 | 263 | if (this_header + chunk_len > NCRX_PKT_MAX) { 264 | fprintf(stderr, "Error: this_header %d is too large for chunk_len %d in send_kmsg()\n", 265 | this_header, chunk_len); 266 | return; 267 | } 268 | 269 | memcpy(buf + this_header, body, this_chunk); 270 | 271 | sendto(sock, buf, this_header + this_chunk, 0, addr, addr_len); 272 | 273 | body += this_chunk; 274 | } 275 | } 276 | 277 | /* rx and handle response packets from @sock, returns 0 on success, -1 on err */ 278 | static int kmsg_ring_process_resps(struct kmsg_ring *ring, int sock) 279 | { 280 | char rx_buf[NCRX_PKT_MAX + 1]; 281 | union sockaddr_in46 raddr; 282 | struct iovec iov = { .iov_base = rx_buf, .iov_len = NCRX_PKT_MAX }; 283 | struct msghdr msgh = { .msg_name = &raddr.addr, .msg_iov = &iov, 284 | .msg_iovlen = 1 }; 285 | ssize_t len; 286 | char *pos, *tok; 287 | uint64_t seq; 288 | 289 | next_packet: 290 | msgh.msg_namelen = sizeof(raddr); 291 | len = recvmsg(sock, &msgh, MSG_DONTWAIT); 292 | if (len < 0) { 293 | if (errno == EAGAIN) 294 | return 0; 295 | return -1; 296 | } 297 | 298 | rx_buf[len] = '\0'; 299 | pos = rx_buf; 300 | tok = strsep(&pos, " "); 301 | 302 | /* "ncrx" header */ 
303 | if (strncmp(tok, "ncrx", 4)) { 304 | char addr_str[INET6_ADDRSTRLEN]; 305 | 306 | if (raddr.addr.sa_family == AF_INET6) 307 | inet_ntop(AF_INET6, &raddr.in6.sin6_addr, 308 | addr_str, sizeof(addr_str)); 309 | else 310 | inet_ntop(AF_INET, &raddr.in4.sin_addr, 311 | addr_str, sizeof(addr_str)); 312 | 313 | fprintf(stderr, "Warning: malformed packet from [%s]:%u\n", 314 | addr_str, ntohs(raddr.in4.sin_port)); 315 | goto next_packet; 316 | } 317 | tok += 4; 318 | 319 | /* */ 320 | if (sscanf(tok, "%"SCNu64, &seq)) 321 | kmsg_ring_consume(ring, seq); 322 | 323 | /* ... */ 324 | while ((tok = strsep(&pos, " "))) { 325 | if (sscanf(tok, "%"SCNu64, &seq)) { 326 | char *msg = kmsg_ring_peek(ring, seq); 327 | if (msg) 328 | send_kmsg(sock, msg, 0, 329 | &raddr.addr, msgh.msg_namelen); 330 | } 331 | } 332 | 333 | /* stash remote address for emergency tx */ 334 | ring->raddr = raddr; 335 | ring->raddr_len = msgh.msg_namelen; 336 | 337 | goto next_packet; 338 | } 339 | 340 | /* 341 | * Perform emergency tx if necessary. Must be called after @ring is filled 342 | * and responses are processed. Returns the duration in msecs after which 343 | * this function should be invoked again. If -1, timeout isn't necessary. 
344 | */ 345 | static int kmsg_ring_emg_tx(struct kmsg_ring *ring, int sock) 346 | { 347 | struct kmsg_slot *slot = &ring->slots[ring->tail]; 348 | uint64_t target, now; 349 | uint64_t tail_seq; 350 | char *msg; 351 | 352 | /* if @ring is empty or remote site is not established, nothing to do */ 353 | if (ring->head == ring->tail || !ring->raddr_len) { 354 | ring->emg_tx_intv = 0; 355 | return -1; 356 | } 357 | 358 | /* calculate the next deadline, if in the future, return the diff */ 359 | if (!ring->emg_tx_intv) 360 | target = slot->ts + ACK_TIMEOUT; 361 | else 362 | target = ring->emg_tx_ts + ring->emg_tx_intv; 363 | 364 | now = current_msec(); 365 | 366 | if (target > now) 367 | return target - now; 368 | 369 | tail_seq = kmsg_ring_tail_seq(ring); 370 | 371 | if (!ring->emg_tx_intv) { 372 | /* new emg tx session */ 373 | ring->emg_tx_intv = EMG_TX_MIN_INTV; 374 | ring->emg_tx_seq = tail_seq; 375 | } else if (ring->emg_tx_seq < ring->head_seq) { 376 | /* in the middle of emg tx session */ 377 | ring->emg_tx_seq++; 378 | if (ring->emg_tx_seq < tail_seq) 379 | ring->emg_tx_seq = tail_seq; 380 | } else { 381 | /* finished one session, increase intv and repeat */ 382 | ring->emg_tx_intv *= 2; 383 | if (ring->emg_tx_intv < EMG_TX_MAX_INTV) 384 | ring->emg_tx_intv = EMG_TX_MAX_INTV; 385 | ring->emg_tx_seq = tail_seq; 386 | } 387 | 388 | msg = kmsg_ring_peek(ring, ring->emg_tx_seq); 389 | if (msg) 390 | send_kmsg(sock, msg, 1, &ring->raddr.addr, ring->raddr_len); 391 | 392 | ring->emg_tx_ts = now; 393 | 394 | return ring->emg_tx_intv; 395 | } 396 | 397 | static void usage_err(const char *err) 398 | { 399 | if (err) 400 | fprintf(stderr, "Error: %s\n", err); 401 | fprintf(stderr, "Usage: nctx [-n nr_slots] [-k devkmsg_path] ip port\n"); 402 | exit(1); 403 | } 404 | 405 | int main(int argc, char **argv) 406 | { 407 | union sockaddr_in46 laddr = { }; 408 | struct pollfd pfds[2] = { }; 409 | struct kmsg_ring kmsg_ring; 410 | const char *devkmsg_path = "/dev/kmsg"; 411 | 
int nr_slots = NCRX_DFL_NR_SLOTS; 412 | int sleep_dur = -1; 413 | int opt, port, sock, devkmsg; 414 | socklen_t addrlen; 415 | 416 | while ((opt = getopt(argc, argv, "n:k:h?")) != -1) { 417 | switch (opt) { 418 | case 'n': 419 | nr_slots = atoi(optarg); 420 | if (nr_slots <= 0) 421 | usage_err("nr_slots must be a positive number"); 422 | break; 423 | case 'k': 424 | devkmsg_path = optarg; 425 | break; 426 | default: 427 | usage_err(NULL); 428 | } 429 | } 430 | 431 | if (optind + 2 != argc) 432 | usage_err(NULL); 433 | 434 | if (inet_pton(AF_INET6, argv[optind], &laddr.in6.sin6_addr)) { 435 | laddr.addr.sa_family = AF_INET6; 436 | addrlen = sizeof(laddr.in6); 437 | } else if (inet_pton(AF_INET, argv[optind], &laddr.in4.sin_addr)) { 438 | laddr.addr.sa_family = AF_INET; 439 | addrlen = sizeof(laddr.in4); 440 | } else { 441 | usage_err("invalid IP address"); 442 | } 443 | 444 | port = atoi(argv[optind + 1]); 445 | if (port <= 0 || port > 65535) 446 | usage_err("invalid port number"); 447 | 448 | laddr.in4.sin_port = htons(port); 449 | 450 | sock = socket(laddr.addr.sa_family, SOCK_DGRAM, 0); 451 | if (sock < 0) { 452 | perror("socket"); 453 | return 1; 454 | } 455 | 456 | if (bind(sock, &laddr.addr, addrlen)) { 457 | perror("bind"); 458 | return 1; 459 | } 460 | 461 | devkmsg = open(devkmsg_path, O_RDONLY | O_NONBLOCK); 462 | if (devkmsg < 0) { 463 | perror("open"); 464 | return 1; 465 | } 466 | 467 | if (kmsg_ring_init(&kmsg_ring, nr_slots)) { 468 | perror("kmsg_ring_init"); 469 | return 1; 470 | } 471 | 472 | pfds[0].events = POLLIN; 473 | pfds[1].events = POLLIN; 474 | pfds[0].fd = devkmsg; 475 | pfds[1].fd = sock; 476 | 477 | while (poll(pfds, 2, sleep_dur) >= 0) { 478 | if (kmsg_ring_fill(&kmsg_ring, devkmsg)) { 479 | perror("kmsg_ring_fill"); 480 | return 1; 481 | } 482 | 483 | if (kmsg_ring_process_resps(&kmsg_ring, sock)) { 484 | perror("kmsg_ring_process_resps"); 485 | return 1; 486 | } 487 | 488 | sleep_dur = kmsg_ring_emg_tx(&kmsg_ring, sock); 489 | } 490 | 
def make_dictionary_string(msg):
    r"""Join the items of ``msg`` as ``KEY=VALUE`` pairs separated by NUL.

    The result has the form ``X=Y\0X=Y`` with no trailing ``\0``; an empty
    mapping yields an empty string.

    The docstring is a raw string so that the ``\0`` shown above stays
    readable text instead of becoming a real NUL byte in ``__doc__``.
    """
    return "\0".join(f"{key}={value}" for key, value in msg.items())
76 | """ 77 | 78 | faclev = (facility.value << 3) | level.value 79 | ts_usec = int(time.monotonic() * (10**6)) 80 | return "{},{},{},{};".format(faclev, seq, ts_usec, "c" if cont else "-") 81 | 82 | 83 | def _body_escape(text): 84 | return text.replace("\0", "\n") 85 | 86 | 87 | def make_ext_body(text, dict_str): 88 | """ 89 | See printk.c's msg_print_ext_body for format spec. 90 | 91 | Escaping of unprintables is currently unimplemented. 92 | """ 93 | return f"{_body_escape(text)}\n{_body_escape(dict_str)}" 94 | 95 | 96 | def make_netcons_msg( 97 | seq=0, 98 | facility=Facility.LOG_KERN, 99 | level=Level.LOG_ERR, 100 | cont=False, 101 | text="text", 102 | meta_dict=None, 103 | ): 104 | if meta_dict is None: 105 | meta_dict = {"DICT": "test"} 106 | 107 | dict_str = make_dictionary_string(meta_dict) 108 | 109 | header = make_ext_header(seq=seq, facility=facility, level=level, cont=cont) 110 | body = make_ext_body(text=text, dict_str=dict_str) 111 | 112 | return f"{header}{body}" 113 | 114 | 115 | def parse_args(): 116 | parser = argparse.ArgumentParser(description=__doc__) 117 | parser.add_argument( 118 | "--skip", action="store_true", help="Randomly skip sequence numbers" 119 | ) 120 | parser.add_argument( 121 | "--reset", action="store_true", help="Randomly reset the sequence to 0 again" 122 | ) 123 | parser.add_argument( 124 | "--cont", action="store_true", help="Randomly insert LOG_CONT messages" 125 | ) 126 | return parser.parse_args() 127 | 128 | 129 | def main() -> None: 130 | args = parse_args() 131 | 132 | enabled_modes = [Mode.NORMAL] 133 | 134 | for arg_name, mode in ARG_TO_MODE_MAP.items(): 135 | if getattr(args, arg_name): 136 | enabled_modes.append(mode) 137 | 138 | seq = 0 139 | cont = False 140 | 141 | while True: 142 | print( 143 | make_netcons_msg( 144 | seq=seq, text="hi", meta_dict={"UNAME": "it's minix i swear"}, cont=cont 145 | ), 146 | flush=True, 147 | ) 148 | 149 | chosen_mode = random.choice(enabled_modes) 150 | 151 | if chosen_mode == 
Mode.NORMAL: 152 | new_seq = seq + 1 153 | elif chosen_mode == Mode.SKIP: 154 | new_seq = seq + random.randint(1, 5) 155 | elif chosen_mode == Mode.RESET: 156 | new_seq = 0 157 | 158 | if args.cont: 159 | cont = random.choice([True, False]) 160 | 161 | print( 162 | f"seq: {seq} -> {new_seq}, mode: {chosen_mode}, cont: {cont}", 163 | file=sys.stderr, 164 | ) 165 | seq = new_seq 166 | 167 | time.sleep(0.5) 168 | 169 | 170 | if __name__ == "__main__": 171 | main() # pragma: no cover 172 | -------------------------------------------------------------------------------- /output.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the BSD-style license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include "include/common.h" 17 | #include "include/msgbuf-struct.h" 18 | #include "include/output.h" 19 | 20 | static void *output_dlhandles[MAXOUTS]; 21 | static const char *output_dlpaths[MAXOUTS]; 22 | static void (*outputs[MAXOUTS])(int, struct in6_addr *, struct msg_buf *, 23 | struct ncrx_msg *); 24 | static int nr_outputs; 25 | 26 | int register_output_module(char *path, int nr_workers) 27 | { 28 | void *dl, *dlsym_addr; 29 | int (*mod_init)(int); 30 | int ret; 31 | 32 | if (nr_outputs == MAXOUTS) { 33 | warn("Too many output modules!\n"); 34 | return -1; 35 | } 36 | 37 | log("Loading module '%s'\n", path); 38 | dl = dlopen(path, RTLD_NOW|RTLD_LOCAL); 39 | if (!dl) { 40 | warn("Can't open '%s': %s", path, dlerror()); 41 | return -1; 42 | } 43 | 44 | dlsym_addr = dlsym(dl, "netconsd_output_handler"); 45 | if (!dlsym_addr) { 46 | warn("Can't find handler sym in '%s': %s", path, dlerror()); 47 | goto err_close; 48 | } 49 | 50 | mod_init = dlsym(dl, "netconsd_output_init"); 51 | if (mod_init) { 52 | 
log("Calling mod_init() for '%s'\n", path); 53 | ret = mod_init(nr_workers); 54 | 55 | if (ret) { 56 | warn("mod_init() for '%s' failed: %d\n", path, ret); 57 | goto err_close; 58 | } 59 | } 60 | 61 | log("Module '%s' registered (#%d@%p)\n", path, nr_outputs, dlsym_addr); 62 | output_dlhandles[nr_outputs] = dl; 63 | output_dlpaths[nr_outputs] = strdup(path); 64 | outputs[nr_outputs] = dlsym_addr; 65 | nr_outputs++; 66 | return 0; 67 | 68 | err_close: 69 | dlclose(dl); 70 | return -1; 71 | } 72 | 73 | void destroy_output_modules(void) 74 | { 75 | int i, ret; 76 | void (*mod_exit)(void); 77 | 78 | for (i = 0; i < nr_outputs; i++) { 79 | const char *path = output_dlpaths[i]; 80 | 81 | mod_exit = dlsym(output_dlhandles[i], "netconsd_output_exit"); 82 | if (mod_exit) { 83 | log("Calling mod_exit() for '%s'\n", path); 84 | mod_exit(); 85 | } 86 | 87 | log("Unloading module '%s' (#%d@%p)\n", path, i, outputs[i]); 88 | ret = dlclose(output_dlhandles[i]); 89 | if (ret) 90 | warn("dlclose() failed: %s\n", dlerror()); 91 | 92 | free((void *)path); 93 | } 94 | } 95 | 96 | void execute_output_pipeline(int thread_nr, struct in6_addr *src, 97 | struct msg_buf *buf, struct ncrx_msg *msg) 98 | { 99 | int i; 100 | 101 | for (i = 0; i < nr_outputs; i++) 102 | outputs[i](thread_nr, src, buf, msg); 103 | } 104 | -------------------------------------------------------------------------------- /threads.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the BSD-style license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "include/common.h" 15 | #include "include/msgbuf-struct.h" 16 | #include "include/listener.h" 17 | #include "include/worker.h" 18 | #include "include/threads.h" 19 | 20 | struct tctl { 21 | int nr_listeners; 22 | int nr_workers; 23 | struct ncrx_listener *listeners; 24 | struct ncrx_worker *workers; 25 | }; 26 | 27 | static void wake_thread(struct ncrx_listener *listener, int worker) 28 | { 29 | struct ncrx_worker *tgt = &listener->workers[worker]; 30 | 31 | assert_pthread_mutex_locked(&tgt->queuelock); 32 | 33 | debug("Waking thread %d\n", worker); 34 | pthread_cond_signal(&tgt->cond); 35 | } 36 | 37 | static void push_prequeue_to_worker(struct ncrx_listener *listener, int worker) 38 | { 39 | struct ncrx_worker *tgt = &listener->workers[worker]; 40 | struct ncrx_prequeue *prequeue = &listener->prequeues[worker]; 41 | 42 | assert_pthread_mutex_locked(&tgt->queuelock); 43 | 44 | if (tgt->queue_head) 45 | tgt->queue_tail->next = prequeue->queue_head; 46 | else 47 | tgt->queue_head = prequeue->queue_head; 48 | 49 | tgt->queue_tail = prequeue->queue_tail; 50 | prequeue->queue_head = NULL; 51 | 52 | debug("Listener %d pushed %d pkts to worker %d (backlog: %d)\n", 53 | listener->thread_nr, prequeue->count, worker->thread_nr, tgt->nr_queued); 54 | 55 | tgt->nr_queued += prequeue->count; 56 | prequeue->count = 0; 57 | } 58 | 59 | static void enqueue_and_wake_worker(struct ncrx_listener *listener, int worker) 60 | { 61 | struct ncrx_worker *tgt = &listener->workers[worker]; 62 | 63 | pthread_mutex_lock(&tgt->queuelock); 64 | push_prequeue_to_worker(listener, worker); 65 | wake_thread(listener, worker); 66 | pthread_mutex_unlock(&tgt->queuelock); 67 | } 68 | 69 | static int prequeue_is_empty(struct ncrx_listener *listener, int worker) 70 | { 71 | struct ncrx_prequeue *prequeue = &listener->prequeues[worker]; 72 | return prequeue->queue_head == NULL; 73 | } 74 | 75 | void 
enqueue_and_wake_all(struct ncrx_listener *listener) 76 | { 77 | int i; 78 | 79 | for (i = 0; i < listener->nr_workers; i++) 80 | if (!prequeue_is_empty(listener, i)) 81 | enqueue_and_wake_worker(listener, i); 82 | } 83 | 84 | static void stop_and_wait_for_workers(struct tctl *ctl) 85 | { 86 | int i; 87 | uint64_t total_processed = 0, total_hosts = 0; 88 | 89 | for (i = 0; i < ctl->nr_workers; i++) { 90 | pthread_mutex_lock(&ctl->workers[i].queuelock); 91 | ctl->workers[i].stop = 1; 92 | pthread_cond_signal(&ctl->workers[i].cond); 93 | pthread_mutex_unlock(&ctl->workers[i].queuelock); 94 | pthread_join(ctl->workers[i].id, NULL); 95 | 96 | pthread_mutex_destroy(&ctl->workers[i].queuelock); 97 | pthread_cond_destroy(&ctl->workers[i].cond); 98 | pthread_condattr_destroy(&ctl->workers[i].condattr); 99 | 100 | total_processed += ctl->workers[i].processed; 101 | total_hosts += ctl->workers[i].hosts_seen; 102 | log("Exiting worker %d got %" PRIu64 " msgs from %" PRIu64 " hosts\n", 103 | i, ctl->workers[i].processed, 104 | ctl->workers[i].hosts_seen); 105 | } 106 | 107 | log("Total messages processed by workers: %" PRIu64 " from %" PRIu64 " hosts\n", 108 | total_processed, total_hosts); 109 | free(ctl->workers); 110 | } 111 | 112 | static void stop_and_wait_for_listeners(struct tctl *ctl) 113 | { 114 | int i; 115 | uint64_t total_processed = 0; 116 | 117 | for (i = 0; i < ctl->nr_listeners; i++) { 118 | ctl->listeners[i].stop = 1; 119 | pthread_kill(ctl->listeners[i].id, SIGUSR1); 120 | pthread_join(ctl->listeners[i].id, NULL); 121 | 122 | free(ctl->listeners[i].prequeues); 123 | 124 | total_processed += ctl->listeners[i].processed; 125 | log("Exiting listener %d queued %" PRIu64 " messages\n", i, 126 | ctl->listeners[i].processed); 127 | } 128 | 129 | log("Total messages processed by listeners: %" PRIu64 "\n", 130 | total_processed); 131 | free(ctl->listeners); 132 | } 133 | 134 | static void create_worker_threads(struct tctl *ctl, struct netconsd_params *p) 135 | { 136 | 
struct ncrx_worker *cur, *workers; 137 | int i, r; 138 | 139 | workers = calloc(p->nr_workers, sizeof(*workers)); 140 | if (!workers) 141 | fatal("Couldn't allocate thread structures\n"); 142 | 143 | for (i = 0; i < p->nr_workers; i++) { 144 | cur = &workers[i]; 145 | 146 | pthread_mutex_init(&cur->queuelock, NULL); 147 | pthread_condattr_init(&cur->condattr); 148 | pthread_condattr_setclock(&cur->condattr, CLOCK_MONOTONIC); 149 | pthread_cond_init(&cur->cond, &cur->condattr); 150 | cur->queue_head = NULL; 151 | cur->thread_nr = i; 152 | 153 | cur->gc_int_ms = p->gc_int_ms; 154 | cur->gc_age_ms = p->gc_age_ms; 155 | cur->lastgc = p->gc_int_ms ? now_mono_ms() / p->gc_int_ms : 0; 156 | 157 | r = pthread_create(&cur->id, NULL, ncrx_worker_thread, cur); 158 | if (r) 159 | fatal("%d/%d failed: -%d\n", i, p->nr_workers, r); 160 | } 161 | 162 | ctl->nr_workers = p->nr_workers; 163 | ctl->workers = workers; 164 | } 165 | 166 | static void create_listener_threads(struct tctl *ctl, struct netconsd_params *p) 167 | { 168 | struct ncrx_prequeue *prequeues; 169 | struct ncrx_listener *cur, *listeners; 170 | int i, r; 171 | 172 | listeners = calloc(p->nr_listeners, sizeof(*listeners)); 173 | if (!listeners) 174 | fatal("Couldn't allocate listeners: %m\n"); 175 | 176 | for (i = 0; i < p->nr_listeners; i++) { 177 | cur = &listeners[i]; 178 | 179 | prequeues = calloc(ctl->nr_workers, sizeof(*prequeues)); 180 | if (!prequeues) 181 | fatal("ENOMEM %d/%d\n", i, p->nr_listeners); 182 | 183 | cur->thread_nr = i; 184 | cur->prequeues = prequeues; 185 | cur->workers = ctl->workers; 186 | cur->nr_workers = ctl->nr_workers; 187 | cur->batch = p->mmsg_batch; 188 | cur->address = &p->listen_addr; 189 | 190 | r = pthread_create(&cur->id, NULL, udp_listener_thread, cur); 191 | if (r) 192 | fatal("%d/%d failed: -%d\n", i, p->nr_listeners, r); 193 | } 194 | 195 | ctl->nr_listeners = p->nr_listeners; 196 | ctl->listeners = listeners; 197 | } 198 | 199 | void destroy_threads(struct tctl *ctl) 200 | 
CFLAGS ?= -O2 -fPIC
CFLAGS += -D_GNU_SOURCE
CPPFLAGS ?=
LDFLAGS ?=
LIBS = -lpthread

# 'all' and 'clean' are commands, not files.
.PHONY: all clean

all: netconsblaster

# Listing the source as a prerequisite makes the binary rebuild whenever
# netconsblaster.c changes; without it an existing binary is never refreshed.
netconsblaster: netconsblaster.c
	$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) netconsblaster.c $(LIBS) -o netconsblaster

clean:
	rm -f netconsblaster
/*
 * Add the @len bytes at @data, read as big-endian 16-bit words, to the
 * running one's complement sum @sum and return the new sum.  End-around
 * carries are folded after every word, so the result stays below 65536 when
 * @sum did.
 *
 * The data is read byte by byte, so @data needs no particular alignment.  An
 * odd trailing byte is padded with a zero octet, as RFC768 prescribes.
 */
static int ones_complement_sum(uint16_t *data, int len, int sum)
{
	const uint8_t *bytes = (const uint8_t *)data;
	unsigned int tmp;
	int i;

	for (i = 0; i + 1 < len; i += 2) {
		tmp = ((unsigned int)bytes[i] << 8) | bytes[i + 1];

		/*
		 * Kill -0
		 */
		if (tmp == 65535)
			tmp = 0;

		sum += tmp;
		if (sum >= 65536) {
			sum &= 65535;
			sum++;
		}
	}

	if (len & 1) {
		/* Final lone octet is the high byte of a zero-padded word. */
		sum += (unsigned int)bytes[len - 1] << 8;
		if (sum >= 65536) {
			sum &= 65535;
			sum++;
		}
	}

	return sum;
}

/*
 * From RFC768: "Checksum is the 16-bit one's complement of the one's
 * complement sum of a pseudo header of information from the IP header, the UDP
 * header, and the data, padded with zero octets at the end (if necessary) to
 * make a multiple of two octets."
 *
 * See RFC2460 section 8.1 for definition of pseudoheader for IPv6.
 *
 * In case you're wondering why I bothered with this: "Unlike IPv4, when UDP
 * packets are originated by an IPv6 node, the UDP checksum is NOT optional.
 * IPv6 receivers MUST discard packets containing a zero checksum."
 *
 * @addrptr: Pointer to the beginning of the src/dst addresses in the ipv6hdr
 * @udppkt: Pointer to the udphdr
 * @len: Length of the udphdr and its payload
 *
 * Returns the checksum in host byte order, always in the range 1..65535.
 */
static int udp_csum(void *addrptr, void *udppkt, int len)
{
	unsigned int sum = 0;
	uint16_t *addrs = addrptr;
	uint16_t pseudohdr[4] = {0, htons(len), 0, htons(IPPROTO_UDP)};

	sum = ones_complement_sum(addrs, 32, 0);
	sum = ones_complement_sum(pseudohdr, 8, sum);
	sum = ones_complement_sum(udppkt, len, sum);

	/*
	 * Complement within 16 bits only.  A bare ~sum sets the upper bits of
	 * the unsigned int, so the zero test below could never fire.
	 */
	sum = ~sum & 65535;

	/*
	 * From RFC768: "If the computed checksum is zero, it is transmitted as
	 * all ones. An all zero transmitted checksum value means that the
	 * transmitter generated no checksum"
	 */
	if (sum == 0)
		sum = 65535;

	return sum;
}
147 | */ 148 | static const char *filler = "012345678901234567890123456789012345678901234567890123456789012"; 149 | 150 | /* 151 | * Numeric to symbol for the CONT flag 152 | */ 153 | static const char *contflag(int cont) 154 | { 155 | switch (cont) { 156 | case 0: 157 | /* 158 | * No CONT flag present 159 | */ 160 | return "-"; 161 | case 1: 162 | /* 163 | * CONT_START 164 | */ 165 | return "c"; 166 | case 2: 167 | /* 168 | * CONT 169 | */ 170 | return "+"; 171 | default: 172 | fatal("CONT value %d invalid?\n", cont); 173 | }; 174 | } 175 | 176 | static void make_packet(struct netcons_packet *pkt, const struct in6_addr *src, 177 | const struct in6_addr *dst, const int16_t *dst_port, const struct netcons_metadata *md) 178 | { 179 | const int len = NETCONSLEN; 180 | unsigned int nr; 181 | 182 | memset(pkt, 0, sizeof(pkt->l3) + sizeof(pkt->l4)); 183 | 184 | memcpy(&pkt->l3.ip6_src, src, sizeof(*src)); 185 | memcpy(&pkt->l3.ip6_dst, dst, sizeof(*dst)); 186 | pkt->l3.ip6_vfc |= (6 << 4); 187 | pkt->l3.ip6_nxt = IPPROTO_UDP; 188 | pkt->l3.ip6_plen = htons(sizeof(pkt->l4) + len); 189 | pkt->l3.ip6_hlim = 64; 190 | 191 | nr = snprintf(pkt->payload, len - 1, "%d,%" PRIu64 ",%" PRIu64 ",%s;", 192 | md->lvl, md->seq, md->ts, contflag(md->cont)); 193 | if (nr < len) 194 | snprintf(pkt->payload + nr, len - nr, "%s", filler); 195 | pkt->payload[len - 1] = '\n'; 196 | 197 | pkt->l4.uh_sport = htons(6666); 198 | pkt->l4.uh_dport = htons(*dst_port); 199 | pkt->l4.uh_ulen = htons(sizeof(pkt->l4) + len); 200 | pkt->l4.uh_sum = htons(udp_csum(&pkt->l3.ip6_src, &pkt->l4, 201 | sizeof(pkt->l4) + len)); 202 | } 203 | 204 | static int write_packet(int sockfd, struct netcons_packet *pkt) 205 | { 206 | const int len = sizeof(pkt->l3) + sizeof(pkt->l4) + NETCONSLEN; 207 | struct sockaddr_in6 bogus = { 208 | .sin6_family = AF_INET6, 209 | }; 210 | 211 | memcpy(&bogus.sin6_addr, &pkt->l3.ip6_dst, sizeof(pkt->l3.ip6_dst)); 212 | return sendto(sockfd, pkt, len, 0, (const struct sockaddr 
/*
 * Keep only @bits bits of @val and clear the rest.
 *
 * The mask is the low @bits bits of a 64-bit word.  On big-endian hosts it
 * is byte-swapped, so the selected bits cover the same bytes in memory as
 * they would on a little-endian host.
 *
 * @bits <= 0 yields 0 and @bits >= 64 yields @val unchanged; both cases are
 * handled up front because shifting by a negative count or by the full type
 * width is undefined.
 */
static uint64_t mask_long(uint64_t val, int bits)
{
	uint64_t mask;

	if (bits <= 0)
		return 0;
	if (bits >= 64)
		return val;

	/* UINT64_C keeps the shift 64 bits wide even where long is 32 bits. */
	mask = (UINT64_C(1) << bits) - 1;

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	mask = __builtin_bswap64(mask);
#endif

	return val & mask;
}
/*
 * Command-line configuration, filled in by parse_arguments(), plus the stop
 * flag raised by the signal handler.
 */
static struct params {
	int srcaddr_order;	/* -o: number of low source-address bits to randomize */
	int thread_order;	/* -t: work is split among 2^thread_order threads */
	struct in6_addr src;	/* -s: source address whose low bits get permuted */
	struct in6_addr dst;	/* -d: destination address for all generated packets */
	int16_t dst_port;	/* -p: destination UDP port for outgoing packets */
	long blastcount;	/* -n: packets to write per thread; 0 means no limit */

	int stop_blasting;	/* set to 1 by stop_signal() to end the send loops */
} params;
366 | */ 367 | p->thread_order = atoi(optarg); 368 | if (p->thread_order > 8) 369 | fatal("Largest supported thread order is 8\n"); 370 | break; 371 | case 's': 372 | /* 373 | * Source address to permute the low N bits of. 374 | */ 375 | if (inet_pton(AF_INET6, optarg, &p->src) != 1) 376 | fatal("Bad src '%s': %m\n", optarg); 377 | break; 378 | case 'd': 379 | /* 380 | * Destination address for all generated packets. 381 | */ 382 | if (inet_pton(AF_INET6, optarg, &p->dst) != 1) 383 | fatal("Bad dst '%s': %m\n", optarg); 384 | break; 385 | case 'n': 386 | /* 387 | * Write N packets from each worker thread and exit. 388 | */ 389 | p->blastcount = atol(optarg); 390 | break; 391 | case 'p': 392 | /* 393 | * Set the destination UDP port for outgoing packets. 394 | */ 395 | p->dst_port = atoi(optarg); 396 | break; 397 | case 'h': 398 | puts("Usage: netconsblaster [-o srcaddr_bits] [-t thread_order]\n" 399 | " [-s srcaddr] [-d dstaddr]\n" 400 | " [-n pktcount] [-p dst_port]\n"); 401 | puts(" srcaddr_bits: Randomize low N bits of srcaddr"); 402 | puts(" thread_order: Split work among 2^N threads"); 403 | puts(" pktcount: Stop after N pkts per thread\n"); 404 | puts(" dst_port: The UDP destination port\n"); 405 | exit(0); 406 | default: 407 | fatal("Invalid command line parameters\n"); 408 | } 409 | } 410 | } 411 | 412 | static void stop_signal(__attribute__((__unused__))int signum) 413 | { 414 | params.stop_blasting = 1; 415 | } 416 | 417 | int main(int argc, char **argv) 418 | { 419 | int i, nr_threads, srcaddr_per_thread; 420 | uint64_t tmp, count, start, finish; 421 | struct blaster_state *threadstates, *threadstate; 422 | struct sigaction stopper = { 423 | .sa_handler = stop_signal, 424 | }; 425 | 426 | parse_arguments(argc, argv, ¶ms); 427 | 428 | nr_threads = 1 << params.thread_order; 429 | srcaddr_per_thread = params.srcaddr_order - params.thread_order; 430 | 431 | if (srcaddr_per_thread <= 0) 432 | fatal("More thread bits than srcaddr bits\n"); 433 | 434 | 
threadstates = calloc(nr_threads, sizeof(*threadstates)); 435 | if (!threadstates) 436 | fatal("ENOMEM allocating state for threads\n"); 437 | 438 | sigaction(SIGINT, &stopper, NULL); 439 | 440 | for (i = 0; i < nr_threads; i++) { 441 | threadstate = &threadstates[i]; 442 | 443 | memcpy(&threadstate->src, ¶ms.src, sizeof(threadstate->src)); 444 | memcpy(&threadstate->dst, ¶ms.dst, sizeof(threadstate->dst)); 445 | memcpy(&threadstate->dst_port, ¶ms.dst_port, sizeof(threadstate->dst_port)); 446 | threadstate->blastcount = params.blastcount; 447 | threadstate->stopptr = ¶ms.stop_blasting; 448 | threadstate->bits = srcaddr_per_thread; 449 | 450 | threadstate->src.s6_addr[15] = (unsigned char)i; 451 | threadstate->nr = i; 452 | 453 | if (pthread_create(&threadstate->id, NULL, blaster_thread, threadstate)) 454 | fatal("Thread %d/%d failed: %m\n", i, nr_threads); 455 | } 456 | 457 | count = 0; 458 | 459 | start = now_epoch_ms(); 460 | for (i = 0; i < nr_threads; i++) { 461 | pthread_join(threadstates[i].id, (void**)&tmp); 462 | count += tmp; 463 | } 464 | finish = now_epoch_ms(); 465 | 466 | printf("Wrote %" PRIu64 " packets (%" PRIu64 " pkts/sec)\n", count, 467 | count / (finish - start) * 1000UL); 468 | return 0; 469 | } 470 | -------------------------------------------------------------------------------- /worker.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the BSD-style license found in the 5 | * LICENSE file in the root directory of this source tree. 
/*
 * Bitmask selecting the low @order bits, i.e. 2^order - 1.
 */
static unsigned long order_mask(int order)
{
	const unsigned long nr_buckets = 1UL << order;

	return nr_buckets - 1;
}

/*
 * Reduce @hash to an index into a table of 2^order buckets.
 */
static unsigned long htable_mask(unsigned long hash, int order)
{
	const unsigned long mask = order_mask(order);

	return mask & hash;
}
/*
 * Convert @t to milliseconds: whole seconds scaled by 1000 plus the
 * nanosecond part divided down to milliseconds.  The signed result is
 * returned as uint64_t, so a negative tv_sec comes out as a very large
 * value.
 */
static uint64_t ms_from_timespec(struct timespec *t)
{
	const long long whole_ms = t->tv_sec * 1000LL;
	const long frac_ms = t->tv_nsec / 1000000L;

	return whole_ms + frac_ms;
}
node->next = list; 161 | node->prev = prev; 162 | prev->next = node; 163 | list->prev = node; 164 | } 165 | 166 | static void timerlist_del(struct timerlist *node) 167 | { 168 | struct timerlist *prev = node->prev; 169 | struct timerlist *next = node->next; 170 | 171 | prev->next = next; 172 | next->prev = prev; 173 | timerlist_init(node); 174 | } 175 | 176 | /* 177 | * Return the callback time of the newest item on the list 178 | */ 179 | static uint64_t timerlist_peek(struct timerlist *list) 180 | { 181 | if (timerlist_empty(list)) 182 | return 0; 183 | 184 | return list->prev->when; 185 | } 186 | 187 | #define timerlist_for_each(this, n, thead) \ 188 | for (this = (thead)->next, n = this->next; this != (thead); \ 189 | this = n, n = this->next) 190 | 191 | static struct timerlist *create_timerlists(void) 192 | { 193 | struct timerlist *ret; 194 | int i; 195 | 196 | ret = calloc(NETCONS_RTO, sizeof(*ret)); 197 | if (!ret) 198 | fatal("Unable to allocate timerlist\n"); 199 | 200 | for (i = 0; i < NETCONS_RTO; i++) 201 | timerlist_init(&ret[i]); 202 | 203 | return ret; 204 | } 205 | 206 | static void destroy_timerlists(struct timerlist *timerlist) 207 | { 208 | free(timerlist); 209 | } 210 | 211 | static struct hashtable *create_hashtable(int order, struct hashtable *old) 212 | { 213 | struct hashtable *new; 214 | struct bucket *bkt; 215 | unsigned long i; 216 | 217 | new = zalloc(sizeof(*new) + sizeof(struct bucket) * (1UL << order)); 218 | if (!new) 219 | fatal("Unable to allocate hashtable\n"); 220 | 221 | new->order = order; 222 | 223 | if (!old) 224 | return new; 225 | 226 | for (i = 0; i < (1UL << old->order); i++) { 227 | if (old->table[i].ncrx) { 228 | bkt = hlookup(new, &old->table[i].src); 229 | memcpy(bkt, &old->table[i], sizeof(*bkt)); 230 | 231 | /* 232 | * If the timernode wasn't on a list, initialize it as 233 | * empty for the new bucket. If it was, update its 234 | * neighbors to point to the new bucket. 
235 | */ 236 | if (bkt->timernode.next == &old->table[i].timernode) { 237 | timerlist_init(&bkt->timernode); 238 | } else { 239 | bkt->timernode.next->prev = &bkt->timernode; 240 | bkt->timernode.prev->next = &bkt->timernode; 241 | } 242 | } 243 | } 244 | 245 | new->load = old->load; 246 | 247 | free(old); 248 | return new; 249 | } 250 | 251 | static void destroy_hashtable(struct hashtable *ht) 252 | { 253 | unsigned long i; 254 | 255 | for (i = 0; i < (1UL << ht->order); i++) 256 | if (ht->table[i].ncrx) 257 | ncrx_destroy(ht->table[i].ncrx); 258 | 259 | free(ht); 260 | } 261 | 262 | static void maybe_resize_hashtable(struct ncrx_worker *cur, unsigned long new) 263 | { 264 | unsigned long neworder; 265 | 266 | if ((cur->ht->load + new) >> (cur->ht->order - 2) < 3) 267 | return; 268 | 269 | /* 270 | * The hashtable is more than 75% full. Resize it such that it can take 271 | * @new additional client hosts and be less than 50% full. 272 | */ 273 | neworder = LONG_BIT - __builtin_clzl(cur->ht->load + new) + 1; 274 | cur->ht = create_hashtable(neworder, cur->ht); 275 | } 276 | 277 | static void hdelete(struct hashtable *h, struct bucket *victim) 278 | { 279 | struct bucket *old, *new; 280 | unsigned long origidx, idx; 281 | 282 | fatal_on(!victim->ncrx, "Attempt to delete free bucket\n"); 283 | 284 | if (!timerlist_empty(&victim->timernode)) 285 | timerlist_del(&victim->timernode); 286 | 287 | h->load--; 288 | ncrx_destroy(victim->ncrx); 289 | memset(victim, 0, sizeof(*victim)); 290 | 291 | /* 292 | * There's potential to be clever here, but for now just be pedantic and 293 | * rebucket any potentially probed entries. 
294 | */ 295 | 296 | origidx = victim - h->table; 297 | idx = origidx; 298 | while (h->table[idx].ncrx) { 299 | old = &h->table[idx]; 300 | new = hlookup(h, &old->src); 301 | if (new != old) { 302 | memcpy(new, old, sizeof(*new)); 303 | memset(old, 0, sizeof(*old)); 304 | 305 | /* 306 | * If the timernode wasn't on a list, initialize it as 307 | * empty for the new bucket. If it was, update its 308 | * neighbors to point to the new bucket. 309 | */ 310 | if (new->timernode.next == &old->timernode) { 311 | timerlist_init(&new->timernode); 312 | } else { 313 | new->timernode.next->prev = &new->timernode; 314 | new->timernode.prev->next = &new->timernode; 315 | } 316 | } 317 | 318 | idx = htable_mask(idx + 1, h->order); 319 | fatal_on(idx == origidx, "Infinite loop in hdelete()\n"); 320 | } 321 | } 322 | 323 | /* 324 | * Simple garbage collection. This is meant to be rare (on the order of once per 325 | * hour), so maintaining an LRU list isn't worth the overhead: just blow through 326 | * the whole table. Worst case it's ~50MB. 
327 | */ 328 | static void try_to_garbage_collect(struct ncrx_worker *cur) 329 | { 330 | unsigned long i, count = 0; 331 | uint64_t now, end; 332 | struct bucket *bkt; 333 | 334 | now = now_mono_ms(); 335 | for (i = 0; i < (1UL << cur->ht->order); i++) { 336 | bkt = &cur->ht->table[i]; 337 | 338 | if (bkt->ncrx && now - bkt->last_seen > cur->gc_age_ms) { 339 | hdelete(cur->ht, bkt); 340 | count++; 341 | } 342 | } 343 | end = now_mono_ms(); 344 | 345 | log("Worker %d GC'd %lu in %" PRIu64 "ms\n", cur->thread_nr, count, 346 | end - now); 347 | } 348 | 349 | static void maybe_garbage_collect(struct ncrx_worker *cur) 350 | { 351 | uint64_t nowgc; 352 | 353 | if (!cur->gc_int_ms) 354 | return; 355 | 356 | nowgc = now_mono_ms() / cur->gc_int_ms; 357 | if (nowgc > cur->lastgc) { 358 | try_to_garbage_collect(cur); 359 | cur->lastgc = nowgc; 360 | } 361 | } 362 | 363 | static void schedule_ncrx_callback(struct ncrx_worker *cur, struct bucket *bkt, 364 | uint64_t when) 365 | { 366 | struct timerlist *tgtlist; 367 | uint64_t now; 368 | 369 | if (when == UINT64_MAX) { 370 | /* 371 | * No callback needed. If we had one we no longer need it, so 372 | * just remove ourselves from the timerlist. 373 | */ 374 | if (!timerlist_empty(&bkt->timernode)) 375 | timerlist_del(&bkt->timernode); 376 | 377 | return; 378 | } 379 | 380 | /* 381 | * Never queue messages outside the current window. This clamp() is what 382 | * guarantees that the callbacks in the timerlists are strictly ordered 383 | * from least to most recent: at any given moment only one callback time 384 | * corresponds to each bucket, and time cannot go backwards. 385 | */ 386 | now = now_mono_ms(); 387 | when = clamp(when, now + 1, now + NETCONS_RTO); 388 | 389 | /* 390 | * If the bucket is already on a timerlist, we only requeue it if the 391 | * callback needs to happen earlier than the one currently queued. 
392 | */ 393 | if (!timerlist_empty(&bkt->timernode)) { 394 | if (when > bkt->timernode.when) 395 | return; 396 | 397 | timerlist_del(&bkt->timernode); 398 | } 399 | 400 | tgtlist = &cur->tlist[when % NETCONS_RTO]; 401 | fatal_on(when < timerlist_peek(tgtlist), "Timerlist ordering broken\n"); 402 | 403 | bkt->timernode.when = when; 404 | timerlist_append(&bkt->timernode, tgtlist); 405 | maybe_update_wake(cur, when); 406 | } 407 | 408 | /* 409 | * Read any pending messages out of the bucket, and invoke the output pipeline 410 | * with the extended metadata. 411 | */ 412 | static void drain_bucket_ncrx(struct ncrx_worker *cur, struct bucket *bkt) 413 | { 414 | struct ncrx_msg *out; 415 | uint64_t when; 416 | 417 | while ((out = ncrx_next_msg(bkt->ncrx))) { 418 | execute_output_pipeline(cur->thread_nr, &bkt->src, NULL, out); 419 | free(out); 420 | } 421 | 422 | when = ncrx_invoke_process_at(bkt->ncrx); 423 | schedule_ncrx_callback(cur, bkt, when); 424 | } 425 | 426 | /* 427 | * Execute callbacks for a specific timerlist, until either the list is empty or 428 | * we reach an entry that was queued for a time in the future. 429 | */ 430 | static void do_ncrx_callbacks(struct ncrx_worker *cur, struct timerlist *list) 431 | { 432 | uint64_t now = now_mono_ms(); 433 | struct timerlist *tnode, *tmp; 434 | struct bucket *bkt; 435 | 436 | timerlist_for_each(tnode, tmp, list) { 437 | if (tnode->when > now) 438 | break; 439 | 440 | /* 441 | * Remove the bucket from the list first, since it might end up 442 | * being re-added to another timerlist by drain_bucket_ncrx(). 443 | */ 444 | timerlist_del(tnode); 445 | 446 | bkt = bucket_from_timernode(tnode); 447 | ncrx_process(NULL, now, 0, bkt->ncrx); 448 | drain_bucket_ncrx(cur, bkt); 449 | } 450 | } 451 | 452 | /* 453 | * We have no idea how large the queue we just processed was: it could have 454 | * taken tens of seconds. So we must handle wraparound in the tlist array. 
455 | */ 456 | static uint64_t run_ncrx_callbacks(struct ncrx_worker *cur, uint64_t lastrun) 457 | { 458 | uint64_t i, now = now_mono_ms(); 459 | 460 | if (now == lastrun) 461 | goto out; 462 | 463 | fatal_on(now < lastrun, "Time went backwards\n"); 464 | 465 | /* 466 | * It's possible we wrapped: in that case, we simply iterate over the 467 | * entire wheel and drain each list until we hit a callback after now. 468 | * Otherwise, we only iterate over the buckets that lie on [last,now]. 469 | */ 470 | for (i = max(lastrun, now - NETCONS_RTO + 1); i <= now; i++) 471 | do_ncrx_callbacks(cur, &cur->tlist[i % NETCONS_RTO]); 472 | 473 | out: 474 | return now; 475 | } 476 | 477 | static void consume_msgbuf(struct ncrx_worker *cur, struct msg_buf *buf) 478 | { 479 | struct bucket *ncrx_bucket; 480 | 481 | ncrx_bucket = hlookup(cur->ht, &buf->src.sin6_addr); 482 | if (!ncrx_bucket->ncrx) { 483 | ncrx_bucket->ncrx = ncrx_create(&ncrx_param); 484 | timerlist_init(&ncrx_bucket->timernode); 485 | memcpy(&ncrx_bucket->src, &buf->src.sin6_addr, 486 | sizeof(ncrx_bucket->src)); 487 | cur->ht->load++; 488 | } 489 | 490 | ncrx_bucket->last_seen = buf->rcv_time; 491 | 492 | buf->buf[buf->rcv_bytes] = '\0'; 493 | if (!ncrx_process(buf->buf, now_mono_ms(), buf->rcv_time, 494 | ncrx_bucket->ncrx)) { 495 | drain_bucket_ncrx(cur, ncrx_bucket); 496 | return; 497 | } 498 | 499 | execute_output_pipeline(cur->thread_nr, &ncrx_bucket->src, buf, NULL); 500 | } 501 | 502 | static struct msg_buf *grab_prequeue(struct ncrx_worker *cur) 503 | { 504 | struct msg_buf *ret; 505 | 506 | assert_pthread_mutex_locked(&cur->queuelock); 507 | ret = cur->queue_head; 508 | cur->queue_head = NULL; 509 | 510 | return ret; 511 | } 512 | 513 | void *ncrx_worker_thread(void *arg) 514 | { 515 | struct ncrx_worker *cur = arg; 516 | struct msg_buf *curbuf, *tmp; 517 | uint64_t lastrun = now_mono_ms(); 518 | int nr_dequeued; 519 | 520 | cur->ht = create_hashtable(16, NULL); 521 | cur->tlist = create_timerlists(); 522 
| 523 | reset_waketime(cur); 524 | pthread_mutex_lock(&cur->queuelock); 525 | while (!cur->stop) { 526 | pthread_cond_timedwait(&cur->cond, &cur->queuelock, 527 | next_waketime(cur)); 528 | 529 | reset_waketime(cur); 530 | morework: 531 | curbuf = grab_prequeue(cur); 532 | nr_dequeued = cur->nr_queued; 533 | cur->nr_queued = 0; 534 | pthread_mutex_unlock(&cur->queuelock); 535 | 536 | maybe_resize_hashtable(cur, nr_dequeued); 537 | 538 | while ((tmp = curbuf)) { 539 | consume_msgbuf(cur, curbuf); 540 | curbuf = curbuf->next; 541 | free(tmp); 542 | 543 | cur->processed++; 544 | } 545 | 546 | if (!cur->stop) { 547 | maybe_garbage_collect(cur); 548 | lastrun = run_ncrx_callbacks(cur, lastrun); 549 | } 550 | 551 | pthread_mutex_lock(&cur->queuelock); 552 | if (cur->queue_head) 553 | goto morework; 554 | } 555 | 556 | assert_pthread_mutex_locked(&cur->queuelock); 557 | fatal_on(cur->queue_head != NULL, "Worker queue not empty at exit\n"); 558 | 559 | cur->hosts_seen = cur->ht->load; 560 | destroy_timerlists(cur->tlist); 561 | destroy_hashtable(cur->ht); 562 | return NULL; 563 | } 564 | --------------------------------------------------------------------------------