├── .dockerignore
├── .gitignore
├── .gitlab-ci.yml
├── .gitmodules
├── .mailmap
├── .readthedocs.yaml
├── COPYING
├── Dockerfile
├── NEWS
├── README.md
├── ci
│   ├── dnsjit
│   │   └── Dockerfile
│   ├── luacheck-run.sh
│   ├── mypy-run.sh
│   └── pylint-run.sh
├── configs
│   ├── doh-get.toml
│   ├── doh-post.toml
│   ├── doh.toml
│   ├── doq.toml
│   ├── dot.toml
│   ├── mixed.toml
│   ├── tcp.toml
│   └── udp.toml
├── docs
│   ├── analyzing-clients.md
│   ├── capturing-traffic.md
│   ├── configuration-file.md
│   ├── configuration-presets.md
│   ├── connection-chart.md
│   ├── extracting-clients.md
│   ├── img
│   │   ├── clients.png
│   │   ├── connections.png
│   │   ├── handshakes.png
│   │   ├── latency.png
│   │   └── response-rate.png
│   ├── index.md
│   ├── installation.md
│   ├── key-concepts.md
│   ├── latency-histogram.md
│   ├── performance-tuning.md
│   ├── raw-output.md
│   ├── replaying-traffic.md
│   ├── response-rate-chart.md
│   ├── showcase
│   │   ├── connections.png
│   │   └── handshakes.png
│   └── troubleshooting.md
├── mkdocs.yml
├── pcap
│   ├── count-packets-over-time.lua
│   ├── count-packets-per-ip.lua
│   ├── cut-pcap.lua
│   ├── extract-clients.lua
│   ├── filter-dnsq.lua
│   ├── generate-const-qps.lua
│   ├── limit-clients.lua
│   ├── merge-chunks.py
│   └── split-clients.lua
├── pylintrc
├── replay.py
├── replay
│   ├── dnssim
│   │   ├── .gitignore
│   │   ├── CHANGELOG.md
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   └── src
│   │       └── output
│   │           ├── dnssim.c
│   │           ├── dnssim.h
│   │           ├── dnssim.lua
│   │           └── dnssim
│   │               ├── common.c
│   │               ├── connection.c
│   │               ├── https2.c
│   │               ├── internal.h
│   │               ├── ll.h
│   │               ├── quic.c
│   │               ├── tcp.c
│   │               ├── tls.c
│   │               └── udp.c
│   ├── luaconfig.lua.j2
│   └── shotgun.lua
├── requirements.txt
└── tools
    ├── merge-data.py
    ├── mplhlpr
    │   ├── README
    │   ├── presentation.mplstyle
    │   ├── shotgun.mplstyle
    │   └── styles.py
    ├── plot-client-distribution.py
    ├── plot-connections.py
    ├── plot-latency.py
    ├── plot-packet-rate.py
    └── plot-response-rate.py

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Tests
.pytest_cache/
.mypy_cache/

# Data files
*.pcap

replay/dnssim/build*
replay/dnssim/compile_commands.json

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Tests
.pytest_cache/
.mypy_cache/

# Data files
*.pcap

# Built docs
site/

--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
variables:
  LC_ALL: C.UTF-8
  GIT_SUBMODULE_STRATEGY: recursive
  GIT_STRATEGY: clone # sometimes unclean submodule dirs otherwise
  DNSJIT_IMAGE_NAME: knot/shotgun/ci/dnsjit

stages:
  - docker
  - test
  - deploy

.rebuild-image-rule: &rebuild-image-rule
  if: $CI_PIPELINE_SOURCE == "push"
  changes:
    - ci/dnsjit/**/*
    - .gitlab-ci.yml

dnsjit-image:
  stage: docker
  services:
    - docker:23-dind
  image: docker:23-dind
  tags:
    - amd64
    - dind
  rules:
    - <<: *rebuild-image-rule
  script:
    # We concatenate '$CI_COMMIT_BRANCH$CI_COMMIT_TAG' here - they are exclusive,
    # only one of them should apply
    - docker login -u "$CI_REGISTRY_USER" -p "$CI_JOB_TOKEN" "$CI_REGISTRY"
    - docker pull "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:latest" || true # to use as many cached layers as possible (allowed to fail)
    - docker pull "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_BRANCH$CI_COMMIT_TAG" || true # to use as many cached layers as possible (allowed to fail)
    - docker build --build-arg "CI_REGISTRY=$CI_REGISTRY" --tag "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_SHORT_SHA" ci/dnsjit
    - test "$CI_COMMIT_BRANCH" == "$CI_DEFAULT_BRANCH"
      && docker tag "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_SHORT_SHA" "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:latest"
      && echo "Created 'latest' tag"
      || echo "Not on '$CI_DEFAULT_BRANCH' branch - skipping"
    - test -n "$CI_COMMIT_BRANCH$CI_COMMIT_TAG"
      && docker tag "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_SHORT_SHA" "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_BRANCH$CI_COMMIT_TAG"
      && docker push "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_BRANCH$CI_COMMIT_TAG"
      && echo "Pushed '$CI_COMMIT_BRANCH$CI_COMMIT_TAG' tag"
      || echo "Not on a branch/tag"
    - test "$CI_COMMIT_BRANCH" == "$CI_DEFAULT_BRANCH"
      && docker tag "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_SHORT_SHA" "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:latest"
      && docker push "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:latest"
      && echo "Pushed 'latest' tag"
      || echo "Not on '$CI_DEFAULT_BRANCH' branch - skipping"

.debian: &debian
  # Concatenating $CI_COMMIT_BRANCH$CI_COMMIT_TAG - they are exclusive, only one of them applies
  image: $CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_BRANCH$CI_COMMIT_TAG
  # This is unsupported in GitLab <16.1
  #needs: []
  #rules:
  #  - <<: *rebuild-image-rule
  #    needs:
  #      - job: dnsjit-image
  #        artifacts: false
  #  - when: on_success
  tags:
    - docker
    - linux
    - amd64

black:
  <<: *debian
  stage: test
  script:
    - python3 -m black --check . --extend-exclude 'replay/dnssim/vendor'

mypy:
  <<: *debian
  stage: test
  script:
    - ./ci/mypy-run.sh

pylint:
  <<: *debian
  stage: test
  script:
    - pip3 install -r requirements.txt
    - ./ci/pylint-run.sh

luacheck:
  <<: *debian
  stage: test
  script:
    - ./ci/luacheck-run.sh

.dnssim-build: &dnssim-build
  <<: *debian
  stage: test
  script:
    - mkdir replay/dnssim/build
    - cd replay/dnssim/build
    - echo "ASAN=$DNSSIM_ASAN UBSAN=$DNSSIM_UBSAN"
    - cmake .. -DASAN=${DNSSIM_ASAN} -DUBSAN=${DNSSIM_UBSAN}
    - make

dnssim-build:
  <<: *dnssim-build
  variables:
    DNSSIM_ASAN: 0
    DNSSIM_UBSAN: 0

dnssim-build-sanitizers:
  <<: *dnssim-build
  variables:
    DNSSIM_ASAN: 1
    DNSSIM_UBSAN: 1

docker:
  stage: deploy
  script:
    - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
    - docker build --no-cache -t "$CI_REGISTRY_IMAGE:$CI_COMMIT_TAG" .
    - docker push "$CI_REGISTRY_IMAGE:$CI_COMMIT_TAG"
  tags:
    - dind
  only:
    - tags

--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
[submodule "replay/dnssim/vendor/ngtcp2"]
	path = replay/dnssim/vendor/ngtcp2
	url = https://github.com/ngtcp2/ngtcp2.git

--------------------------------------------------------------------------------
/.mailmap:
--------------------------------------------------------------------------------
Nicki Křížek
Nicki Křížek

--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

version: 2

build:
  os: ubuntu-22.04
  tools:
    python: "3.11"

mkdocs:
  configuration: mkdocs.yml

# Build all formats
formats: all

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
ARG BASE_IMAGE=ubuntu:22.04
FROM $BASE_IMAGE AS runtime_base
LABEL maintainer="Petr Spacek"
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update -q
RUN apt-get upgrade -y -q
# required for PPA repo usage
RUN apt-get install -y -q -o APT::Install-Suggests=0 -o APT::Install-Recommends=0 \
    ca-certificates \
    lsb-release

# avoid PGP; the keyring download here would depend on TLS anyway,
# and I don't want to hardcode a PGP key ID here
RUN echo "deb [trusted=yes] https://ppa.launchpadcontent.net/dns-oarc/dnsjit/ubuntu `lsb_release -c -s` main" > /etc/apt/sources.list.d/dns-oarc.list
RUN apt-get update -q

# shotgun's runtime dependencies
RUN apt-get install -y -q -o APT::Install-Suggests=0 -o APT::Install-Recommends=0 \
    dnsjit \
    libnghttp2-14 \
    libuv1 \
    python3 \
    python3-pip

COPY requirements.txt /tmp/requirements.txt
RUN pip3 install -r /tmp/requirements.txt

# separate image for build, will not be tagged at the end
FROM runtime_base AS build_stage
RUN apt-get install -y -q -o APT::Install-Suggests=0 -o APT::Install-Recommends=0 \
    cmake \
    dnsjit-dev \
    g++ \
    gcc \
    git \
    jq \
    libnghttp2-dev \
    libuv1-dev \
    ninja-build \
    pkg-config \
    tshark

COPY . /shotgun
RUN mkdir /shotgun/replay/dnssim/build
WORKDIR /shotgun/replay/dnssim/build
RUN cmake .. -DCMAKE_BUILD_TYPE=Release -G Ninja
RUN cmake --build .
RUN cmake --install .

# copy only the installed artifacts and the Shotgun repo; throw away everything else
FROM runtime_base AS installed
COPY --from=build_stage /usr/local /usr/local
COPY . /shotgun
WORKDIR /shotgun
ENV PATH="${PATH}:/shotgun"

# cleanup intended for docker build --squash
RUN rm -rf /shotgun/.git

--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
Next
====

Incompatible changes
--------------------
- dnsjit 1.3.0+ is now required

Bugfixes
--------
- pcap/filter-dnsq: fix potential data mixup between packets

DNS Shotgun v20240219
=====================

Incompatible changes
--------------------
- CMake is now being used to build dnssim instead of Autotools
- GnuTLS 3.7.0+ is now required

Improvements
------------
- pcap/extract-clients: always reset UDP port numbers to 53 (!56)
- pcap/extract-clients: ability to write to stdout (!62)
- pcap/filter-dnsq: skip 'special' queries for \*.dotnxdomain.net (!58)
- pcap/split-clients: new tool to split larger PCAPs into smaller ones (!61)
- pcap/merge-chunks: allow disabling randomization (!67)
- tools/plot-latency: ability to diversify lines with linestyles (!69)
- tools/plot-response-rate: estimate worst-case drop caused by discarded packets (!74)
- tools/plot-packet-rate: handle incomplete last sampling period (!71)
- tools/plot-response-rate: ability to ignore RCODEs with small response rate (!73)
- pcap/filter-dnsq: ability to log malformed queries (!72)
- pcap/generate-const-qps: new tool to generate constant QPS (!33)
- tools: allow customizing plot charts with `SHOTGUN_MPLSTYLES` (!65)
- replay: `--preload` argument, mainly for dnssim debugging with sanitizers (!76)
- tools/plot-latency: use fractional values for humans in charts (!78)
- pcap/extract-clients: warn if some input packets were skipped (!80)
- dnssim: replace Autotools with CMake (!77, !86)
- configs: DoH configs with exclusively GET/POST methods (!82)
- tools/plot-response-rate: avoid division by zero (!89)
- tools/plot-latency: denser labels to improve logarithmic scale readability (!90)
- pcap/extract-clients: allow query rewriting - anonymization (!91)
- Support for DNS-over-QUIC :tada: (!75)

Bugfixes
--------
- tools/plot-response-rate: avoid white lines on white background (!55)
- tools/plot-client-distribution: properly handle file limit (!59)
- pcap: proper PCAP write error handling (!60)
- tools/plot-connections: set axis limits properly (!66)
- tools/plot-packet-rate: trim chart whitespace (!79)
- replay: do not exit silently when dnssim returns non-zero (!87)

DNS Shotgun v20210714
=====================

Incompatible changes
--------------------
- dnsjit 1.2.0+ is now required
- dnssim component was moved from dnsjit to this repository and has to be
  compiled separately (!52)

Improvements
------------
- replay: new option `stop_after_s` added to input (!40)
- tools/plot-packet-rate: new tool to plot packet rate in PCAPs (!46)
- replay/shotgun: support reading PCAP from stdin (!36)
- pcap/merge-chunks: utility to merge chunks on-the-fly (!37)
- tools/plot-response-rate: add `--rcodes-above-pct` option (!43)
- tools/plot-response-rate: add `--skip-total` option (!47)
- tools/plot-response-rate: add `--sum-rcodes` option (!47)
- tools/plot-latency: add `--group` option to plot aggregate data (!53)
- pcap/cut-pcap: new script to effectively trim a sorted PCAP (!50)
- configs: plot response rate chart with non-zero rcodes by default (!44)
- pcap/extract-clients: add --seed option for deterministic splitting (!34)
- pcap/filter-dnsq: improved malformed packet handling (!31)
- pcap/extract-clients: detect non-monotonic timestamps (!31)
- replay/shotgun: use faster mmpcap (!36)
- tools/plot-response-rate: add markers for plotting rcodes (!41)
- tools/plot-response-rate: improve color selection (!41, !42)

Bugfixes
--------
- pcap/extract-clients: fix imprecise timestamps and off-by-one error (!32)
- tools/plot-client-distribution: fix handling of empty query count intervals (!38)
- replay: add workaround to support /32 and /128 addresses (!39)


DNS Shotgun v20210203
=====================

- new user interface
  - replay.py is now the main tool instead of shotgun.lua
  - new configuration format to support complex scenarios
  - mixed-protocol traffic replay is supported
  - some scripts in `tools/` were renamed
- fast scripts for processing PCAP data
  - can be found in `pcap/`
  - these replaced the now removed pellet.py and scale.py
- added IPv4 support
- DoH is now fully supported
- comprehensive usage documentation is available at https://dns-shotgun.readthedocs.io
- minimum required dnsjit version is 1.1.0


DNS Shotgun v20200914
=====================

- fully supported UDP, TCP and DNS-over-TLS with
  [dnsjit](https://github.com/DNS-OARC/dnsjit) 1.0.0
- fully supported DNS-over-HTTPS with development version of dnsjit
- traffic can be replayed only over IPv6
- user interface
  - may be unstable
  - only very basic UI available
  - more complex scenarios are not supported yet
    (e.g. simultaneously using multiple protocols)
- pellet.py is functional, but it is very slow and requires python-dpkt from
  master

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# DNS Shotgun

Realistic DNS benchmarking tool which supports multiple transport protocols:

- **DNS-over-TLS (DoT)**
- **DNS-over-HTTPS (DoH)**
- UDP
- TCP

*DNS Shotgun is capable of simulating hundreds of thousands of DoT/DoH
clients.*

Every client establishes its own connection(s) when communicating over
TCP-based protocols. This makes the tool uniquely suited for realistic DoT/DoH
benchmarks since its traffic patterns are very similar to those of real clients.

DNS Shotgun exports a number of statistics, such as query latencies, number of
handshakes and connections, response rate, response codes etc. in JSON format.
The toolchain also provides scripts that can plot these into readable charts.

## Features

- Supports DNS over UDP, TCP, TLS and HTTP/2
- Allows mixed-protocol simultaneous benchmark/testing
- Can bind to multiple source IP addresses
- Customizable client behaviour (idle time, TLS versions, HTTP method, ...)
- Replays captured queries over selected protocol(s) while keeping the original timing
- Suitable for high-performance realistic benchmarks
- Tools to plot charts from output data to evaluate results

## Caveats

- Requires captured traffic from clients
- Setup for proper benchmarks can be quite complex
- Isn't suitable for testing with a very low number of clients/queries
- Backward compatibility between versions isn't kept

## Documentation

**The latest documentation can be found at
<https://dns-shotgun.readthedocs.io/>.**

For installation and/or build instructions, see the
[Installation section](https://dns-shotgun.readthedocs.io/en/stable/installation.html)
of the documentation. The same section also contains instructions for building
the documentation itself.

## Showcase

The following charts highlight the unique capabilities of DNS Shotgun.
Measurements are demonstrated using DNS over TCP. In our test setup, DNS
Shotgun was able to keep sending/receiving:

- 400k queries per second over
- **500k simultaneously active TCP connections**, with about
- 25k handshakes per second, which amounts to
- 1.6M total established connections during the 60s test runtime.

![Active Connections](docs/showcase/connections.png)
![Handshakes](docs/showcase/handshakes.png)

### Socket statistics on server

```
# ss -s
Total: 498799 (kernel 0)
TCP:   498678 (estab 498466, closed 52, orphaned 0, synrecv 0, timewait 54/0), ports 0

Transport Total     IP        IPv6
*         0         -         -
RAW       4         1         3
UDP       19        2         17
TCP       498626    5         498621
INET      498649    8         498641
FRAG      0         0         0
```

### Test setup

- DNS over TCP against a [TCP echo server](https://gitlab.nic.cz/knot/echo-server)
- two physical servers: one for DNS Shotgun, another for the echo server
- both servers have 16 CPUs, 32 GB RAM and a 10GbE network card (up to 64 queues)
- servers were connected directly to each other - no latency
- the TCP network stack was tuned and there was no firewall

## License

DNS Shotgun is released under GPLv3 or later.

## Thanks

We'd like to thank the [Comcast Innovation
Fund](https://innovationfund.comcast.com) for sponsoring the work to support
the use of TCP, DoT and DoH protocols.

DNS Shotgun is built on top of the [dnsjit](https://github.com/DNS-OARC/dnsjit)
engine. We'd like to thank DNS-OARC and Jerry Lundström for the development and
continued support of dnsjit.
--------------------------------------------------------------------------------
/ci/dnsjit/Dockerfile:
--------------------------------------------------------------------------------
ARG DNSJIT_VERSION
ARG CI_REGISTRY=registry.nic.cz
FROM $CI_REGISTRY/knot/knot-resolver/ci/debian-12:knot-3.2

ENV DNSJIT_VERSION=${DNSJIT_VERSION:-1.3.0}

RUN apt-get update && \
    apt-get install -y cmake libgnutls28-dev libpcap-dev libck-dev

RUN pip install --break-system-packages --no-input \
    types-toml black mypy pylint

RUN wget "https://github.com/DNS-OARC/dnsjit/archive/refs/tags/v${DNSJIT_VERSION}.tar.gz" \
    --output-document 'dnsjit.tar.gz' && \
    tar -xzf dnsjit.tar.gz
WORKDIR dnsjit-${DNSJIT_VERSION}
RUN sh autogen.sh && \
    ./configure --prefix=/usr && \
    make && \
    make install

--------------------------------------------------------------------------------
/ci/luacheck-run.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -e

luacheck --codes --formatter TAP .

--------------------------------------------------------------------------------
/ci/mypy-run.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -e

# Find Python scripts
FILES=$(find . \
    -path './ci' -prune -o \
    -path './.git' -prune -o \
    -path './replay/dnssim/vendor' -prune -o \
    -name '*.py' -print)

python3 -m mypy \
    --explicit-package-bases \
    --ignore-missing-imports ${FILES}

--------------------------------------------------------------------------------
/ci/pylint-run.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -e

# Find Python modules and standalone Python scripts
FILES=$(find . \
    -type d -exec test -e '{}/__init__.py' \; -print -prune -o \
    -path './ci' -prune -o \
    -path './.git' -prune -o \
    -path './replay/dnssim/vendor' -prune -o \
    -name '*.py' -print)

python3 -m pylint -j 0 --rcfile pylintrc ${FILES}

--------------------------------------------------------------------------------
/configs/doh-get.toml:
--------------------------------------------------------------------------------
# DNS-over-HTTPS traffic senders using HTTP/2 over TLS.
#
# The traffic sender configured below represents well-behaved clients
# using the GET method to send queries.
#
# For other client types, refer to the DoT or TCP configuration examples. All
# options used there also make sense for configuring DoH client behaviour, most
# notably connection idle timeout, TLS session resumption or TLS versions.
[traffic]

# Well-behaved DNS-over-HTTPS clients using GET method.
[traffic.DoH-GET]
protocol = "doh"
http_method = "GET"


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

[charts.connections]
type = "connections"
kind = "active"

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"

[charts.resumptions]
type = "connections"
kind = ["conn_hs", "tls_resumed"]
title = "TLS Session Resumption for New Connections"

--------------------------------------------------------------------------------
/configs/doh-post.toml:
--------------------------------------------------------------------------------
# DNS-over-HTTPS traffic senders using HTTP/2 over TLS.
#
# The traffic sender configured below represents well-behaved clients
# using the POST method to send queries.
#
# For other client types, refer to the DoT or TCP configuration examples. All
# options used there also make sense for configuring DoH client behaviour, most
# notably connection idle timeout, TLS session resumption or TLS versions.
[traffic]

# Well-behaved DNS-over-HTTPS clients using POST method.
[traffic.DoH-POST]
protocol = "doh"
http_method = "POST"


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

[charts.connections]
type = "connections"
kind = "active"

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"

[charts.resumptions]
type = "connections"
kind = ["conn_hs", "tls_resumed"]
title = "TLS Session Resumption for New Connections"

--------------------------------------------------------------------------------
/configs/doh.toml:
--------------------------------------------------------------------------------
# DNS-over-HTTPS traffic senders using HTTP/2 over TLS.
#
# The two traffic senders configured below both represent well-behaved
# clients, using either the GET or the POST method to send queries.
#
# For other client types, refer to the DoT or TCP configuration examples. All
# options used there also make sense for configuring DoH client behaviour, most
# notably connection idle timeout, TLS session resumption or TLS versions.
[traffic]

# Well-behaved DNS-over-HTTPS clients using GET method.
[traffic.DoH-GET]
protocol = "doh"
http_method = "GET"

# Well-behaved DNS-over-HTTPS clients using POST method.
[traffic.DoH-POST]
protocol = "doh"
http_method = "POST"


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

[charts.connections]
type = "connections"
kind = "active"

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"

[charts.resumptions]
type = "connections"
kind = ["conn_hs", "tls_resumed"]
title = "TLS Session Resumption for New Connections"

--------------------------------------------------------------------------------
/configs/doq.toml:
--------------------------------------------------------------------------------
# DNS-over-QUIC traffic senders.
#
# Below is a configuration example for DoQ clients. You may use this as
# inspiration to craft your own client-types.
#
# A GnuTLS priority string (set with the key `gnutls_priority`) may be used to
# control the TLS protocol version, the cipher suites used, etc. For more GnuTLS
# priority options, refer to: https://gnutls.org/manual/html_node/Priority-Strings.html
[traffic]

# Default DoQ clients
[traffic.DoQ]
protocol = "doq"

# Aggressive DNS-over-QUIC clients that terminate the connection as soon as queries are answered.
#[traffic.DoQ-no_idle]
#protocol = "doq"
#idle_timeout_s = 0

# DoQ clients without TLS Session Resumption
#[traffic.DoQ-no_tickets]
#protocol = "doq"
#gnutls_priority = "dnssim-default:%NO_TICKETS"
#zero_rtt = false # 0-RTT makes no sense without tickets

# DoQ clients without QUIC 0-RTT
#[traffic.DoQ-no_0rtt]
#protocol = "doq"
#zero_rtt = false

[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

[charts.connections]
type = "connections"
kind = "active"

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"

[charts.resumptions]
type = "connections"
kind = ["conn_hs", "tls_resumed"]
title = "TLS Session Resumption for New Connections"

[charts.0rtt]
type = "connections"
kind = ["conn_hs", "quic_0rtt"]
title = "QUIC 0-RTT for New Connections"

--------------------------------------------------------------------------------
/configs/dot.toml:
--------------------------------------------------------------------------------
# DNS-over-TLS traffic senders.
#
# Below are some examples of DoT clients with various behaviours. You can use
# these as inspiration to craft your own client-types. Only the well-behaved
# client is enabled for this default "dot" scenario.
#
# A GnuTLS priority string can be used to control the TLS protocol version, the
# cipher suites used, TLS Session Resumption support, etc. For more GnuTLS
# priority options, refer to: https://gnutls.org/manual/html_node/Priority-Strings.html
[traffic]

# Well-behaved DoT clients utilizing both idle connections and TLS session resumption.
[traffic.DoT]
protocol = "dot"

# DoT clients without TLS Session Resumption
#[traffic.DoT-no_tickets]
#protocol = "dot"
#gnutls_priority = "dnssim-default:%NO_TICKETS"

# Aggressive DoT clients with TLS Session Resumption
#[traffic.DoT-no_idle]
#protocol = "dot"
#idle_timeout_s = 0

# Aggressive DoT clients without TLS Session Resumption
#[traffic.DoT-no_idle-no_tickets]
#protocol = "dot"
#idle_timeout_s = 0
#gnutls_priority = "dnssim-default:%NO_TICKETS"

# DoT clients with TLS 1.3
#[traffic.DoT-tls1_3]
#protocol = "dot"
#gnutls_priority = "dnssim-default:-VERS-ALL:+VERS-TLS1.3"


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

[charts.connections]
type = "connections"
kind = "active"

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"

[charts.resumptions]
type = "connections"
kind = ["conn_hs", "tls_resumed"]
title = "TLS Session Resumption for New Connections"

--------------------------------------------------------------------------------
/configs/mixed.toml:
--------------------------------------------------------------------------------
# Complex scenario simulating clients using multiple types of supported
# protocols (Do53 - UDP and TCP, DoH and DoT).
#
# Clients are randomly assigned exactly one of the configured traffic settings.
# The assignment respects the configured weights. In the following example:
#   60 % of clients use UDP
#   5 % + 5 % of clients use TCP (with/without utilizing idle connections)
#   10 % + 5 % of clients use DoT (with/without TLS Session Resumption)
#   10 % + 5 % of clients use DoH (using GET or POST)
#
# You can use this example as a starting point to create your own
# configurations with various clients and traffic distribution. Refer to the other
# example configs for more ideas of client types.
[traffic]

# DNS-over-UDP clients.
[traffic.UDP]
protocol = "udp"
weight = 60

# Well-behaved DNS-over-TCP clients.
[traffic.TCP]
protocol = "tcp"
weight = 5

# Aggressive DNS-over-TCP clients.
[traffic.TCP-no_idle]
protocol = "tcp"
weight = 5
idle_timeout_s = 0

# Well-behaved DNS-over-TLS clients.
[traffic.DoT]
protocol = "dot"
weight = 10

# Well-behaved DNS-over-TLS clients without TLS Session Resumption.
[traffic.DoT-no_tickets]
protocol = "dot"
weight = 5
gnutls_priority = "dnssim-default:%NO_TICKETS"

# Well-behaved DNS-over-HTTPS clients using GET method.
[traffic.DoH-GET]
protocol = "doh"
weight = 10
http_method = "GET"

# Well-behaved DNS-over-HTTPS clients using POST method.
[traffic.DoH-POST]
protocol = "doh"
weight = 5
http_method = "POST"


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

--------------------------------------------------------------------------------
/configs/tcp.toml:
--------------------------------------------------------------------------------
# DNS-over-TCP traffic senders.
[traffic]

# Well-behaved DNS-over-TCP clients utilizing idle connections (default).
[traffic.TCP]
protocol = "tcp"

# Aggressive DNS-over-TCP clients that terminate the connection as soon as queries are answered.
#[traffic.TCP-no_idle]
#protocol = "tcp"
#idle_timeout_s = 0


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

[charts.connections]
type = "connections"
kind = "active"

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"

--------------------------------------------------------------------------------
/configs/udp.toml:
--------------------------------------------------------------------------------
# Plain DNS over UDP traffic sender.
[traffic]

# DNS-over-UDP clients.
[traffic.UDP]
protocol = "udp"


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

--------------------------------------------------------------------------------
/docs/analyzing-clients.md:
--------------------------------------------------------------------------------
# Analyzing Clients

When you've created a pellets file that is ready to use for DNS Shotgun replay,
you may want to verify you didn't distort the original client population. There
is a tool that can be used to compare client distribution and activity between
the original traffic capture and the pellets file.

!!! note
    This step is optional and may not be necessary for larger client
    populations or for client populations with similar behaviour. Nevertheless,
    it's better to check your assumptions.

First, you need to run the client analysis script for both the original capture
(or rather the `filtered.pcap` file) and the processed pellets file.

```
$ pcap/count-packets-per-ip.lua -r filtered.pcap --csv filtered.csv
$ pcap/count-packets-per-ip.lua -r pellets.pcap --csv pellets.csv
```

Then, you can use another tool to plot a chart of these results.

```
$ tools/plot-client-distribution.py -o clients.png filtered.csv pellets.csv
```

## Client distribution chart

The following chart demonstrates how queries are distributed among clients. It
can be used to see how active your clients are and which clients contribute
most of the overall queries your resolver receives.

!!! warning
    The following chart displays the absolute number of queries, not QPS. When
    comparing multiple distributions, always make sure to use PCAPs of the same
    duration.
![Client distribution chart](img/clients.png)

There are several blobs on the chart that represent groups of clients. The area
of each blob visually signifies the total amount of queries that were received
from these clients.

For each blob, you can locate its center and read the X and Y axes values.
Please note that both axes are logarithmic. On the Y-axis you can read the mean
number of queries that a client represented in the blob has sent. On the
X-axis, you can read the percentage of clients that are represented by this
blob.

In the example above, the first blob from the left shows that almost 80 % of
clients send fewer than 10 queries. Around 20 % of clients send between 10 and
100 queries. Even though the remaining clients represent around 1 % of the
total client population, we can see that these clients generate significant
query traffic.

The comparison shows the two samples are quite similar. If these differences
were significant, you might want to consider changes to the pellets files.
If you used `pcap/limit-clients.lua` to generate these, using a different
`-s/--seed` might help.

--------------------------------------------------------------------------------
/docs/capturing-traffic.md:
--------------------------------------------------------------------------------
# Capturing Traffic

When replaying traffic using DNS Shotgun, you need to provide it with a PCAP
that contains extracted client data, or "*pellets*". You may not use an
arbitrary PCAP file. Instead, you must pre-process the raw PCAP capture into
pellets as described in the following sections.

!!! note
    DNS Shotgun's measurements are only as good as the data you feed it.
    High-quality input data that accurately represents your clients is
    crucial for realistic benchmarking. Results can vary greatly for different
    client populations.

## Raw capture assumptions

To start, you need a traffic capture from your network to work with. It only
needs to contain UDP DNS queries from clients towards your resolver. Other
traffic may be present as well, but it will be filtered out.

### Packets must be sorted by increasing timestamp

Some network or hardware conditions may cause the packets to appear in a
different order. To ensure correct order, use the `reordercap` command from
tshark/wireshark.

```
$ reordercap raw.pcap ordered.pcap
```

### Unique IP means unique client

Clients need to be identified somehow in the captured traffic. We decided to
use the IP address to tell clients apart. This should be a reasonable
assumption, unless your clients are behind NAT.

!!! warning
    If your real clients are behind NAT, this has major consequences and should
    be accounted for, since multiple real clients will be bundled into a single
    simulated one.

### Only UDP packets are used

If a large number of your clients already use DoT, DoH or TCP, you need to
somehow get their queries into plain UDP format. For example, Knot Resolver can
[mirror](https://knot-resolver.readthedocs.io/en/v5.2.1/modules-policy.html#policy.MIRROR)
incoming queries to UDP.
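For illustration, a minimal sketch of such a mirroring policy for Knot Resolver
(the sink address `127.0.0.2` is a placeholder; consult the policy
documentation linked above for the exact syntax of your version):

```
-- kresd configuration sketch: copy every incoming query to a UDP sink,
-- e.g. a host where tcpdump captures the raw PCAP
policy.add(policy.all(policy.MIRROR('127.0.0.2')))
```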
## Filtering DNS queries

In this step, UDP DNS queries from clients are extracted from the raw PCAP. If
the raw capture includes queries from the resolver to upstream servers, it is
_crucial_ to provide the script with the resolver's IP address(es) to filter out
outgoing queries.

```
$ pcap/filter-dnsq.lua -r ordered.pcap -w filtered.pcap -a $RESOLVER_IP
```

!!! tip
    You may also use this script to work with traffic captured directly from an
    interface chosen with `-i`. See `--help` for usage.

--------------------------------------------------------------------------------
/docs/configuration-file.md:
--------------------------------------------------------------------------------
# Configuration File

!!! tip
    You can find configuration files for presets in
    [`configs/`](https://gitlab.nic.cz/knot/shotgun/-/tree/master/configs). They
    are an excellent starting point to create your own configurations.

Configuration is written in [TOML](https://toml.io/en/). There are multiple sections that may have additional subsections.

- `[traffic]` contains one or more subsections that each define client behaviour, including protocol
- `[charts]` is an optional section which can contain subsections that define charts that should be automatically plotted
- `[defaults.traffic]` is an optional section that makes it possible to specify defaults shared by all traffic senders
- `[input]` is an optional section that specifies how to read input data

## [traffic] section

You can define one or more traffic senders with specific client behaviour. Every traffic sender has a name and may have multiple parameters. At the very least, each traffic sender must define `protocol`.

This is an example of a minimal configuration file sending all traffic as DNS-over-TLS, using defaults for everything. The name of the traffic sender here is "DoT".

```
[traffic]
[traffic.DoT]
protocol = "dot"
```

The following configuration parameters for traffic senders are supported.

### protocol

- `udp`: DNS over UDP
- `tcp`: DNS over TCP
- `dot`: DNS over TLS over TCP
- `doh`: DNS over HTTP/2 over TLS over TCP
- `doq`: DNS over QUIC

### weight

When multiple traffic senders are defined, weight affects the client
distribution between them. Each weight is relative to the sum of all weights.

Integer or float. Defaults to 1.


### idle_timeout_s

Determines how long clients keep the connection in an idle state, i.e. leaving
it established after they have received all answers and currently have no more
queries to send. An idle timeout of 0 means the client will close the
connection as soon as possible.

Integer. Defaults to 10 seconds.
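Putting the parameters above together, here is a sketch of a configuration with
two DoT senders (the sender names are arbitrary). With weights 3 and 1, roughly
75 % of clients are assigned to the first sender and 25 % to the second:

```
[traffic]

# 3 out of every 4 clients: well-behaved DoT with the default idle timeout
[traffic.DoT]
protocol = "dot"
weight = 3

# 1 out of every 4 clients: closes the connection as soon as possible
[traffic.DoT-aggressive]
protocol = "dot"
weight = 1
idle_timeout_s = 0
```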
### gnutls_priority

[GnuTLS priority string](https://gnutls.org/manual/html_node/Priority-Strings.html)
which can be used to select the TLS protocol version and features, for example:

```
gnutls_priority = "dnssim-default:%NO_TICKETS" # don't use TLS Session Resumption
gnutls_priority = "dnssim-default:-VERS-ALL:+VERS-TLS1.3" # only use TLS 1.3
```

String.

A non-standard `dnssim-default` (case-sensitive) keyword is allowed at the
beginning of the priority string, optionally with additional keywords separated
by colons (`:`). For conventional TLS over TCP connections, this gets replaced
by `NORMAL`, which lets the system's GnuTLS library determine the default
settings. For QUIC, this always sets the minimum TLS version to 1.3 as dictated
by [section 4.2 of RFC 9001](https://www.rfc-editor.org/rfc/rfc9001.html#section-4.2)
and disables some of the less secure ciphers (settings taken from
[Knot DNS](https://gitlab.nic.cz/knot/knot-dns/-/blob/v3.3.2/src/libknot/quic/quic.c#L50)).

!!! warning
    When `%NO_TICKETS` is set here for the DNS-over-QUIC transport, `zero_rtt`
    needs to be **disabled**. Otherwise, DNS Shotgun may misbehave.


### http_method

- `GET`
- `POST`

### timeout_s

Individual query timeout in seconds.

Integer. Defaults to 2 seconds.

!!! warning
    Increasing the query timeout can negatively impact DNS Shotgun's
    performance and is not recommended.

### handshake_timeout_s

Timeout for establishing a connection in seconds.

Integer. Defaults to 5 seconds.

### zero_rtt

Whether 0-RTT (early data) is enabled. Only applicable to DNS-over-QUIC.

Boolean. Defaults to `true`.

### Advanced settings

You shouldn't use these unless you need to.

- `cpu_factor`: override the default CPU thread distribution (UDP: 1, TCP: 2, DoT/DoH: 3)
- `max_clients`: the number of clients each dnssim instance can hold (a per-thread setting)
- `channel_size`: the number of queries that can be buffered before a thread starts to block
- `batch_size`: the number of queries processed in each loop

### CLI overrides

The following options can be used to override the CLI options for `replay.py`.
Values in the configuration file always take precedence over CLI options.

- `server`: target server's IPv4/IPv6 address
- `dns_port`: target server's port for plain DNS (UDP and TCP)
- `dot_port`: target server's port for DNS-over-TLS
- `doh_port`: target server's port for DNS-over-HTTPS

## [charts] section

This section is optional and is only provided as a convenience to automate
plotting charts after the test. Anything defined in this section can be
achieved by using the plotting scripts directly.

Similarly to the `[traffic]` section, it also contains named subsections. Every
such subsection must contain `type`, which determines the chart that should be
plotted. For example:

```
[charts]
[charts.response-rate]
type = "response-rate"
```

### type

Type determines which chart will be plotted. The following charts are supported:

- `response-rate`: [Response Rate Chart](response-rate-chart.md)
- `latency`: [Latency Histogram](latency-histogram.md)
- `connections`: [Connection Chart](connection-chart.md)

### title

Title of the chart.

### output

Output filename for the chart. Various file extensions can be used. Defaults to SVG.

### Other parameters

These depend on the specific chart type. Generally, any option that can be
passed directly to the plotting scripts can also be specified in the config.
Refer to the tools' `--help` for possible options.
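For example, the following sketch (adapted from the bundled preset configs)
plots a handshake chart with a custom title and output filename; `kind` is a
chart-specific option passed through to the plotting script:

```
[charts]

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"
output = "handshakes.png"
```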
## [defaults] section

### [defaults.traffic] section

This section can provide defaults for all traffic senders. If a specific
traffic sender re-defines the same parameter, the traffic sender-specific value
takes precedence over the default value.

Any parameter that can be specified for traffic senders in the `[traffic]` section
can also be specified in this section. For example, to override the default
behaviour and disable TLS Session Resumption, you can use:

```
[defaults]
[defaults.traffic]
gnutls_priority = "dnssim-default:%NO_TICKETS"
```

## [input] section

Optionally specifies how to read input data.

```
[input]
pcap = "/path/to/input.pcap"
stop_after_s = 600
```

### pcap

Path to the PCAP file; overrides the value specified by the `--read` command line option.
Intended as a shortcut when re-running a test with the same dataset again and again.

### stop_after_s

Time limit for the test, in seconds (integer).
Reading queries from the PCAP will stop at the first packet with a timestamp >= `stop_after_s`.

Defaults to no limit, i.e. read all packets from the PCAP.

!!! warning
    Using the `stop_after_s` option negatively impacts DNS Shotgun's read
    performance and slows down PCAP processing by 50 %. If this performance
    penalty is unacceptable, cut the PCAP using external tools and avoid using
    this option.

--------------------------------------------------------------------------------
/docs/configuration-presets.md:
--------------------------------------------------------------------------------
# Configuration Presets

You can either use a configuration preset or create your own configuration. It
is possible to replay the original traffic over various different protocols
with different client behaviours simultaneously. For example, you can split
your traffic into 60 % UDP, 20 % DoT and 20 % DoH.

The following use-cases are predefined for convenience, without the need to
create a configuration file. You can pass these values instead of a filepath
to the `-c/--config` option of the `replay.py` utility; an example invocation
is shown at the bottom of this page.

- `udp`
    - 100 % DNS-over-UDP clients
- `tcp`
    - 100 % well-behaved DNS-over-TCP clients
- `dot`
    - 100 % well-behaved DNS-over-TLS clients using TLS Session Resumption
- `doh`
    - 50 % well-behaved DNS-over-HTTPS GET clients using TLS Session Resumption
    - 50 % well-behaved DNS-over-HTTPS POST clients using TLS Session Resumption
- `doq`
    - 100 % well-behaved DNS-over-QUIC clients
- `mixed`
    - 60 % DNS-over-UDP clients
    - 5 % well-behaved DNS-over-TCP clients
    - 5 % aggressive DNS-over-TCP clients
    - 10 % well-behaved DNS-over-TLS clients using TLS Session Resumption
    - 5 % well-behaved DNS-over-TLS clients without TLS Session Resumption
    - 10 % well-behaved DNS-over-HTTPS GET clients using TLS Session Resumption
    - 5 % well-behaved DNS-over-HTTPS POST clients using TLS Session Resumption

!!! note
    You can find configuration files for presets in
    [`configs/`](https://gitlab.nic.cz/knot/shotgun/-/tree/master/configs). They
    are an excellent starting point to create your own configurations.
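A sketch of such an invocation, replaying a pellets file with the `dot` preset
(the `-s` server option is assumed here; refer to `replay.py --help` for the
exact flags):

```
$ ./replay.py --read pellets.pcap --config dot -s $SERVER_IP
```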
--------------------------------------------------------------------------------
/docs/connection-chart.md:
--------------------------------------------------------------------------------
# Connection Chart

The connection chart can be used to visualize connection-related information,
such as the number of active established connections, handshake attempts,
successful TLS Session Resumptions or failed handshakes.

```
$ tools/plot-connections.py -k active -- DoT.json
$ tools/plot-connections.py -k conn_hs tls_resumed failed_hs -t "Handshakes over Time" DoT.json
```

The optional parameter `-k/--kind` can be used to select which data should be
plotted. The following values are supported.

- `active` means the number of currently active established connections
- `conn_hs` means the number of TCP or QUIC handshake attempts in the last second
- `failed_hs` means the number of failed handshakes. All kinds of connection
  setup failures will be included, whether it's a TCP handshake timeout, a TLS
  negotiation failure or anything else.
- `tls_resumed` means the number of connections that were resumed with TLS
  Session Resumption during the last second

!!! tip
    Using `--` to separate a list of JSON files after specifying
    `-k/--kind` might be needed in some cases.

![connections](img/connections.png)
![handshakes](img/handshakes.png)

--------------------------------------------------------------------------------
/docs/extracting-clients.md:
--------------------------------------------------------------------------------
# Extracting Clients

Once you have the `filtered.pcap` with DNS queries from clients, you can
process them into *pellets* - the pre-processed input files for DNS Shotgun.
The entire content of these files will be used during the replay stage - all
clients, for the entire duration of the file.

The following example takes the entire `filtered.pcap` and transforms it into
pellets. The pellets file will contain all the clients and it will have the
same duration as the original file.

```
$ pcap/extract-clients.lua -r filtered.pcap -O $OUTPUT_DIR
```

The produced pellets file is ready to be used as the input for DNS Shotgun
replay.

## Splitting original capture into multiple pellets files

It can be useful to have a long original capture file, which contains more
clients and queries. However, since a pellets file is replayed in its
entirety, you may want to split the original file into multiple pellets files
with a shorter duration.

For example, if your initial capture file is 30 minutes long, you could split
it into fifteen two-minute pellets files with the `-d/--duration` option.

```
$ pcap/extract-clients.lua -r filtered.pcap -O $OUTPUT_DIR -d 120
```

!!! tip
    It is useful to keep a collection of these original pellets files of the
    same duration. They can later be combined to create different test cases.

## Scaling-up the traffic

If you want to stress-test your infrastructure, you can combine these pellets
files together to effectively scale-up the traffic. The pellets files are
created in a way that lets you simply use the `mergecap` utility to combine them.

```
$ mergecap -w scaled.pcap $OUTPUT_DIR/*
```
!!! warning
    You can only merge chunks that were created with the same duration when
    calling `extract-clients.lua`. Modifying the chunks in other ways, such as
    attempting to shift or extend the traffic, will produce unexpected results.
    For more information, see [this
    discussion](https://gitlab.nic.cz/knot/shotgun/-/merge_requests/32#note_196879).

## Limiting the traffic

It is also possible to take a pellets file and scale-down its traffic. This is
done on a per-client basis. Either a client's entire query stream will be
present, or the client won't be present at all.

To limit the overall traffic, you can select the portion of the clients that
should be included. This can range from 0 to 1. For example, let's suppose we
want to scale-down the number of clients in the pellets file to 30 %.

```
$ pcap/limit-clients.lua -r pellets.pcap -w limited.pcap -l 0.3
```

An alternative approach is to split an original large pellets file into multiple
smaller ones using the `split-clients.lua` script. To split one big pellets file
into 10 smaller ones, use:

```
$ pcap/split-clients.lua -r pellets.pcap -O output_directory -n 10
```

Every client is assigned to exactly one output file. All of a client's packets
remain intact and go into a single file.

--------------------------------------------------------------------------------
/docs/img/clients.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/img/clients.png

--------------------------------------------------------------------------------
/docs/img/connections.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/img/connections.png

--------------------------------------------------------------------------------
/docs/img/handshakes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/img/handshakes.png

--------------------------------------------------------------------------------
/docs/img/latency.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/img/latency.png

--------------------------------------------------------------------------------
/docs/img/response-rate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/img/response-rate.png

--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
# DNS Shotgun

Realistic DNS benchmarking tool which supports multiple transport protocols:

- **DNS-over-QUIC (DoQ)**
- **DNS-over-TLS (DoT)**
- **DNS-over-HTTPS (DoH)**
- UDP
- TCP

*DNS Shotgun is capable of simulating hundreds of thousands of DoQ/DoT/DoH
clients.*

Every client establishes its own connection(s) when communicating over TCP-based
protocols. This makes the tool uniquely suited for realistic DoT/DoH benchmarks
since its traffic patterns are very similar to those of real clients.

Similarly, each client establishes its own connection(s) when communicating over
QUIC, utilizing its capability of sending/receiving queries over a single secure
connection, but with multiple mutually independent streams.

DNS Shotgun exports a number of statistics, such as query latencies, number of
handshakes and connections, response rate, response codes etc. in JSON format.
The toolchain also provides scripts that can plot these into readable charts.

## Features

- Supports DNS over UDP, TCP, TLS, HTTP/2, and QUIC
- Allows mixed-protocol simultaneous benchmark/testing
- Can bind to multiple source IP addresses
- Customizable client behaviour (idle time, TLS versions, HTTP method, ...)
- Replays captured queries over selected protocol(s) while keeping the original timing
- Suitable for high-performance realistic benchmarks
- Tools to plot charts from output data to evaluate results

## Caveats

- Requires captured traffic from clients
- Setup for proper benchmarks can be quite complex
- Isn't suitable for testing with a very low number of clients/queries
- Backward compatibility between versions isn't kept

## Code Repository

[https://gitlab.nic.cz/knot/shotgun](https://gitlab.nic.cz/knot/shotgun)

--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
# Installation

There are two options for using DNS Shotgun. You can either use a pre-built
docker image, or install the dependencies, compile the dnssim module and use
the scripts from the repository directly.

## Docker Image

A pre-built image can be obtained from the [CZ.NIC DNS Shotgun
Registry](https://gitlab.nic.cz/knot/shotgun/container_registry/65).

```bash
$ docker pull registry.nic.cz/knot/shotgun:v20240219
```

Alternatively, you can build the image yourself from the Dockerfile in the repository.
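For example, a minimal sketch of a local build (the image tag `dns-shotgun` is
arbitrary; the Dockerfile sits at the repository root):

```bash
$ docker build -t dns-shotgun .
```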
### Docker Usage

- Make sure to run with `--network host`.
- Mount input/output directories and files with `-v/--volume`.
- Using `--privileged` might slightly improve performance if you don't mind the security risk.

```bash
$ docker run \
    --network host \
    -v "$PWD:/mnt" \
    registry.nic.cz/knot/shotgun:v20240219 \
    $COMMAND
```

## Using scripts from sources

You can use the toolchain scripts directly from the git repository. You need to
ensure you have the required dependencies installed, and compile and install
the dnssim module. Also make sure to check out a tagged version, as
development happens in the master branch.

```bash
$ git clone https://gitlab.nic.cz/knot/shotgun.git
$ cd shotgun
$ git checkout v20240219
$ git submodule update --init --recursive
$ cd replay/dnssim
$ mkdir build && cd build
$ cmake ..
$ cmake --build .
$ sudo cmake --install .
```

### Dependencies

When using the scripts directly, the following dependencies are needed.

If you only wish to process shotgun JSON output (e.g. plot charts), then dnsjit
and compiling the dnssim module are not required.
57 | 
58 | - [dnsjit 1.3+](https://github.com/DNS-OARC/dnsjit): Can be installed from [DNS-OARC
59 |   repositories](https://dev.dns-oarc.net/packages/).
60 | - libuv
61 | - libnghttp2
62 | 
63 | - Python 3.6 or later
64 | - Python dependencies from [requirements.txt](https://gitlab.nic.cz/knot/shotgun/-/blob/master/requirements.txt)
65 | - (optional) tshark/wireshark for some PCAP pre-processing
66 | 
67 | ## Documentation
68 | 
69 | **The latest documentation can be found at
70 | <https://dns-shotgun.readthedocs.io>** — chances are that is what you are
71 | looking at right now. The documentation is also available in human-readable
72 | Markdown files in the source tree's `docs` subdirectory.
73 | 
74 | You may wish to edit the documentation locally and preview those local changes.
75 | To do that, [install MkDocs](https://www.mkdocs.org/user-guide/installation/),
76 | then, in the sources directory, run:
77 | 
78 | ```bash
79 | $ mkdocs build
80 | ```
81 | 
82 | This will create a new `site` directory containing the Shotgun documentation in
83 | HTML format.
84 | 
85 | For testing the locally built documentation with live-rebuild, MkDocs' built-in
86 | development server may be used like so:
87 | 
88 | ```bash
89 | $ mkdocs serve
90 | ```
91 | 
-------------------------------------------------------------------------------- /docs/key-concepts.md: --------------------------------------------------------------------------------
1 | # Key Concepts
2 | 
3 | DNS Shotgun is capable of simulating real client behaviour by replaying
4 | captured traffic over selected protocol(s). The timing of the original queries as
5 | well as their content is kept intact.
6 | 
7 | Realistic high-performance benchmarking requires a complex setup, especially for
8 | TCP-based protocols. However, the authors of this tool have successfully used it
9 | to benchmark and test various DNS implementations with up to hundreds of
10 | thousands of clients (meaning _connections_ for TCP-based transports) using
11 | commodity hardware. This requires the [performance tuning](performance-tuning.md)
12 | described in a later section.
13 | 
14 | ## Client
15 | 
16 | These docs often mention "*clients*", and we often use client counts to describe DNS
17 | infrastructure throughput in addition to queries per second (QPS). What is
18 | considered a client and why does it matter?
19 | 
20 | A client is the origin of one or more queries and is supposed to represent a
21 | single device, i.e. anything from a CPE such as a home/office router to a mobile
22 | device. Since traffic patterns of various devices can vary greatly, it is
23 | crucial to use traffic that most accurately represents your real clients.
24 | 
25 | In plain DNS sent over UDP, the concept of a client doesn't matter, since UDP is a
26 | stateless protocol and a packet is just a packet. Thus, QPS throughput may be a
27 | sufficient metric for UDP.
28 | 
29 | In stateful DNS protocols, such as DoT, DoH or TCP, much of the overhead and
30 | performance cost is caused by establishing the connection over which queries
31 | are subsequently sent. Therefore, the concept of a client becomes crucial for
32 | benchmarking stateful protocols.
33 | 
34 | !!! note
35 |     As an extreme example, consider 10k QPS sent over a single DoH connection
36 |     versus establishing 10k DoH connections, each with 1 QPS. While both
37 |     scenarios have the same overall QPS, the second one will consume vastly more
38 |     resources, especially when establishing the connections.
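To get a rough idea of how many clients (unique source IP addresses) your
captured traffic contains, the bundled `pcap/count-packets-per-ip.lua` script
logs the total client count and writes per-IP packet statistics. For example
(the CSV filename is just an example):

```
$ pcap/count-packets-per-ip.lua -r input.pcap --csv clients.csv
```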
39 | 
40 | ### Client replay guarantees
41 | 
42 | DNS Shotgun aims to provide the most realistic client behaviour when replaying
43 | the traffic. When you run DNS Shotgun, the following guarantees apply when
44 | using a stateful protocol.
45 | 
46 | - **Multiple clients never share a single connection.**
47 | - **Each client attempts to establish at least one connection.**
48 | - **A client may have zero, one or more (rarely) active established connections
49 |   at any time**, depending on its traffic and behaviour.
50 | 
51 | ## Real traffic
52 | 
53 | A key focus of this toolchain is to make the benchmarks as realistic as
54 | possible. Therefore, no synthetic queries or clients are generated. To
55 | effectively use this tool, you need a large amount of source PCAPs.
56 | Ideally, these contain the traffic from your own network.
57 | 
58 | !!! note
59 |     In case you'd prefer to use synthetic clients/queries anyway, you can just
60 |     generate the traffic and capture it in a PCAP for further processing. Doing
61 |     that is outside the scope of this documentation.
62 | 
63 | ### Traffic replay guarantees
64 | 
65 | - **The content of DNS messages is left intact.** Messages without a proper DNS
66 |   header or question section will be discarded.
67 | - **The timing of the DNS messages is kept as close to the original traffic as
68 |   possible.** If the tool detects a time skew larger than one second, it aborts
69 |   the test. However, the real time difference may be slightly larger due to
70 |   various buffers.
71 | 
-------------------------------------------------------------------------------- /docs/latency-histogram.md: --------------------------------------------------------------------------------
1 | # Latency Histogram
2 | 
3 | This very useful chart is a bit difficult to read and understand, but it
4 | provides a great deal of information about the overall latency from the
5 | client-side perspective. We use the logarithmic percentile histogram to display
6 | this data. [This
7 | article](https://blog.powerdns.com/2017/11/02/dns-performance-metrics-the-logarithmic-percentile-histogram/)
8 | provides an in-depth explanation of the chart and how to interpret it.
9 | 
10 | ```
11 | $ tools/plot-latency.py -t "DNS Latency Overhead" UDP.json TCP.json DoT.json DoH.json
12 | ```
13 | 
14 | ![latency overhead](img/latency.png)
15 | 
16 | The chart above illustrates why comparing just the response rate isn't a
17 | sufficient metric. For all protocols compared in this case, you'd get around a
18 | 99.5 % response rate. However, when you examine the client latency, you can see
19 | clear differences.
20 | 
21 | In the chart, 80 % of all queries are represented by the rightmost part of the
22 | chart - between the "slowest percentile" of 20 % and 100 %. For these
23 | queries, the latency for UDP, TCP, DoT or DoH is the same, which is one
24 | round trip. These represent immediate answers from the resolver (e.g. cached or
25 | refused), which are sent either over UDP or over an already established
26 | connection (for stateful protocols). The latency is 10 ms, or 1 RTT.
27 | 
28 | The most interesting part is between the 5 % and 20 % slowest percentile. For
29 | these 15 % of all queries, there are major differences between the latency of
30 | UDP, TCP and DoT/DoH. This illustrates the latency cost of setting up a
31 | connection where none is present. UDP is stateless and requires just 1 RTT. TCP
32 | requires an extra round trip to establish the connection and the latency for the
33 | client becomes 2 RTTs.
Finally, both DoT and DoH require an additional round
34 | trip for the TLS handshake and thus the overall latency cost becomes 3 RTTs.
35 | 
36 | The trailing 5 % of queries show no difference between protocols, since these
37 | are queries that aren't answered from the cache and the delay is introduced by
38 | the communication between the resolver and the upstream servers. The last 0.5 %
39 | of queries aren't answered by the resolver within 2 seconds and are considered
40 | a timeout by the client.
41 | 
-------------------------------------------------------------------------------- /docs/performance-tuning.md: --------------------------------------------------------------------------------
1 | # Performance Tuning
2 | 
3 | Any high-performance benchmark setup requires a separate server for generating
4 | the traffic, which then sends the traffic to the target server under test. In
5 | order to scale up DNS Shotgun so it performs well under heavy load, some
6 | performance tuning and network adjustments are needed.
7 | 
8 | !!! tip
9 |     An example of the performance tuning we use in our benchmarks can be found
10 |     in our [ansible
11 |     role](https://gitlab.nic.cz/knot/resolver-benchmarking/-/tree/master/roles/tuning).
12 | 
13 | ## Number of file descriptors
14 | 
15 | Make sure the number of available file descriptors is sufficient. This is
16 | typically necessary when running DNS Shotgun from a terminal. When using Docker,
17 | the defaults are usually sufficient.
18 | 
19 | ```
20 | $ ulimit -n 1000000
21 | ```
22 | 
23 | ## Ephemeral port range
24 | 
25 | Extending the ephemeral port range gives the tool more outgoing ports to work with.
26 | 
27 | ```
28 | $ sysctl -w net.ipv4.ip_local_port_range="1025 60999"
29 | ```
30 | 
31 | ## NIC queues
32 | 
33 | High-end network cards typically have multiple queues. Ideally, you want to set
34 | their number to be the same as the number of available CPUs.
35 | 
36 | ```
37 | $ ethtool -L $INTERFACE combined $NCPU
38 | ```
39 | 
40 | !!! note
41 |     It's important that the NIC interrupts from different queues are handled
42 |     by different CPUs. If there are throughput issues, you may want to verify
43 |     this is the case.
44 | 
45 | ## UDP
46 | 
47 | DNS Shotgun can generate quite bursty traffic. Increasing the receiving
48 | server's socket memory can help to absorb these bursts. If this buffer isn't
49 | sufficient, it can cause packet loss.
50 | 
51 | ```
52 | $ sysctl -w net.core.rmem_default="8192000"
53 | ```
54 | 
55 | ## TCP, DoT, DoH
56 | 
57 | Tuning the network stack for TCP isn't as straightforward and it's network-card
58 | specific. It's best to refer to the [kernel
59 | documentation](https://www.kernel.org/doc/html/latest/networking/device_drivers/ethernet/intel/ixgb.html#improving-performance)
60 | for your specific network card.
61 | 
62 | ## conntrack
63 | 
64 | For our benchmarks, we don't use iptables or any firewall. The `conntrack`
65 | module in particular probably won't be able to handle serious load. Make sure
66 | the module isn't loaded by the kernel if you're not using it.
67 | 
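A quick way to verify this is to look for the module among the loaded kernel
modules; no output from the command below means it isn't loaded (on modern
kernels the module is typically called `nf_conntrack`):

```
$ lsmod | grep conntrack
```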
-------------------------------------------------------------------------------- /docs/raw-output.md: --------------------------------------------------------------------------------
1 | # Raw Output
2 | 
3 | In the output directory of DNS Shotgun's `replay.py` tool, the following
4 | structure is created. Let's assume we ran a configuration that configures two
5 | traffic senders - `DoT` and `DoH`.
6 | 
7 | ```
8 | $OUTDIR
9 | ├── .config                # ignore this directory
10 | │   └── luaconfig.lua      # for debugging purposes only
11 | ├── data                   # directory with raw JSON output
12 | │   ├── DoH                # "DoH" traffic sender data
13 | │   │   ├── DoH-01.json    # raw data from first thread of DoH traffic sender
14 | │   │   ├── DoH-02.json    # raw data from second thread of DoH traffic sender
15 | │   │   └── ...            # raw data from other threads of DoH traffic sender
16 | │   ├── DoH.json           # merged raw data from all DoH sender threads
17 | │   ├── DoT                # "DoT" traffic sender data
18 | │   │   ├── DoT-01.json    # raw data from first thread of DoT traffic sender
19 | │   │   ├── DoT-02.json    # raw data from second thread of DoT traffic sender
20 | │   │   └── ...            # raw data from other threads of DoT traffic sender
21 | │   └── DoT.json           # merged raw data from all DoT sender threads
22 | └── charts                 # directory with automatically plotted charts (if configured)
23 |     ├── latency.svg        # chart comparing latency of DoT and DoH clients
24 |     └── response-rate.svg  # chart comparing the response rate of DoT and DoH clients
25 | ```
26 | 
27 | ## data directory
28 | 
29 | This directory contains the raw JSON data. Since DNS Shotgun typically operates
30 | with multiple threads, the results for each traffic sender are also provided
31 | per thread. However, since you typically don't care how the clients were
32 | emulated across threads, but only about their aggregate behaviour, a data file
33 | that contains the combined results of all threads belonging to the configured
34 | traffic sender is also provided.
35 | 
36 | Every configured traffic sender will have its own output directory of the same
37 | name. Inside, per-thread raw data are available. The aggregate file is directly
38 | in the `data/` directory, as a JSON file named after the configured traffic
39 | sender. The aggregate file is the one you typically want to use.
40 | 
41 | !!! note
42 |     The raw JSON file is versioned and is not intended to be forward or
43 |     backward compatible with various DNS Shotgun versions. You should use the
44 |     same version of the toolchain for both replay and interpreting the data.
45 | 
46 | !!! tip
47 |     If you wish to explore, format or interpret the raw JSON data, the
48 |     [jq](https://stedolan.github.io/jq/) utility can be useful for some
49 |     rudimentary processing.
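For example, a couple of generic invocations (a sketch only; the exact field
names inside the JSON differ between toolchain versions, so explore
interactively):

```
$ jq . data/DoT.json | less    # pretty-print the raw data
$ jq keys data/DoT.json        # list the top-level fields
```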
50 | 
51 | ## charts directory
52 | 
53 | This directory may not be present if you didn't configure any charts to be
54 | automatically plotted in the configuration file. If it is available, it
55 | contains the plotted charts that are described in the following sections.
56 | 
57 | When charts are plotted automatically, they always display data for all the
58 | configured traffic senders with their predefined names. If you wish to
59 | customize the charts, omit certain senders etc., you can use the plotting
60 | scripts directly from the CLI. These can be found in the `tools/` directory
61 | and you can refer to their `--help` for usage.
62 | 
-------------------------------------------------------------------------------- /docs/replaying-traffic.md: --------------------------------------------------------------------------------
1 | # Replaying Traffic
2 | 
3 | Once you've prepared the input pellets file with clients and either have your
4 | own configuration file or know which preset you want to use, you can use the
5 | following script to run DNS Shotgun.
6 | 
7 | ```
8 | $ replay.py -r pellets.pcap -c udp -s ::1
9 | ```
10 | 
11 | !!! tip
12 |     Use the `--help` option to explore other options.
13 | 
14 | During the replay, there is quite a bit of logging information that looks like
15 | this.
16 | 
17 | ```
18 | UDP-01 notice: total processed: 267; answers: 0; discarded: 2; ongoing: 172
19 | ```
20 | 
21 | The important thing to look out for is the number of `discarded` packets. If
22 | nearly all the packets, or a large portion of them, are discarded, it almost
23 | certainly indicates an improper setup or bad input data. The test should be
24 | aborted and the reason investigated. Increasing the `-v/--verbosity`
25 | level might help.
26 | 
27 | ## Binding to multiple source addresses
28 | 
29 | When sending traffic against a single IP/port combination of the target server,
30 | the source IP address has a limited number of ports it can utilize. A single
31 | IP address is insufficient to achieve hundreds of thousands of clients.
32 | 
33 | 
34 | DNS Shotgun can bind to multiple source addresses with the `-b/--bind-net`
35 | option. You can specify either an IP address or a network range using CIDR
36 | notation. Multiple values (either IPs, ranges or any combination of those) can
37 | be specified. When using CIDR notation, the network and broadcast address won't
38 | be used.
39 | 
40 | 
41 | ```
42 | $ replay.py -r pellets.pcap -c tcp -s fd00:dead:beef::cafe -b fd00:dead:beef::/124
43 | ```
44 | 
45 | !!! tip
46 |     Our rule of thumb is to use at least one source IP address for every 30k
47 |     clients. However, using more addresses is certainly better and can help to
48 |     avoid weird behaviour, slow performance and other issues that require
49 |     in-depth troubleshooting.
50 | 
51 | !!! note
52 |     If you're limited by the number of source addresses you can use, utilizing
53 |     either IPv6 unique-local addresses (fd00::/8) or private IPv4 ranges could
54 |     be helpful.
55 | 
56 | ## Emulating link latency
57 | 
58 | !!! warning
59 |     This is an advanced topic and emulating latency isn't necessary for many
60 |     scenarios.
61 | 
62 | Overall latency will affect the user's experience with DNS resolution. It also
63 | becomes much more relevant when using TCP and TLS, since the handshakes
64 | introduce additional round trips. When benchmarks are done in a data center
65 | with two servers that are directly connected to each other with practically no
66 | latency, it can provide a skewed view of the expected end-user latency.
67 | 
68 | Luckily, the `netem` Network Emulator makes it very simple to emulate various
69 | network conditions. For example, emulating latency on the sender side can be
70 | done quite easily. The following command adds 10 ms latency to outgoing
71 | packets, effectively simulating an RTT of 10 ms.
72 | 
73 | ```
74 | $ tc qdisc add dev $INTERFACE root netem limit 10000000 delay 10ms
75 | ```
76 | 
77 | !!! tip
78 |     For more possibilities, refer to `man netem.8`. Using a sufficiently large
79 |     buffer (limit) is essential for proper operation.
80 | 
81 | However, beware that the settings affect the entire interface. If you're going
82 | to emulate latency, it's best if the resolver-client traffic is on a separate
83 | interface, so the resolver-upstream traffic isn't negatively impacted.
84 | 
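When you're done testing, the emulated latency can be removed again by deleting
the root qdisc:

```
$ tc qdisc del dev $INTERFACE root
```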
-------------------------------------------------------------------------------- /docs/response-rate-chart.md: --------------------------------------------------------------------------------
1 | # Response Rate Chart
2 | 
3 | This basic chart can display the overall response rate over time. It is also
4 | possible to plot specific response codes (RCODEs), such as `NOERROR`.
5 | 
6 | ```
7 | $ tools/plot-response-rate.py -r 0 -o rr.png UDP.json
8 | ```
9 | 
10 | !!! tip
11 |     The image format depends on the output filename extension chosen with
12 |     `-o/--output`. `svg` is used by default, but other formats such as `png`
13 |     are supported as well.
14 | 
15 | The following chart displays the answer rate and the rate of `NOERROR` answers.
16 | In this measurement, the resolver was started with a cold cache. We can see the
17 | overall response rate is close to 100 %. The `NOERROR` response rate slightly
18 | increases over time from 72 % to around 75 % as the cache warms up.
19 | 
20 | ![UDP response rate](img/response-rate.png)
21 | 
-------------------------------------------------------------------------------- /docs/showcase/connections.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/showcase/connections.png -------------------------------------------------------------------------------- /docs/showcase/handshakes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/showcase/handshakes.png -------------------------------------------------------------------------------- /docs/troubleshooting.md: --------------------------------------------------------------------------------
1 | # Troubleshooting
2 | 
3 | ## failed to send udp packet: too many open files
4 | 
5 | Increase the number of file descriptors. (See
6 | [`man limits.conf`](https://www.man7.org/linux/man-pages/man5/limits.conf.5.html)
7 | and/or `ulimit --help`)
8 | 
9 | ## fatal: aborting, real time drifted ahead of simulated time
10 | 
11 | This happens when DNS Shotgun can't keep up with the traffic it's supposed to
12 | send/receive. The tool attempts to keep the realistic timing of the original
13 | data and it aborts if it fails to keep that promise. This can have multiple
14 | causes.
15 | 
16 | - You're pushing the tool beyond the limits of what it can do, e.g.:
17 |     - Not enough computing power (are all CPUs utilized?)
18 |     - Insufficient network throughput (is the network tuned properly? are there enough source IPs?)
19 |     - Unresponsive resolver and/or too high `timeout_s`
20 | - NIC interrupts aren't properly distributed among CPUs
21 | - A single thread is assigned too much traffic
22 |     - This typically shouldn't be the case, but if a specific traffic sender is
23 |       *always* causing this failure, tweaking `cpu_factor` and/or the number of
24 |       threads might help
25 | 
26 | ## critical: buffer capacity exceeded, threads are blocked
27 | 
28 | This is an indication that a specific thread filled up its buffer and is now
29 | causing the entire tool to slow down, which will eventually cause the crash
30 | described above if it goes on for too long. If it only happens for a specific
31 | traffic sender, tweaking `cpu_factor` to change the thread distribution could help.
32 | 
33 | ## various warnings
34 | 
35 | Especially under heavy load, there can occasionally be some warnings.
36 | Sometimes it's a GnuTLS connection error, a mismatched response etc. The general
37 | rule is that a few different warnings during heavy load probably aren't something
38 | to be too concerned about. Typically, it's when the output is spammed by the same
39 | warning over and over that you have a problem.
40 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: DNS Shotgun 2 | theme: 3 | name: readthedocs 4 | navigation_depth: 1 5 | nav: 6 | - "Overview": index.md 7 | - installation.md 8 | - key-concepts.md 9 | - "Input Data": 10 | - capturing-traffic.md 11 | - extracting-clients.md 12 | - analyzing-clients.md 13 | - "Replay": 14 | - configuration-presets.md 15 | - configuration-file.md 16 | - replaying-traffic.md 17 | - performance-tuning.md 18 | - troubleshooting.md 19 | - "Interpreting Results": 20 | - raw-output.md 21 | - response-rate-chart.md 22 | - latency-histogram.md 23 | - connection-chart.md 24 | markdown_extensions: 25 | - admonition 26 | -------------------------------------------------------------------------------- /pcap/count-packets-over-time.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | -- count-packets-over-time.lua: count number of packets in input PCAP in time intervals 4 | 5 | local input = require("dnsjit.input.pcap").new() 6 | local layer = require("dnsjit.filter.layer").new() 7 | local object = require("dnsjit.core.objects") 8 | local log = require("dnsjit.core.log").new("count-packets-over-time.lua") 9 | local getopt = require("dnsjit.lib.getopt").new({ 10 | { "r", "read", "", "input file to read", "?" }, 11 | { "s", "stats_period", 100, "period for printing stats (ms)", "?" }, 12 | { nil, "csv", "", "path to the output CSV file (default: stdout)", "?" }, 13 | }) 14 | 15 | log:enable("all") 16 | 17 | -- Parse arguments 18 | local args = {} 19 | getopt:parse() 20 | args.read = getopt:val("r") 21 | args.stats_period = getopt:val("s") 22 | args.csv = getopt:val("csv") 23 | 24 | -- Display help 25 | if getopt:val("help") then 26 | getopt:usage() 27 | return 28 | end 29 | 30 | -- Check arguments 31 | if args.stats_period <= 0 then 32 | log:fatal("stats_period must be grater than 0") 33 | end 34 | 35 | -- Set up input 36 | if args.read ~= "" then 37 | if input:open_offline(args.read) ~= 0 then 38 | log:fatal("failed to open input PCAP "..args.read) 39 | end 40 | log:notice("using input PCAP "..args.read) 41 | else 42 | getopt:usage() 43 | log:fatal("input must be specified, use -r/-i") 44 | end 45 | layer:producer(input) 46 | local produce, pctx = layer:produce() 47 | 48 | -- Set up CSV 49 | local csv_output 50 | if args.csv ~= "" then 51 | csv_output = io.open(args.csv, 'w') 52 | if csv_output == nil then 53 | log:fatal('failed to open "'..args.csv..'" for writing') 54 | else 55 | log:notice('writing output CSV to "'..args.csv..'"') 56 | end 57 | else 58 | csv_output = io.stdout 59 | end 60 | 61 | local Stats = {} 62 | local StatsCounters = {} 63 | 64 | function Stats.new(stats_period_ms, output, format) 65 | if stats_period_ms == nil then 66 | stats_period_ms = 1000 67 | end 68 | if stats_period_ms <= 0 then 69 | log:fatal("statistics interval must be greater than 0") 70 | end 71 | if format == nil then 72 | format = "time_s,period_time_since_ms,period_time_until_ms,period_packets,total_packets,period_pps,total_pps" 73 | end 74 | 75 | local self = setmetatable({ 76 | _stats_period_ms = stats_period_ms, 77 | _output = output, 78 | _format = format, 79 | _time_first_ms = nil, -- time of the very first received packet 80 | _time_next_ms = nil, -- time when next stats begins 81 | _time_last_ms = nil, -- time of the last received packet 82 | _period = 
StatsCounters.new(), 83 | _total = StatsCounters.new(), 84 | }, { __index = Stats }) 85 | 86 | if self._output ~= nil then 87 | self._output:write(format.."\n") 88 | end 89 | 90 | return self 91 | end 92 | 93 | function Stats:display() 94 | if self._output == nil then 95 | return 96 | end 97 | 98 | local period = self._period:tabulate("period") 99 | local total = self._total:tabulate("total") 100 | local additional = { 101 | time_s = string.format("%.03f", (self._period.time_until_ms - self._time_first_ms) / 1e3), 102 | } 103 | 104 | local outstr = string.gsub(self._format, "([_%w]+)", period) 105 | outstr = string.gsub(outstr, "([_%w]+)", total) 106 | outstr = string.gsub(outstr, "([_%w]+)", additional) 107 | 108 | self._output:write(outstr.."\n") 109 | end 110 | 111 | function Stats:receive(obj) 112 | local obj_pcap = obj:cast_to(object.PCAP) 113 | local time_pcap_ms = tonumber(obj_pcap.ts.sec) * 1e3 + tonumber(obj_pcap.ts.nsec) * 1e-6 114 | 115 | if self._time_first_ms == nil then 116 | self._time_first_ms = time_pcap_ms 117 | self._time_next_ms = self._time_first_ms + self._stats_period_ms 118 | self._period:begin(self._time_first_ms, self._time_next_ms) 119 | self._total:begin(self._time_first_ms, self._time_next_ms) 120 | end 121 | 122 | while time_pcap_ms >= self._time_next_ms do -- don't skip over 0-value intervals 123 | self._total = self._total + self._period 124 | self:display() 125 | 126 | local next_ms = self._time_next_ms + self._stats_period_ms 127 | self._period:begin(self._time_next_ms, next_ms) 128 | self._time_next_ms = next_ms 129 | end 130 | 131 | self._period.packets = self._period.packets + 1 132 | 133 | -- ensure monotonic update of time 134 | if self._time_last_ms == nil or time_pcap_ms > self._time_last_ms then 135 | self._time_last_ms = time_pcap_ms 136 | end 137 | end 138 | 139 | function Stats:finish() 140 | if self._time_last_ms == nil then 141 | self._log:warning("no packets received") 142 | return 143 | elseif self._time_last_ms < self._period.time_since_ms then 144 | -- this shouldn't happen, handling just in case 145 | self._log:fatal("last packet time is less than start of measurement interval") 146 | elseif self._time_last_ms == self._period.time_since_ms then 147 | -- avoid division by zero in calculations by adding an extra millisecond 148 | self._time_last_ms = self._time_last_ms + 1 149 | end 150 | self._period.time_until_ms = self._time_last_ms 151 | self._total = self._total + self._period 152 | self:display() 153 | end 154 | 155 | 156 | function StatsCounters.new() 157 | local self = setmetatable({ 158 | period_s = nil, 159 | time_since_ms = nil, 160 | time_until_ms = nil, 161 | packets = 0, 162 | }, { 163 | __index = StatsCounters, 164 | __add = function(op1, op2) 165 | op1.time_since_ms = math.min(op1.time_since_ms, op2.time_since_ms) 166 | op1.time_until_ms = math.max(op1.time_until_ms, op2.time_until_ms) 167 | op1.packets = op1.packets + op2.packets 168 | 169 | return op1 170 | end, 171 | }) 172 | 173 | return self 174 | end 175 | 176 | function StatsCounters:begin(time_since_ms, time_until_ms) 177 | self.packets = 0 178 | assert(time_until_ms > time_since_ms) 179 | self.time_since_ms = time_since_ms 180 | self.time_until_ms = time_until_ms 181 | end 182 | 183 | function StatsCounters:tabulate(prefix) 184 | if prefix == nil then 185 | prefix = "" 186 | elseif string.sub(prefix, -1) ~= "_" then 187 | prefix = prefix .. 
"_" 188 | end 189 | 190 | local res = {} 191 | local period_s = (self.time_until_ms - self.time_since_ms) / 1e3 192 | res[prefix.."time_since_ms"] = string.format("%d", self.time_since_ms) 193 | res[prefix.."time_until_ms"] = string.format("%d", self.time_until_ms) 194 | res[prefix.."packets"] = string.format("%d", self.packets) 195 | res[prefix.."pps"] = string.format("%d", self.packets / period_s) 196 | return res 197 | end 198 | 199 | function StatsCounters:count() 200 | self.packets = self.packets + 1 201 | end 202 | 203 | 204 | local stats = Stats.new(args.stats_period, csv_output) 205 | local obj 206 | while true do 207 | obj = produce(pctx) 208 | if obj == nil then break end 209 | stats:receive(obj) 210 | end 211 | stats:finish() 212 | 213 | csv_output:close() 214 | -------------------------------------------------------------------------------- /pcap/count-packets-per-ip.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | -- count-packets-per-ip.lua: provide packet summary for every source IP 4 | 5 | local input = require("dnsjit.input.pcap").new() 6 | local layer = require("dnsjit.filter.layer").new() 7 | local object = require("dnsjit.core.objects") 8 | local log = require("dnsjit.core.log").new("count-packets-per-ip.lua") 9 | local getopt = require("dnsjit.lib.getopt").new({ 10 | { "r", "read", "", "input file to read", "?" }, 11 | { nil, "csv", "", "path to the output CSV file (default: stdout)", "?" }, 12 | }) 13 | 14 | log:enable("all") 15 | 16 | -- Parse arguments 17 | local args = {} 18 | getopt:parse() 19 | args.read = getopt:val("r") 20 | args.csv = getopt:val("csv") 21 | 22 | -- Display help 23 | if getopt:val("help") then 24 | getopt:usage() 25 | return 26 | end 27 | 28 | -- Set up input 29 | if args.read ~= "" then 30 | if input:open_offline(args.read) ~= 0 then 31 | log:fatal("failed to open input PCAP "..args.read) 32 | end 33 | log:notice("using input PCAP "..args.read) 34 | else 35 | getopt:usage() 36 | log:fatal("input must be specified, use -r") 37 | end 38 | layer:producer(input) 39 | local produce, pctx = layer:produce() 40 | 41 | -- Set up CSV 42 | local csv_output 43 | if args.csv ~= "" then 44 | csv_output = io.open(args.csv, 'w') 45 | if csv_output == nil then 46 | log:fatal('failed to open "'..args.csv..'" for writing') 47 | else 48 | log:notice('writing output CSV to "'..args.csv..'"') 49 | end 50 | else 51 | csv_output = io.stdout 52 | end 53 | 54 | local clients = {} 55 | local now_ms, chunk_since_ms 56 | local n_clients = 0 57 | 58 | log:info("processing... 
(this may take up to minutes for very large files)") 59 | local obj, obj_pcap_in, obj_ip, client, src_ip 60 | while true do 61 | obj = produce(pctx) 62 | if obj == nil then break end 63 | 64 | obj_ip = obj:cast_to(object.IP) 65 | if obj_ip == nil then 66 | obj_ip = obj:cast_to(object.IP6) 67 | end 68 | 69 | obj_pcap_in = obj:cast_to(object.PCAP) 70 | if obj_ip ~= nil and obj_pcap_in ~= nil then 71 | now_ms = tonumber(obj_pcap_in.ts.sec) * 1e3 + tonumber(obj_pcap_in.ts.nsec) * 1e-6 72 | if chunk_since_ms == nil then 73 | chunk_since_ms = now_ms 74 | end 75 | 76 | src_ip = obj_ip:source() 77 | client = clients[src_ip] 78 | if client == nil then 79 | client = { 80 | packets = 0, 81 | since_ms = now_ms, 82 | until_ms = now_ms, 83 | } 84 | clients[src_ip] = client 85 | n_clients = n_clients + 1 86 | end 87 | client["packets"] = client["packets"] + 1 88 | client["until_ms"] = now_ms 89 | end 90 | end 91 | 92 | local duration_s = (now_ms - chunk_since_ms) / 1e3 93 | log:info(string.format("duration of input PCAP (s): %.3f", duration_s)) 94 | log:info(string.format("number of clients: %d", n_clients)) 95 | 96 | csv_output:write('"ip","ip_since_ms","ip_until_ms","packets","ip_chunk_qps"\n') 97 | for ip, data in pairs(clients) do 98 | csv_output:write('"') 99 | csv_output:write(ip) 100 | csv_output:write('",') 101 | csv_output:write(string.format("%d", data["since_ms"])) 102 | csv_output:write(',') 103 | csv_output:write(string.format("%d", data["until_ms"])) 104 | csv_output:write(',') 105 | csv_output:write(string.format("%d", data["packets"])) 106 | csv_output:write(',') 107 | csv_output:write(string.format("%.2f", data["packets"] / duration_s)) 108 | csv_output:write('\n') 109 | end 110 | csv_output:close() 111 | -------------------------------------------------------------------------------- /pcap/cut-pcap.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | -- cut-pcap.lua: Copy input PCAP to output until_s specified timestamp is reached. 4 | -- This is an efficient implementation of editcap -B for already sorted input. 5 | 6 | -- SPDX-FileCopyrightText: Internet Systems Consortium, Inc. 
("ISC") 7 | -- SPDX-License-Identifier: BSD-2-Clause 8 | 9 | local fpcap = require("dnsjit.input.fpcap") 10 | local log = require("dnsjit.core.log") 11 | local mmpcap = require("dnsjit.input.mmpcap") 12 | local object = require("dnsjit.core.objects") 13 | local output = require("dnsjit.output.pcap").new() 14 | 15 | local function open_pcap(filename) 16 | local input 17 | if filename == '-' then 18 | input = fpcap.new() 19 | if input:openfp(io.stdin) ~= 0 then 20 | log.fatal("failed to open PCAP on stdin") 21 | else 22 | log.debug('stdin opened using fpcap') 23 | end 24 | else 25 | input = mmpcap.new() 26 | if input:open(filename) ~= 0 then 27 | log.notice("failed to open PCAP with mmap, fallback to fpcap") 28 | input = fpcap.new() 29 | if input:open(filename) ~= 0 then 30 | log.fatal("failed to open PCAP with fpcap") 31 | else 32 | log.debug('file %s opened using fpcap', filename) 33 | end 34 | else 35 | log.debug('file %s opened using mmpcap', filename) 36 | end 37 | end 38 | local producer, pctx = input:produce() 39 | return producer, pctx 40 | end 41 | 42 | local function get_next_pkt(producer, pctx) 43 | local obj = producer(pctx) 44 | if obj ~= nil then 45 | return obj, obj:cast_to(object.PCAP) 46 | end 47 | end 48 | 49 | local out_filename 50 | local function check_output() 51 | if output:have_errors() then 52 | log.fatal("error writting to file %s", out_filename) 53 | end 54 | end 55 | 56 | log.enable("all") 57 | if #arg ~= 4 or not tonumber(arg[4]) then 58 | print("usage: "..arg[1].." ") 59 | print("Copy packets before specified timestamp from input to output PCAP") 60 | return 61 | end 62 | 63 | local in_filename = arg[2] 64 | out_filename = arg[3] 65 | local until_s = tonumber(arg[4]) 66 | 67 | if until_s ~= math.floor(until_s) or until_s <= 0 then 68 | log.fatal('unsupported stop timestamp: use an integer > 0') 69 | end 70 | 71 | local producer, pctx = open_pcap(in_filename) 72 | local cur_obj, cur_pkt = get_next_pkt(producer, pctx) 73 | if not cur_pkt then 74 | log.fatal('no packets in input pcap %s, terminating', in_filename) 75 | end 76 | 77 | log.info('opening output file %s', out_filename) 78 | output:open(out_filename, 79 | cur_pkt.linktype, 80 | cur_pkt.snaplen) 81 | local receiver, rctx = output:receive() 82 | 83 | local npackets = 0 84 | while cur_pkt do 85 | if cur_pkt.ts.sec >= until_s then 86 | log.info('timestamp %.f reached, stop', until_s) 87 | break 88 | end 89 | receiver(rctx, cur_obj) 90 | -- 91 | -- check output state only every 10 000 packets - optimization 92 | if npackets % 10000 == 0 then 93 | check_output() 94 | end 95 | npackets = npackets + 1 96 | cur_obj, cur_pkt = get_next_pkt(producer, pctx) 97 | end 98 | 99 | check_output() 100 | log.info('output %.f packets', npackets) 101 | log.debug('closing output file %s', out_filename) 102 | output:close() 103 | log.debug('output file %s closed', out_filename) 104 | -------------------------------------------------------------------------------- /pcap/extract-clients.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | -- extract-clients.lua: prepare PCAPs with client streams 4 | -- 5 | -- Process input PCAP and assign each client a unique IPv6 address. 6 | -- Optionally, the input PCAP can be split in into multiple chunks on desired 7 | -- duration. Output PCAP contains just RAWIP layer with IPv6 packets. 8 | -- 9 | -- For example, 600s of input with 10k unique clients could be split up into 10 | -- chunks of 60s. 
The output files combined have more than 10k clients, 11 | -- since a client is considered "unique" for every time chunk it appears in. 12 | -- Depending on the traffic, the output would have anywhere between 10k and 13 | -- 100k clients (combined). 14 | -- 15 | -- Other tools can then be used to merge these files to "scale up" the number 16 | -- of clients in a single time chunk. 17 | 18 | --- Check if a file or directory exists in this path 19 | local function exists(file) 20 | local ok, err, code = os.rename(file, file) 21 | if not ok then 22 | if code == 13 then 23 | -- Permission denied, but it exists 24 | return true 25 | end 26 | end 27 | return ok, err 28 | end 29 | 30 | local seed_def = os.time() + os.clock() / 1e6 31 | local dir = os.getenv("PWD") or "" 32 | local bit = require("bit") 33 | local ffi = require("ffi") 34 | local input = require("dnsjit.input.pcap").new() 35 | local output = require("dnsjit.output.pcap").new() 36 | local layer = require("dnsjit.filter.layer").new() 37 | local object = require("dnsjit.core.objects") 38 | local log = require("dnsjit.core.log").new("extract-clients.lua") 39 | local getopt = require("dnsjit.lib.getopt").new({ 40 | { "r", "read", "", "input file to read", "?" }, 41 | { "O", "outdir", dir, "directory for client chunks (must exist)", "?" }, 42 | { "d", "duration", 0, "duration of each chunk (in seconds, 0 means entire file)", "?" }, 43 | { "k", "keep", false, "keep last chunk even if it's incomplete", "?" }, 44 | { nil, "seed", seed_def, "seed for RNG", "?" }, 45 | { nil, "stdout", false, "output to stdout as a single file, no splits", "?" }, 46 | { nil, "query-rewrite", false, "rewrite all queries to . NS", "?" }, 47 | }) 48 | 49 | local SNAPLEN = 66000 50 | local LINKTYPE = 12 -- DLT_RAW in Linux, see https://github.com/the-tcpdump-group/libpcap/blob/master/pcap/dlt.h 51 | local HEADERSLEN = 40 + 8 -- IPv6 header and UDP header 52 | 53 | -- DNS payload WITHOUT message ID, query . NS +RD; used if --query-rewrite 54 | local DNS_PAYLOAD = "\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01" 55 | 56 | log:enable("all") 57 | 58 | -- Parse arguments 59 | local args = {} 60 | getopt:parse() 61 | args.read = getopt:val("r") 62 | args.duration = getopt:val("d") 63 | args.keep = getopt:val("k") 64 | args.outdir = getopt:val("O") 65 | args.seed = getopt:val("seed") 66 | args.stdout = getopt:val("stdout") 67 | args.query_rewrite = getopt:val("query-rewrite") 68 | math.randomseed(args.seed) 69 | 70 | -- Display help 71 | if getopt:val("help") then 72 | getopt:usage() 73 | return 74 | end 75 | 76 | -- Check arguments 77 | if args.stdout then 78 | if args.duration ~= 0 or args.outdir ~= "" then 79 | log.fatal("--stdout is mutualy exclusive with -d and -O, use -O ''") 80 | end 81 | end 82 | if args.duration < 0 then 83 | log:fatal("duration can't be negative") 84 | elseif args.duration == 0 then 85 | args.duration = math.huge 86 | log:notice("processing entire file as one chunk") 87 | else 88 | log:notice("file will be split every " .. args.duration .. " seconds") 89 | end 90 | if not args.stdout and (args.outdir == "" or not exists(args.outdir .. "/")) then 91 | log.fatal("output directory \"" .. args.outdir .. 
"\" doesn't exist") 92 | end 93 | 94 | -- Set up input 95 | if args.read ~= "" then 96 | if input:open_offline(args.read) ~= 0 then 97 | log:fatal("failed to open input PCAP "..args.read) 98 | end 99 | log:notice("using input PCAP "..args.read) 100 | else 101 | getopt:usage() 102 | log:fatal("input must be specified, use -r") 103 | end 104 | layer:producer(input) 105 | local produce, pctx = layer:produce() 106 | 107 | 108 | local i_chunk = 0 109 | local chunk_id 110 | local write, writectx 111 | local outfilename 112 | local function open_pcap() 113 | if args.stdout then 114 | outfilename = "-" 115 | else 116 | outfilename = args.outdir .. "/" .. chunk_id .. ".pcap" 117 | if exists(outfilename) then 118 | log:warning("chunk_id collision detected! skipping: " .. outfilename) 119 | return false 120 | end 121 | end 122 | if output:open(outfilename, LINKTYPE, SNAPLEN) ~= 0 then 123 | log:fatal("failed to open chunk file " .. outfilename) 124 | else 125 | log:notice("writing chunk: " .. outfilename) 126 | end 127 | write, writectx = output:receive() 128 | return true 129 | end 130 | 131 | 132 | local obj_pcap_out = ffi.new("core_object_pcap_t") 133 | obj_pcap_out.obj_type = object.PCAP 134 | 135 | local bytes = ffi.new("uint8_t[?]", SNAPLEN) 136 | bytes[0] = 0x60 -- IPv6 header 137 | -- UDP len in bytes[4]:bytes[5] 138 | bytes[6] = 0x11 -- next header: UDP 139 | bytes[8] = 0xfd -- bytes[8]:bytes[23] source IPv6 fd00:: 140 | bytes[39] = 0x01 -- dst IPv6 ::1 141 | obj_pcap_out.bytes = bytes 142 | 143 | local function put_uint16_be(dst, offset, src) 144 | dst[offset] = bit.rshift(bit.band(src, 0xff00), 8) 145 | dst[offset + 1] = bit.band(src, 0xff) 146 | end 147 | 148 | local function put_uint32_be(dst, offset, src) 149 | dst[offset] = bit.rshift(bit.band(src, 0xff000000), 24) 150 | dst[offset + 1] = bit.rshift(bit.band(src, 0xff0000), 16) 151 | dst[offset + 2] = bit.rshift(bit.band(src, 0xff00), 8) 152 | dst[offset + 3] = bit.band(src, 0xff) 153 | end 154 | 155 | local clients = {} 156 | local i_client = 0 157 | local ct_4b = ffi.typeof("uint8_t[4]") 158 | local now_ms, diff_ms, chunk_since_ms, chunk_until_ms 159 | 160 | local function check_output() 161 | if output:have_errors() then 162 | log:fatal("error writting to file %s", outfilename) 163 | end 164 | end 165 | 166 | local function chunk_init() 167 | local opened 168 | repeat 169 | -- assign random "unique" chunk ID 170 | bytes[16] = math.random(0, 255) 171 | bytes[17] = math.random(0, 255) 172 | bytes[18] = math.random(0, 255) 173 | bytes[19] = math.random(0, 255) 174 | chunk_id = string.format("%02x%02x%02x%02x", bytes[16], bytes[17], bytes[18], bytes[19]) 175 | opened = open_pcap() 176 | until(opened) 177 | 178 | clients = {} 179 | i_client = 0 180 | i_chunk = i_chunk + 1 181 | 182 | chunk_since_ms = chunk_until_ms or now_ms 183 | chunk_until_ms = chunk_since_ms + args.duration * 1e3 184 | end 185 | 186 | local function chunk_finalize() 187 | check_output() 188 | output:close() 189 | local duration_s = (chunk_until_ms - chunk_since_ms) / 1e3 190 | log:info(string.format(" duration_s: %.3f", duration_s)) 191 | log:info(string.format(" number of clients: %d", i_client)) 192 | if i_client == 0 then 193 | log:warning(" deleting empty chunk, double check your data") 194 | os.remove(outfilename) 195 | end 196 | end 197 | 198 | local obj, obj_pcap_in, obj_ip, obj_udp, obj_pl, client, src_ip, ip_len, prev_ms 199 | local npacketsin = 0 200 | local npacketsout = 0 201 | local npacketsskip = 0 202 | while true do 203 | obj = produce(pctx) 204 | if 
obj == nil then break end 205 | npacketsin = npacketsin + 1 206 | 207 | ip_len = 4 208 | obj_ip = obj:cast_to(object.IP) 209 | if obj_ip == nil then 210 | obj_ip = obj:cast_to(object.IP6) 211 | ip_len = 16 212 | end 213 | 214 | obj_udp = obj:cast_to(object.UDP) 215 | obj_pl = obj:cast_to(object.PAYLOAD) 216 | obj_pcap_in = obj:cast_to(object.PCAP) 217 | if obj_ip ~= nil and obj_udp ~= nil and obj_pl ~= nil and obj_pcap_in ~= nil then 218 | now_ms = tonumber(obj_pcap_in.ts.sec) * 1e3 + tonumber(obj_pcap_in.ts.nsec) * 1e-6 219 | if prev_ms then 220 | if (now_ms < prev_ms) then 221 | log:fatal('non-monotonic packet timestamp detected, exiting ' 222 | .. '(current ts %f < previous %f)', now_ms, prev_ms) 223 | break 224 | end 225 | end 226 | prev_ms = now_ms 227 | while chunk_until_ms == nil or now_ms >= chunk_until_ms do 228 | if chunk_until_ms ~= nil then 229 | chunk_finalize() 230 | end 231 | chunk_init() 232 | end 233 | 234 | src_ip = ffi.string(obj_ip.src, ip_len) 235 | client = clients[src_ip] 236 | if client == nil then 237 | client = { addr = ct_4b(), queries = 0 } 238 | put_uint32_be(client["addr"], 0, i_client) 239 | i_client = i_client + 1 240 | clients[src_ip] = client 241 | end 242 | client["queries"] = client["queries"] + 1 243 | ffi.copy(bytes + 20, client["addr"], 4) 244 | 245 | diff_ms = now_ms - chunk_since_ms 246 | obj_pcap_out.ts.sec = math.floor(diff_ms / 1e3) 247 | obj_pcap_out.ts.nsec = math.floor((diff_ms % 1e3) * 1e6) 248 | 249 | obj_pcap_out.len = HEADERSLEN + obj_pl.len 250 | obj_pcap_out.caplen = obj_pcap_out.len 251 | 252 | put_uint16_be(bytes, 4, obj_udp.ulen) -- IPv6 payload length 253 | put_uint16_be(bytes, 40, 0x0035) -- normalized src port 53 254 | put_uint16_be(bytes, 42, 0x0035) -- normalized dst port 53 255 | if args.query_rewrite then 256 | put_uint16_be(bytes, 44, 0x0019) -- UDP length incl. UDP header 257 | put_uint16_be(bytes, 46, 0x0000) -- checksum: disabled/ignored 258 | put_uint16_be(bytes, 48, math.random(0, 65535)) -- msg ID 259 | ffi.copy(bytes + HEADERSLEN + 2, DNS_PAYLOAD) 260 | else 261 | put_uint16_be(bytes, 44, obj_udp.ulen) 262 | put_uint16_be(bytes, 46, obj_udp.sum) 263 | ffi.copy(bytes + HEADERSLEN, obj_pl.payload, obj_pl.len) 264 | end 265 | 266 | -- check output state only every 10 000 packets - optimization 267 | if npacketsout % 10000 == 0 then 268 | check_output() 269 | end 270 | write(writectx, obj_pcap_out:uncast()) 271 | npacketsout = npacketsout + 1 272 | else 273 | npacketsskip = npacketsskip + 1 274 | end 275 | end 276 | if npacketsskip > 0 then 277 | log:warning(string.format("skipped %d non-IP or non-UDP packets (%f %%)", 278 | npacketsskip, npacketsskip / npacketsin * 100)) 279 | end 280 | 281 | if now_ms == nil then 282 | log:fatal("no valid packets found") 283 | end 284 | 285 | chunk_finalize() 286 | 287 | if args.duration ~= math.huge and not args.keep then 288 | log:notice("removing incomplete last chunk "..outfilename) 289 | os.remove(outfilename) 290 | end 291 | -------------------------------------------------------------------------------- /pcap/filter-dnsq.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | -- filter-dnsq.lua: obtain DNS queries from input PCAP / interface 4 | -- 5 | -- Process input and extract DNS queries into an output PCAP. 
6 | 7 | local ffi = require("ffi") 8 | local C = ffi.C 9 | local input = require("dnsjit.input.pcap").new() 10 | local output = require("dnsjit.output.pcap").new() 11 | local layer = require("dnsjit.filter.layer").new() 12 | local object = require("dnsjit.core.objects") 13 | local dns = require("dnsjit.core.object.dns").new() 14 | local dns_q = require("dnsjit.core.object.dns.q").new() 15 | local dns_rr = require("dnsjit.core.object.dns.rr").new() 16 | local labels = require("dnsjit.core.object.dns.label").new(127) 17 | local log = require("dnsjit.core.log").new("filter-dnsq.lua") 18 | local getopt = require("dnsjit.lib.getopt").new({ 19 | { "r", "read", "", "input file to read", "?" }, 20 | { "i", "interface", "", "capture interface", "?" }, 21 | { "w", "write", "", "output file to write (or /dev/null)", "?" }, 22 | { "p", "port", 53, "destination port to check for UDP DNS queries", "?" }, 23 | { "l", "log-malformed", false, "log why queries were considered as malformed", "?" }, 24 | { "m", "malformed", false, "include malformed queries", "?" }, 25 | { "M", "only-malformed", false, "include only malformed queries", "?" }, 26 | { "s", "skipped", false, "include queries for *.dotnxdomain.net, " 27 | .. "which would otherwise be skipped", "?" }, 28 | { "a", "address", "", "destination address (can be specified multiple times)", "?*" }, 29 | }) 30 | 31 | local AF_INET = 2 32 | local AF_INET6 = 10 33 | if ffi.os == "OSX" then 34 | AF_INET6 = 30 35 | end 36 | 37 | local args 38 | 39 | local function check_output() 40 | if output:have_errors() then 41 | log:fatal("error writting to file %s", args.write) 42 | end 43 | end 44 | 45 | ffi.cdef[[ 46 | int inet_pton(int af, const char* src, void* dst); 47 | int memcmp(const void *s1, const void *s2, size_t n); 48 | ]] 49 | 50 | log:enable("all") 51 | 52 | -- Parse arguments 53 | args = {} 54 | getopt:parse() 55 | args.read = getopt:val("r") 56 | args.interface = getopt:val("i") 57 | args.write = getopt:val("w") 58 | args.port = getopt:val("p") 59 | args.only_malformed = getopt:val("M") 60 | args.malformed = getopt:val("m") or args.only_malformed 61 | args.log_malformed = getopt:val("l") 62 | args.csv = getopt:val("csv") 63 | args.skipped = getopt:val("s") 64 | args.address = getopt:val("a") 65 | 66 | -- Display help 67 | if getopt:val("help") then 68 | getopt:usage() 69 | return 70 | end 71 | 72 | -- Check arguments 73 | if args.port <= 0 or args.port > 65535 then 74 | log:fatal("invalid port number") 75 | end 76 | 77 | -- Convert IPs to binary 78 | local addresses = {} 79 | if #args.address > 0 then 80 | for i, addr in ipairs(args.address) do 81 | local inet = ffi.new("uint8_t [16]") -- reserve enough memory for either IPv4 or IPv6 82 | local len = 4 83 | -- try parse as IPv4 84 | if C.inet_pton(AF_INET, addr, inet) ~= 1 then 85 | len = 16 86 | if C.inet_pton(AF_INET6, addr, inet) ~= 1 then 87 | log:fatal("failed to parse address as IPv4 or IPv6: "..addr) 88 | end 89 | end 90 | addresses[i] = { inet = inet, len = len } 91 | end 92 | end 93 | 94 | -- Set up input 95 | if args.read ~= "" then 96 | if input:open_offline(args.read) ~= 0 then 97 | log:fatal("failed to open input PCAP "..args.read) 98 | end 99 | log:notice("using input PCAP "..args.read) 100 | elseif args.interface ~= "" then 101 | input:create(args.interface) 102 | if input:activate() ~= 0 then 103 | log:fatal("failed to capture interface "..args.interface.." 
(insufficient permissions?)") 104 | end 105 | log:notice("capturing input interface "..args.interface) 106 | else 107 | getopt:usage() 108 | log:fatal("input must be specified, use -r/-i") 109 | end 110 | layer:producer(input) 111 | local produce, pctx = layer:produce() 112 | 113 | -- Set up output 114 | if args.write == "" then 115 | log:fatal("output must be specified, use -w; use /dev/null if you want just counters") 116 | elseif output:open(args.write, input:linktype(), input:snaplen()) ~= 0 then 117 | log:fatal("failed to open output PCAP "..args.write) 118 | else 119 | log:notice("using output PCAP "..args.write) 120 | end 121 | local write, writectx = output:receive() 122 | 123 | local function matches_addresses(ip, len) 124 | for _, addr in ipairs(addresses) do 125 | if addr.len == len and C.memcmp(ip, addr.inet, len) == 0 then 126 | return true 127 | end 128 | end 129 | return false 130 | end 131 | 132 | local function rr_idx_section_name(rr_idx, ancount, nscount) 133 | if rr_idx <= ancount then 134 | return string.format('answer RR idx %d', rr_idx) 135 | elseif rr_idx <= ancount + nscount then 136 | return string.format('authority RR idx %d', rr_idx - ancount) 137 | else 138 | return string.format('additional RR idx %d', rr_idx - ancount - nscount) 139 | end 140 | end 141 | 142 | local function log_packet(obj, reason) 143 | if not args.log_malformed then 144 | return 145 | end 146 | local obj_pcap = obj:cast_to(object.PCAP) 147 | log:info("timestamp %d.%d: %s", obj_pcap.ts.sec, obj_pcap.ts.nsec, reason) 148 | end 149 | 150 | local function is_skipped_qname(payload, qlabels, max_labels) 151 | local found_labels = 0 152 | -- iterate over label lengths to the or label array end 153 | for n = 1, max_labels do 154 | local qlabel = qlabels[n - 1] 155 | if qlabel.have_offset == 1 then 156 | return nil -- malformed, qname should not be compressed 157 | elseif qlabel.have_dn == 0 then 158 | break -- end of label array 159 | end 160 | -- have_dn == 1, continue to see if there are further labels 161 | found_labels = n 162 | end 163 | -- check if qname can have form *.dotnxdomain.net. 164 | if found_labels < 3 then 165 | return false 166 | end 167 | -- malformed, qname must be terminated with root label 168 | if qlabels[found_labels].length ~= 0 then 169 | return nil 170 | end 171 | 172 | -- is it in net.? 173 | local tld = qlabels[found_labels - 1] 174 | if tld.length ~= 3 then 175 | return false 176 | end 177 | local tlddata = ffi.cast('char *', payload + tld.offset + 1) 178 | if ffi.string(tlddata, tld.length):lower() ~= 'net' then 179 | return false 180 | end 181 | 182 | -- is it in dotnxdomain.net.? 
183 | local sld = qlabels[found_labels - 2] 184 | if sld.length ~= 11 then 185 | return false 186 | end 187 | local slddata = ffi.cast('char *', payload + sld.offset + 1) 188 | if ffi.string(slddata, sld.length):lower() ~= 'dotnxdomain' then 189 | return false 190 | end 191 | 192 | return true 193 | end 194 | 195 | 196 | local nmalformed = 0 197 | local nskipped = 0 198 | -- Filtering function that picks only DNS queries 199 | local function is_dnsq(obj) 200 | local payload = obj:cast_to(object.PAYLOAD) 201 | if payload == nil then return false end 202 | if payload.len < 12 then return false end -- ignore garbage smaller than DNS header size 203 | local udp = obj:cast_to(object.UDP) 204 | if udp == nil then return false end -- use only UDP packets 205 | if udp.dport ~= args.port then return false end 206 | 207 | if #addresses > 0 then -- check destination IP 208 | local ip_obj = obj:cast_to(object.IP) or obj:cast_to(object.IP6) 209 | local len = 4 210 | if ip_obj.obj_type == object.IP6 then len = 16 end 211 | if matches_addresses(ip_obj.dst, len) == false then return false end 212 | end 213 | 214 | dns:reset() 215 | dns.obj_prev = obj 216 | dns:parse_header() 217 | if dns.qr == 1 then return false end -- ignore DNS responses 218 | 219 | -- check that query isn't malformed 220 | if dns.qdcount > 0 then -- parse all questions 221 | for idx = 1, dns.qdcount do 222 | if dns:parse_q(dns_q, labels, 127) ~= 0 then 223 | log_packet(obj, 'cannot parse qname idx %d', idx) 224 | nmalformed = nmalformed + 1 225 | return args.malformed 226 | end 227 | local is_skipped = is_skipped_qname(dns.payload, labels, 127) 228 | if is_skipped == nil then 229 | log_packet(obj, 'too suspicious qname idx %d', idx) 230 | nmalformed = nmalformed + 1 231 | return args.malformed 232 | elseif is_skipped and not args.skipped then 233 | nskipped = nskipped + 1 234 | return false 235 | end 236 | end 237 | end 238 | local rrcount = dns.ancount + dns.nscount + dns.arcount 239 | if rrcount > 0 then -- parse all other RRs 240 | for idx = 1, rrcount do 241 | if dns:parse_rr(dns_rr, labels, 127) ~= 0 then 242 | log_packet(obj, string.format('malformed RR idx %d (%s)', idx, rr_idx_section_name(idx, dns.ancount, dns.nscount))) 243 | nmalformed = nmalformed + 1 244 | return args.malformed 245 | end 246 | end 247 | end 248 | return not args.only_malformed 249 | end 250 | 251 | local npackets_in = 0 252 | local npackets_out = 0 253 | local obj 254 | while true do 255 | obj = produce(pctx) 256 | if obj == nil then break end 257 | npackets_in = npackets_in + 1 258 | if is_dnsq(obj) then 259 | write(writectx, obj) 260 | npackets_out = npackets_out + 1 261 | if npackets_out % 10000 == 0 then 262 | check_output() 263 | end 264 | end 265 | end 266 | 267 | check_output() 268 | output:close() 269 | 270 | if npackets_out == 0 then 271 | log:fatal("no packets were matched by filter!") 272 | else 273 | log:notice("%0.f out of %0.f packets matched filter (%f %%)", 274 | npackets_out, npackets_in, npackets_out / npackets_in * 100) 275 | if nmalformed > 0 then 276 | local total 277 | if args.only_malformed then 278 | total = npackets_out 279 | else 280 | total = npackets_out + nmalformed 281 | end 282 | local malformed_desc 283 | if args.malformed then 284 | malformed_desc = "and written to output" 285 | else 286 | malformed_desc = "and omitted from output" 287 | end 288 | log:notice("%0.f malformed DNS packets detected " 289 | .. 
"(%f %% of matching packets) %s", 290 | nmalformed, nmalformed / total * 100, malformed_desc) 291 | else 292 | log:info("0 malformed DNS packets detected") 293 | end 294 | if nskipped > 0 then 295 | log:notice("%0.f skipped queries for *.dotnxdomain.net were " 296 | .. "omitted from output", nskipped) 297 | end 298 | end 299 | -------------------------------------------------------------------------------- /pcap/generate-const-qps.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | local bit = require("bit") 3 | 4 | -- For mysterious reasons this combination of write_uint32 implementations is fastest 5 | -- with QPS >= 10 and it gets only slower if implementations are unified (LuaJIT 2.1.0b3). 6 | -- It is slower for QPS < 10 but that's a corner case we are not optimizing for. 7 | local function write_uint32_le(output, src) 8 | local s = string.char( 9 | bit.band(src, 0xff), 10 | bit.rshift(bit.band(src, 0xff00), 8), 11 | bit.rshift(bit.band(src, 0xff0000), 16), 12 | bit.rshift(bit.band(src, 0xff000000), 24)) 13 | output:write(s) 14 | return s 15 | end 16 | 17 | local function write_uint32_be(output, src) 18 | output:write(string.char( 19 | bit.rshift(bit.band(src, 0xff000000), 24))) 20 | output:write(string.char( 21 | bit.rshift(bit.band(src, 0xff0000), 16))) 22 | output:write(string.char( 23 | bit.rshift(bit.band(src, 0xff00), 8))) 24 | output:write(string.char( 25 | bit.band(src, 0xff))) 26 | end 27 | 28 | local function write_uint16_be(output, src) 29 | output:write(string.char( 30 | bit.rshift(bit.band(src, 0xff00), 8))) 31 | output:write(string.char( 32 | bit.band(src, 0xff))) 33 | end 34 | 35 | 36 | -- https://wiki.wireshark.org/Development/LibpcapFileFormat 37 | local function write_pcap_header(output) 38 | output:write("\xD4\xC3\xB2\xA1") -- PCAP magic 39 | output:write("\x02\x00") -- major version number 40 | output:write("\x04\x00") -- minor version number 41 | output:write("\x00\x00\x00\x00") -- thizone: gmt to local correction 42 | output:write("\x00\x00\x00\x00") -- sigfigs: accuracy of timestamps, in practice always 0 43 | output:write("\xD0\x01\x01\x00") -- snaplen: max length of captured packets, in octets 44 | output:write("\x65\x00\x00\x00") -- linktype: data link type RAW_IP 45 | end 46 | 47 | local frame_start = 48 | -- PCAP packet header 49 | "\x41\x00\x00\x00" .. -- snap length 50 | "\x41\x00\x00\x00" .. -- original length 51 | -- IP layer 52 | "\x60\x00\x00\x00" .. -- IP version 6 and no flags 53 | "\x00\x19" .. -- IP payload length 54 | "\x11" .. -- protocol payload = UDP 55 | "\x00" .. -- hop limit 56 | "\xfd\x00\x00\x00\x00\x00\x00\x00\x02\x11\x66\x8e" -- source address WITHOUT last 4 octets 57 | 58 | local header_end = 59 | "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01" .. -- full dest address 60 | -- UDP 61 | "\x00\x35" .. -- source port 62 | "\x00\x35" .. -- dest port 63 | "\x00\x19" .. -- UDP length incl. UDP header (= payload + 8 bytes) 64 | "\x00\x00" -- checksum (disabled/ignored) 65 | 66 | -- DNS payload WITHOUT message ID, query . 
NS +RD 67 | local dns_payload = "\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01" 68 | 69 | local function write_frame(output, source_id) 70 | output:write(frame_start) 71 | write_uint32_be(output, source_id) 72 | output:write(header_end) 73 | write_uint16_be(output, math.random(0, 65535)) 74 | output:write(dns_payload) 75 | end 76 | 77 | local cache_sec 78 | local cache_sec_bytes 79 | local function write_timestamps(output, now_sec) 80 | local sec_int = math.floor(now_sec) 81 | local usec_int = math.floor((now_sec - sec_int) * 1e6) 82 | -- unix timestamp in seconds 83 | if cache_sec == sec_int then 84 | output:write(cache_sec_bytes) 85 | else 86 | cache_sec_bytes = write_uint32_le(output, sec_int) 87 | cache_sec = sec_int 88 | end 89 | -- microseconds since second 90 | write_uint32_le(output, usec_int) 91 | end 92 | 93 | local log = require("dnsjit.core.log").new("generate-const-qps.lua") 94 | local getopt = require("dnsjit.lib.getopt").new({ 95 | { "c", "clients", 1, "number of source IP addresses to generate", "?" }, 96 | { "q", "qps", 1, "queries per second to generate", "" }, 97 | { "t", "time-limit", math.huge, "length of query stream in seconds", "?" }, 98 | { "u", "uniform-clients", false, "do not randomize source IP addresses, do round robin", "?" }, 99 | { "w", "write", "-", "output file to write, - means stdout (default)", "?" }, 100 | }) 101 | local ok, left = pcall(getopt.parse, getopt) 102 | if not ok or #left > 0 or getopt:val("help") 103 | or getopt:val("clients") < 1 or getopt:val("clients") > 2^32 104 | or getopt:val("time-limit") <= 0 or getopt:val("qps") < 1 then 105 | log:info("Generate DNS query stream with uniform QPS and given number of source IP addresses.") 106 | getopt:usage() 107 | if not ok then 108 | log:fatal(left) 109 | elseif #left > 0 then 110 | log:fatal('unsupported extra arguments on command line') 111 | end 112 | return 113 | end 114 | 115 | local output 116 | if getopt:val("write") == "-" then 117 | output = io.stdout 118 | else 119 | output = io.open(getopt:val("write"), "w") 120 | end 121 | if output == nil then 122 | log:fatal("cannot open output file %s", getopt:val("write")) 123 | return 124 | end 125 | 126 | local clients = getopt:val("clients") 127 | local uniform_clients = clients == 1 or getopt:val("uniform-clients") 128 | local packet_interval = 1/getopt:val("qps") 129 | local end_sec = getopt:val("time-limit") 130 | 131 | write_pcap_header(output) 132 | 133 | local now_sec = 0 134 | local i_client = 0 135 | while (now_sec <= end_sec) 136 | do 137 | write_timestamps(output, now_sec) 138 | write_frame(output, i_client) 139 | now_sec = now_sec + packet_interval 140 | if uniform_clients then 141 | i_client = (i_client + 1) % clients 142 | else 143 | i_client = math.random(0, clients - 1) 144 | end 145 | end 146 | -------------------------------------------------------------------------------- /pcap/limit-clients.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | -- limit-clients.lua: randomize which clients (IPs) will be included in output 4 | -- 5 | -- Every unique IP (client) has the given chance to appear in the output file. 6 | -- If a client appears, all of its packets remain intact. If a client doesn't 7 | -- appear in the output, none of its packets will. 8 | -- 9 | -- This script can only scale-down (limit) the number of clients, i.e. the 10 | -- chance must be in range 0 to 1. 
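--
-- A usage sketch (hypothetical file names; keeps roughly 10 % of clients,
-- reproducibly, thanks to the fixed RNG seed):
--
--   dnsjit pcap/limit-clients.lua -r big.pcap -w small.pcap -l 0.1 --seed 42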
11 |
12 | local ffi = require("ffi")
13 | local input = require("dnsjit.input.pcap").new()
14 | local output = require("dnsjit.output.pcap").new()
15 | local layer = require("dnsjit.filter.layer").new()
16 | local object = require("dnsjit.core.objects")
17 | local log = require("dnsjit.core.log").new("limit-clients.lua")
18 | local getopt = require("dnsjit.lib.getopt").new({
19 |     { "r", "read", "", "input file to read", "?" },
20 |     { "w", "write", "", "output file to write", "?" },
21 |     { "l", "limit", 1.0, "chance for each client to appear, 0 to 1", "?" },
22 |     { nil, "seed", 0, "seed for RNG", "?" },
23 | })
24 |
25 | local SNAPLEN = 66000
26 | local LINKTYPE = 12 -- DLT_RAW in Linux, see https://github.com/the-tcpdump-group/libpcap/blob/master/pcap/dlt.h
27 |
28 | local args
29 |
30 | local function check_output()
31 |     if output:have_errors() then
32 |         log:fatal("error writing to file %s", args.write)
33 |     end
34 | end
35 |
36 | log:enable("all")
37 |
38 | -- Parse arguments
39 | args = {}
40 | getopt:parse()
41 | args.read = getopt:val("r")
42 | args.write = getopt:val("w")
43 | args.limit = getopt:val("l")
44 | args.seed = getopt:val("seed")
45 |
46 | -- Display help
47 | if getopt:val("help") then
48 |     getopt:usage()
49 |     return
50 | end
51 |
52 | -- Check arguments
53 | if args.limit <= 0 then
54 |     log:fatal("limit must be greater than 0")
55 | elseif args.limit > 1 then
56 |     log:fatal("limit can't be greater than 1")
57 | end
58 | math.randomseed(args.seed)
59 |
60 | -- Set up input
61 | if args.read ~= "" then
62 |     if input:open_offline(args.read) ~= 0 then
63 |         log:fatal("failed to open input PCAP " .. args.read)
64 |     end
65 |     log:notice("using input PCAP " .. args.read)
66 | else
67 |     getopt:usage()
68 |     log:fatal("input must be specified, use -r")
69 | end
70 | layer:producer(input)
71 | local produce, pctx = layer:produce()
72 |
73 | -- Set up output
74 | if args.write ~= "" then
75 |     if output:open(args.write, LINKTYPE, SNAPLEN) ~= 0 then
76 |         log:fatal("failed to open output PCAP " .. args.write)
77 |     else
78 |         log:notice("writing output PCAP: " ..
args.write) 79 | end 80 | else 81 | getopt:usage() 82 | log:fatal("output must be specified, use -w") 83 | end 84 | local write, writectx = output:receive() 85 | 86 | local clients = {} 87 | local n_present = 0 88 | local n_packets = 0 89 | 90 | local obj, obj_pcap_in, obj_ip, obj_pl, src_ip, ip_len, present 91 | while true do 92 | obj = produce(pctx) 93 | if obj == nil then break end 94 | 95 | ip_len = 4 96 | obj_ip = obj:cast_to(object.IP) 97 | if obj_ip == nil then 98 | obj_ip = obj:cast_to(object.IP6) 99 | ip_len = 16 100 | end 101 | 102 | obj_pl = obj:cast_to(object.PAYLOAD) 103 | obj_pcap_in = obj:cast_to(object.PCAP) 104 | if obj_ip ~= nil and obj_pl ~= nil and obj_pcap_in ~= nil then 105 | src_ip = ffi.string(obj_ip.src, ip_len) 106 | present = clients[src_ip] 107 | if present == nil then 108 | present = math.random() < args.limit 109 | if present then 110 | n_present = n_present + 1 111 | end 112 | clients[src_ip] = present 113 | end 114 | 115 | if present then 116 | write(writectx, obj) 117 | n_packets = n_packets + 1 118 | if n_packets % 10000 == 0 then 119 | check_output() 120 | end 121 | end 122 | end 123 | end 124 | 125 | check_output() 126 | log:info(string.format(" number of clients: %d", n_present)) 127 | -------------------------------------------------------------------------------- /pcap/merge-chunks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import argparse 3 | import os 4 | from pathlib import Path 5 | import random 6 | import sys 7 | 8 | 9 | def positive_int(val): 10 | i = int(val) 11 | if i <= 0: 12 | raise ValueError("must be greater than 0") 13 | return i 14 | 15 | 16 | def readable_directory(path): 17 | po = Path(path) 18 | if not po.is_dir(): 19 | raise ValueError("must be path to directory") 20 | return po 21 | 22 | 23 | def main(): 24 | parser = argparse.ArgumentParser( 25 | description="Merge subset of PCAP chunks on the fly and write result to stdout" 26 | ) 27 | 28 | parser.add_argument( 29 | "nchunks", 30 | type=positive_int, 31 | help="Number of chunks to randomly select from source_dirs and merge", 32 | ) 33 | parser.add_argument( 34 | "source_dirs", 35 | nargs="+", 36 | type=readable_directory, 37 | help="Paths to directories with PCAP chunks", 38 | ) 39 | parser.add_argument( 40 | "--seed", 41 | default=0, 42 | type=int, 43 | help="Randomization seed (default: 0); use negative value to turn off randomization", 44 | ) 45 | args = parser.parse_args() 46 | 47 | # reproducible pseudorandomness 48 | random.seed(args.seed, version=2) 49 | 50 | pcaps = [] 51 | for dir_path in args.source_dirs: 52 | pcaps.extend( 53 | str(path) 54 | for path in dir_path.glob("**/*") 55 | if path.is_file() or path.is_fifo() 56 | ) 57 | 58 | if args.nchunks > len(pcaps): 59 | sys.exit(f"{args.nchunks} chunks requested but only {len(pcaps)} available") 60 | 61 | pcaps.sort() 62 | if args.seed >= 0: 63 | random.shuffle(pcaps) 64 | mergecap_args = ["mergecap", "-F", "pcap", "-w", "-"] 65 | mergecap_args.extend(pcaps[: args.nchunks]) 66 | 67 | sys.stderr.write(f"merging {args.nchunks} chunks into PCAP stream on stdout\n") 68 | sys.stderr.write(f"executing merge command: {mergecap_args}\n") 69 | sys.stderr.flush() 70 | 71 | os.execvp("mergecap", mergecap_args) 72 | 73 | 74 | if __name__ == "__main__": 75 | main() 76 | -------------------------------------------------------------------------------- /pcap/split-clients.lua: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env dnsjit
2 |
3 | -- split-clients.lua: separate clients (IPs) into multiple output files
4 | --
5 | -- Every unique IP (client) will be assigned to one output file.
6 | -- All of a client's packets remain intact and go into a single file.
7 |
8 | local ffi = require("ffi")
9 | local input = require("dnsjit.input.pcap").new()
10 | local layer = require("dnsjit.filter.layer").new()
11 | local object = require("dnsjit.core.objects")
12 | local log = require("dnsjit.core.log").new("split-clients.lua")
13 | local getopt = require("dnsjit.lib.getopt").new({
14 |     { "r", "read", "", "input file to read", "?" },
15 |     { "O", "outdir", "", "directory for client chunks (must exist)", "?" },
16 |     { "n", "noutputs", 0, "number of output files", "?" },
17 |     { nil, "seed", 0, "seed for RNG", "?" },
18 | })
19 |
20 | local SNAPLEN = 66000
21 | local LINKTYPE = 12 -- DLT_RAW in Linux, see https://github.com/the-tcpdump-group/libpcap/blob/master/pcap/dlt.h
22 |
23 | log:enable("all")
24 |
25 | --- Check if a file or directory exists in this path
26 | local function exists(file)
27 |     local ok, err, code = os.rename(file, file)
28 |     if not ok then
29 |         if code == 13 then
30 |             -- Permission denied, but it exists
31 |             return true
32 |         end
33 |     end
34 |     return ok, err
35 | end
36 |
37 | -- Error out if write failed
38 | local function check_output(output, filename)
39 |     if output:have_errors() then
40 |         log:fatal("error writing to file %s", filename)
41 |     end
42 | end
43 |
44 | -- Parse arguments
45 | local args = {}
46 | getopt:parse()
47 | args.read = getopt:val("r")
48 | args.outdir = getopt:val("O")
49 | args.noutputs = getopt:val("n")
50 | args.seed = getopt:val("seed")
51 | math.randomseed(args.seed)
52 |
53 | -- Display help
54 | if getopt:val("help") then
55 |     getopt:usage()
56 |     return
57 | end
58 |
59 | -- Prepare output directories
60 | if args.outdir == "" then
61 |     getopt:usage()
62 |     log:fatal("output directory must be specified, use -O")
63 | elseif not exists(args.outdir .. "/") then
64 |     log:fatal("output directory \"" .. args.outdir .. "\" doesn't exist")
65 | end
66 |
67 | -- Check arguments
68 | if args.noutputs <= 1 then
69 |     log:fatal("number of output files must be greater than 1")
70 | end
71 |
72 | -- Set up input
73 | if args.read ~= "" then
74 |     if input:open_offline(args.read) ~= 0 then
75 |         log:fatal("failed to open input PCAP " .. args.read)
76 |     end
77 |     log:notice("using input PCAP " .. args.read)
78 | else
79 |     getopt:usage()
80 |     log:fatal("input must be specified, use -r")
81 | end
82 |
83 | layer:producer(input)
84 | local produce, pctx = layer:produce()
85 |
86 | local outputs = {}
87 | local fname_padding = tostring(math.floor(math.log(args.noutputs, 10)) + 1) -- digits needed for zero-padded names
88 | for n = 1, args.noutputs do
89 |     local output = require("dnsjit.output.pcap").new()
90 |     local out_fname = string.format("%s/%0" .. fname_padding
91 |         .. "d.pcap", args.outdir, n)
92 |     if output:open(out_fname, LINKTYPE, SNAPLEN) ~= 0 then
93 |         log:fatal("failed to open chunk file " .. out_fname)
94 |     else
95 |         log:notice("created output PCAP: " ..
out_fname)
96 |     end
97 |
98 |     outputs[n] = { fn = out_fname, output = output, nclients = 0, npackets = 0 }
99 |     outputs[n]['write'], outputs[n]['writectx'] = output:receive()
100 | end
101 |
102 | local nclients = 0
103 | local client2output = {}
104 |
105 | local npackets = 0
106 | local ip_len = 16
107 | local obj, obj_ip, output_id, src_ip
108 | while true do
109 |     obj = produce(pctx)
110 |     if obj == nil then break end
111 |     npackets = npackets + 1
112 |
113 |     obj_ip = obj:cast_to(object.IP6)
114 |     assert(obj_ip ~= nil, 'no IPv6 header found, use output from '
115 |         .. 'extract-clients.lua as input for this script')
116 |
117 |     src_ip = ffi.string(obj_ip.src, ip_len)
118 |     output_id = client2output[src_ip]
119 |     if output_id == nil then
120 |         output_id = math.random(1, args.noutputs)
121 |         client2output[src_ip] = output_id
122 |         outputs[output_id]['nclients'] = outputs[output_id]['nclients'] + 1
123 |         nclients = nclients + 1
124 |     end
125 |     local output_tab = outputs[output_id]
126 |     output_tab.write(output_tab.writectx, obj)
127 |     output_tab.npackets = output_tab.npackets + 1
128 |     if output_tab.npackets % 10000 == 0 then
129 |         check_output(output_tab.output, output_tab.fn)
130 |     end
131 | end
132 |
133 | if npackets == 0 then
134 |     log:fatal("no input packets processed!")
135 | else
136 |     log:info("processed %0.f input packets", npackets)
137 | end
138 | for _, output in pairs(outputs) do
139 |     check_output(output['output'], output['fn'])
140 |     log:info("%s: clients: %0.f packets: %0.f", output['fn'], output['nclients'], output['npackets'])
141 | end
142 |
143 | -- stats for sanity checks: min, max, and (deviation / average * 100) for clients and packets
144 | local stats = {}
145 | local avgs = {nclients = nclients / args.noutputs, npackets = npackets / args.noutputs}
146 | for _, stat_name in pairs({'nclients', 'npackets'}) do
147 |     stats[stat_name] = {abs = {min = math.huge, max = -math.huge}, err = {}}
148 |     local abs = stats[stat_name].abs -- absolute values
149 |     local err = stats[stat_name].err -- deviation from per-output average as float
150 |     for func, _ in pairs(abs) do
151 |         for _, output in pairs(outputs) do
152 |             abs[func] = math[func](abs[func], output[stat_name])
153 |         end
154 |     end
155 |     local avg = avgs[stat_name]
156 |     -- percentage deviations from the average (expected value)
157 |     for func, _ in pairs(abs) do
158 |         err[func] = (abs[func] - avg) / avg
159 |     end
160 | end
161 | for stat_name, _ in pairs(stats) do
162 |     log:notice("deviation from average number of %s in range <%0.1f, %0.1f> %% per "
163 |         ..
"output file (average %0.f)", stat_name, 164 | stats[stat_name].err.min * 100, 165 | stats[stat_name].err.max * 100, 166 | avgs[stat_name]) 167 | end 168 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MESSAGES CONTROL] 2 | 3 | disable= 4 | missing-docstring, 5 | too-few-public-methods, 6 | too-many-arguments, 7 | too-many-locals, 8 | too-many-return-statements, 9 | too-many-branches, 10 | fixme, 11 | unused-import, # checked by flake8 12 | line-too-long, # checked by flake8 13 | invalid-name, 14 | broad-except, 15 | global-statement, 16 | no-else-return, 17 | duplicate-code, 18 | 19 | 20 | [SIMILARITIES] 21 | min-similarity-lines=6 22 | ignore-comments=yes 23 | ignore-docstrings=yes 24 | ignore-imports=no 25 | 26 | [DESIGN] 27 | max-parents=10 28 | -------------------------------------------------------------------------------- /replay/dnssim/.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | *.mod* 47 | *.cmd 48 | .tmp_versions/ 49 | modules.order 50 | Module.symvers 51 | Mkfile.old 52 | dkms.conf 53 | 54 | # Automake 55 | Makefile.in 56 | aclocal.m4 57 | ar-lib 58 | autom4te.cache 59 | compile 60 | config.guess 61 | config.sub 62 | configure 63 | depcomp 64 | install-sh 65 | ltmain.sh 66 | m4/libtool.m4 67 | m4/ltoptions.m4 68 | m4/ltsugar.m4 69 | m4/ltversion.m4 70 | m4/lt~obsolete.m4 71 | missing 72 | config.h.in 73 | config.h.in~ 74 | test-driver 75 | 76 | # Configure 77 | Makefile 78 | config.log 79 | config.status 80 | libtool 81 | .libs 82 | .deps 83 | src/config.h 84 | src/stamp-h1 85 | build* 86 | .dirstamp 87 | 88 | # Project specific files 89 | -------------------------------------------------------------------------------- /replay/dnssim/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | dnssim forever 2 | ============== 3 | Since dnssim is now a part of Shotgun, all future changes will be logged in 4 | Shotgun's `NEWS` file. 
5 | 6 | dnssim v20210714 7 | ================ 8 | - dnssim was moved from dnsjit to shotgun, this implies new 9 | lua name: "shotgun.output.dnssim" 10 | 11 | dnssim v20210129 12 | ================ 13 | 14 | - Added DNS-over-HTTPS support with https2() 15 | - Added IPv4 support 16 | - Abort operation on insufficient file descriptors 17 | - Match QUESTION section of received responses 18 | - Improvements in connection state handling 19 | - Deprecate udp_only() in favor of udp() 20 | - Allow setting logger name with log(name) 21 | - Added check_version() and check_json_version() 22 | 23 | dnssim v20200723 24 | ================ 25 | 26 | - First released dnssim version with UDP, TCP and DoT support 27 | -------------------------------------------------------------------------------- /replay/dnssim/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(dnssim C) 3 | 4 | include(CheckIncludeFile) 5 | include(CheckIncludeFiles) 6 | include(CheckFunctionExists) 7 | 8 | option(DNSJIT_PATH "Path to dnsjit prefix" "") 9 | option(ASAN "Use AddressSanitizer" OFF) 10 | option(UBSAN "Use UndefinedBehaviorSanitizer" OFF) 11 | option(USE_SYSTEM_NGTCP2 "Use ngtcp2 from the system" OFF) 12 | 13 | 14 | ## Dependencies ################################################################ 15 | 16 | # GnuTLS 17 | find_package(GnuTLS REQUIRED) 18 | list(APPEND DNSSIM_LIBS GnuTLS::GnuTLS) 19 | 20 | # LibUV 21 | find_library(LIBUV_LIB libuv.so REQUIRED) 22 | list(APPEND DNSSIM_LIBS ${LIBUV_LIB}) 23 | check_include_file(uv.h HAS_UV_H) 24 | if (NOT HAS_UV_H) 25 | message(FATAL_ERROR "LibUV is required") 26 | endif () 27 | 28 | # nghttp2 29 | find_library(NGHTTP2_LIB libnghttp2.so REQUIRED) 30 | list(APPEND DNSSIM_LIBS ${NGHTTP2_LIB}) 31 | check_include_file(nghttp2/nghttp2.h HAS_NGHTTP2_H) 32 | if (NOT HAS_NGHTTP2_H) 33 | message(FATAL_ERROR "nghttp2 is required") 34 | endif () 35 | 36 | # ngtcp2 37 | if (USE_SYSTEM_NGTCP2) 38 | find_library(NGTCP2_LIB libngtcp2.so REQUIRED) 39 | list(APPEND DNSSIM_LIBS ${NGTCP2_LIB}) 40 | find_library(NGTCP2_GTLS_LIB libngtcp2_crypto_gnutls.so REQUIRED) 41 | list(APPEND DNSSIM_LIBS ${NGTCP2_GTLS_LIB}) 42 | check_include_files("ngtcp2/ngtcp2.h;ngtcp2/ngtcp2_crypto.h;ngtcp2/ngtcp2_crypto_gnutls.h" 43 | HAS_NGTCP2_H) 44 | 45 | if (NOT HAS_NGTCP2_H) 46 | message(FATAL_ERROR "ngtcp2 is required (and set to use system)") 47 | endif () 48 | else () 49 | function (add_ngtcp2) # wrapped in a function to ensure scope 50 | list(APPEND CMAKE_C_FLAGS -fPIC) 51 | set(ENABLE_STATIC_LIB ON) 52 | set(ENABLE_GNUTLS ON) 53 | set(ENABLE_QUICTLS OFF) 54 | set(ENABLE_OPENSSL OFF) 55 | add_subdirectory(vendor/ngtcp2 EXCLUDE_FROM_ALL SYSTEM) 56 | endfunction () 57 | add_ngtcp2() 58 | list(APPEND DNSSIM_LIBS ngtcp2_static) 59 | list(APPEND DNSSIM_LIBS ngtcp2_crypto_gnutls_static) 60 | endif () 61 | 62 | # dnsjit 63 | if (DNSJIT_PATH) 64 | include_directories(${DNSJIT_PATH}/include) 65 | endif () 66 | check_include_file(dnsjit/version.h HAS_DNSJIT_H) 67 | if (NOT HAS_DNSJIT_H) 68 | message(FATAL_ERROR "dnsjit is required (you may use DNSJIT_PATH to specify its install prefix)") 69 | endif () 70 | 71 | # syscalls 72 | check_function_exists(clock_gettime HAS_CLOCK_GETTIME) 73 | check_function_exists(clock_nanosleep HAS_CLOCK_NANOSLEEP) 74 | if (NOT HAS_CLOCK_GETTIME OR NOT HAS_CLOCK_NANOSLEEP) 75 | message(FATAL_ERROR "clock_gettime() and clock_nanosleep() are required") 76 | endif () 77 | 78 | 79 | ## Sanitizers 
################################################################## 80 | 81 | if (ASAN) 82 | list(APPEND DNSSIM_SANITIZERS address) 83 | endif () 84 | if (UBSAN) 85 | list(APPEND DNSSIM_SANITIZERS undefined) 86 | endif () 87 | 88 | if (DNSSIM_SANITIZERS) 89 | list(JOIN DNSSIM_SANITIZERS "," DNSSIM_SANITIZERS_JOINED) 90 | list(APPEND CMAKE_C_FLAGS -fsanitize=${DNSSIM_SANITIZERS_JOINED}) 91 | endif () 92 | 93 | 94 | ## DNSSIM ###################################################################### 95 | 96 | list(APPEND DNSSIM_OPTS -fno-strict-aliasing -Wall) 97 | include_directories(src) 98 | add_library(dnssim SHARED 99 | src/output/dnssim.c 100 | src/output/dnssim/common.c 101 | src/output/dnssim/connection.c 102 | src/output/dnssim/https2.c 103 | src/output/dnssim/quic.c 104 | src/output/dnssim/tcp.c 105 | src/output/dnssim/tls.c 106 | src/output/dnssim/udp.c 107 | ) 108 | target_link_libraries(dnssim PUBLIC ${DNSSIM_LIBS}) 109 | set_target_properties(dnssim PROPERTIES 110 | PREFIX "") 111 | target_link_options(dnssim PUBLIC ${DNSSIM_OPTS}) 112 | target_compile_options(dnssim PUBLIC ${DNSSIM_OPTS}) 113 | 114 | 115 | ## Install ##################################################################### 116 | 117 | install(TARGETS dnssim 118 | DESTINATION lib/lua/5.1/shotgun-output-dnssim/) 119 | install(FILES src/output/dnssim.lua 120 | DESTINATION share/lua/5.1/shotgun/output/) 121 | -------------------------------------------------------------------------------- /replay/dnssim/README.md: -------------------------------------------------------------------------------- 1 | # shotgun.output.dnssim 2 | -------------------------------------------------------------------------------- /replay/dnssim/src/output/dnssim.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) CZ.NIC, z.s.p.o. 2 | * SPDX-License-Identifier: GPL-3.0-or-later 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #ifndef __dnsjit_output_dnssim_h 9 | #define __dnsjit_output_dnssim_h 10 | 11 | #include 12 | 13 | typedef enum output_dnssim_transport { 14 | OUTPUT_DNSSIM_TRANSPORT_UDP_ONLY, 15 | OUTPUT_DNSSIM_TRANSPORT_UDP, 16 | OUTPUT_DNSSIM_TRANSPORT_TCP, 17 | OUTPUT_DNSSIM_TRANSPORT_TLS, 18 | OUTPUT_DNSSIM_TRANSPORT_HTTPS2, 19 | OUTPUT_DNSSIM_TRANSPORT_QUIC, 20 | } output_dnssim_transport_t; 21 | 22 | typedef enum output_dnssim_h2_method { 23 | OUTPUT_DNSSIM_H2_GET, 24 | OUTPUT_DNSSIM_H2_POST 25 | } output_dnssim_h2_method_t; 26 | 27 | typedef struct output_dnssim_stats output_dnssim_stats_t; 28 | struct output_dnssim_stats { 29 | output_dnssim_stats_t* prev; 30 | output_dnssim_stats_t* next; 31 | 32 | uint64_t* latency; 33 | 34 | uint64_t since_ms; 35 | uint64_t until_ms; 36 | 37 | uint64_t requests; 38 | uint64_t ongoing; 39 | uint64_t answers; 40 | 41 | /* Number of connections that are open at the end of the stats interval. */ 42 | uint64_t conn_active; 43 | 44 | /* Number of TCP/QUIC connection handshake attempts during the stats interval. */ 45 | uint64_t conn_handshakes; 46 | 47 | /* Number of connections that have been resumed with TLS session resumption. */ 48 | uint64_t conn_resumed; 49 | 50 | /* Number of QUIC connections that have used 0-RTT transport parameters to 51 | * initiate a new connection. */ 52 | uint64_t conn_quic_0rtt_loaded; 53 | uint64_t quic_0rtt_sent; 54 | uint64_t quic_0rtt_answered; 55 | 56 | /* Number of timed out connection handshakes during the stats interval. 
*/ 57 | uint64_t conn_handshakes_failed; 58 | 59 | uint64_t rcode_noerror; 60 | uint64_t rcode_formerr; 61 | uint64_t rcode_servfail; 62 | uint64_t rcode_nxdomain; 63 | uint64_t rcode_notimp; 64 | uint64_t rcode_refused; 65 | uint64_t rcode_yxdomain; 66 | uint64_t rcode_yxrrset; 67 | uint64_t rcode_nxrrset; 68 | uint64_t rcode_notauth; 69 | uint64_t rcode_notzone; 70 | uint64_t rcode_badvers; 71 | uint64_t rcode_badkey; 72 | uint64_t rcode_badtime; 73 | uint64_t rcode_badmode; 74 | uint64_t rcode_badname; 75 | uint64_t rcode_badalg; 76 | uint64_t rcode_badtrunc; 77 | uint64_t rcode_badcookie; 78 | uint64_t rcode_other; 79 | }; 80 | 81 | typedef struct output_dnssim { 82 | core_log_t _log; 83 | 84 | uint64_t processed; 85 | uint64_t discarded; 86 | uint64_t ongoing; 87 | 88 | output_dnssim_stats_t* stats_sum; 89 | output_dnssim_stats_t* stats_current; 90 | output_dnssim_stats_t* stats_first; 91 | 92 | size_t zero_rtt_data_initial_capacity; 93 | 94 | size_t max_clients; 95 | bool free_after_use; 96 | bool zero_rtt; 97 | 98 | uint64_t timeout_ms; 99 | uint64_t idle_timeout_ms; 100 | uint64_t handshake_timeout_ms; 101 | uint64_t stats_interval_ms; 102 | } output_dnssim_t; 103 | 104 | core_log_t* output_dnssim_log(); 105 | 106 | output_dnssim_t* output_dnssim_new(size_t max_clients); 107 | void output_dnssim_free(output_dnssim_t* self); 108 | 109 | void output_dnssim_log_name(output_dnssim_t* self, const char* name); 110 | void output_dnssim_set_transport(output_dnssim_t* self, output_dnssim_transport_t tr); 111 | int output_dnssim_target(output_dnssim_t* self, const char* ip, uint16_t port); 112 | int output_dnssim_bind(output_dnssim_t* self, const char* ip); 113 | int output_dnssim_tls_priority(output_dnssim_t* self, const char* priority, bool is_quic); 114 | int output_dnssim_run_nowait(output_dnssim_t* self); 115 | void output_dnssim_timeout_ms(output_dnssim_t* self, uint64_t timeout_ms); 116 | void output_dnssim_h2_uri_path(output_dnssim_t* self, const char* uri_path); 117 | void output_dnssim_h2_method(output_dnssim_t* self, const char* method); 118 | void output_dnssim_h2_zero_out_msgid(output_dnssim_t* self, bool zero_out_msgid); 119 | void output_dnssim_stats_collect(output_dnssim_t* self, uint64_t interval_ms); 120 | void output_dnssim_stats_finish(output_dnssim_t* self); 121 | 122 | core_receiver_t output_dnssim_receiver(); 123 | 124 | #endif 125 | -------------------------------------------------------------------------------- /replay/dnssim/src/output/dnssim/ll.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2019-2021 CZ.NIC, z.s.p.o. 2 | * SPDX-License-Identifier: GPL-3.0-or-later 3 | */ 4 | 5 | #ifndef __dnsjit_output_dnssim_ll_h 6 | #define __dnsjit_output_dnssim_ll_h 7 | 8 | #include 9 | 10 | /* Utility macros for linked list structures. 11 | * 12 | * - "list" is the pointer to the first node of the linked list 13 | * - "list" can be NULL if there are no nodes 14 | * - every node has "next", which points to the next node (can be NULL) 15 | */ 16 | 17 | /* Append a node to the list. 18 | * 19 | * Only a single node can be appended - node->next must be NULL. 
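 *
 * A minimal usage sketch (hypothetical node type, following the list/node
 * conventions above):
 *
 *   struct node { struct node* next; };
 *   struct node n = { .next = NULL };
 *   struct node* list = NULL;
 *   _ll_append(list, &n);
 *
 * Afterwards list == &n; appending another node would walk the next
 * pointers to the tail of the list.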
20 | */ 21 | #define _ll_append(list, node) \ 22 | { \ 23 | glassert((node)->next == NULL, "node->next must be null when appending"); \ 24 | if ((list) == NULL) \ 25 | (list) = (node); \ 26 | else if ((node) != NULL) { \ 27 | typeof(list) _current = (list); \ 28 | while (_current->next != NULL) \ 29 | _current = _current->next; \ 30 | _current->next = node; \ 31 | } \ 32 | } 33 | 34 | /* Remove a node from the list. 35 | * 36 | * In strict mode, the node must be present in the list. 37 | */ 38 | #define _ll_remove_template(list, currname, cond, strict, once, dealloc) \ 39 | do { \ 40 | if (strict) \ 41 | glassert((list), "list can't be null when removing nodes"); \ 42 | if ((list) != NULL) { \ 43 | bool _removed = false; \ 44 | typeof(list)* currname = &(list); \ 45 | while (*currname) { \ 46 | if ((cond)) { \ 47 | typeof(list) _c = *currname; \ 48 | (*currname) = _c->next; \ 49 | _c->next = NULL; \ 50 | _removed = true; \ 51 | if ((dealloc)) \ 52 | free(_c); \ 53 | if ((once)) \ 54 | break; \ 55 | } else { \ 56 | currname = &(*currname)->next; \ 57 | } \ 58 | } \ 59 | if (!_removed && (strict)) \ 60 | glfatal("list doesn't contain the node to be removed"); \ 61 | } \ 62 | } while (0) 63 | 64 | #define _ll_remove_node_template(list, node, strict) \ 65 | _ll_remove_template((list), curr, *curr == (node), strict, true, false) 66 | 67 | /* Remove the specified node from the list. */ 68 | #define _ll_remove(list, node) \ 69 | _ll_remove_node_template((list), (node), true) 70 | 71 | /* Remove the specified node from the list if it's present. */ 72 | #define _ll_try_remove(list, node) \ 73 | _ll_remove_node_template((list), (node), false) 74 | 75 | /* Remove all nodes for which `cond` is `true`. Here, `currname` is the name of 76 | * the pointer to the node currently checked by `cond`. I.e. in the first case, 77 | * `currname` will be `&list`, then `&list->next`, then `&list->next->next` etc. 78 | * 79 | * For `currname = c`, `cond` may be e.g. `(*c)->qry == qry`. */ 80 | #define _ll_remove_cond(list, currname, cond, dealloc) \ 81 | _ll_remove_template((list), currname, (cond), false, false, (dealloc)) 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /replay/dnssim/src/output/dnssim/tcp.c: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2019-2021 CZ.NIC, z.s.p.o. 2 | * SPDX-License-Identifier: GPL-3.0-or-later 3 | */ 4 | 5 | #include "output/dnssim.h" 6 | #include "output/dnssim/internal.h" 7 | #include "output/dnssim/ll.h" 8 | 9 | #include 10 | 11 | static core_log_t _log = LOG_T_INIT("output.dnssim"); 12 | 13 | static void _on_tcp_closed(uv_handle_t* handle) 14 | { 15 | _output_dnssim_connection_t* conn = (_output_dnssim_connection_t*)handle->data; 16 | mlassert(conn, "conn is nil"); 17 | mlassert(conn->transport_type == _OUTPUT_DNSSIM_CONN_TRANSPORT_TCP, "conn must have tcp transport type"); 18 | conn->state = _OUTPUT_DNSSIM_CONN_CLOSED; 19 | 20 | /* Orphan any queries that are still unresolved. */ 21 | _output_dnssim_conn_move_queries_to_pending((_output_dnssim_query_stream_t**)&conn->queued); 22 | _output_dnssim_conn_move_queries_to_pending((_output_dnssim_query_stream_t**)&conn->sent); 23 | 24 | /* TODO Improve client re-connect behavior in case the connection fails to 25 | * establish. Currently, queries are orphaned and attempted to be re-sent 26 | * along with the next query that triggers a new connection. 
27 |  *
28 |  * Attempting to establish a new connection immediately leads to performance
29 |  * issues if the number of these attempts doesn't have an upper limit. */
30 |     ///* Ensure orphaned queries are re-sent over a different connection. */
31 |     //if (_output_dnssim_handle_pending_queries(conn->client) != 0)
32 |     //    mlinfo("tcp: orphaned queries failed to be re-sent");
33 |
34 |     mlassert(conn->transport.tcp, "conn must have tcp handle when closing it");
35 |     free(conn->transport.tcp);
36 |     conn->transport.tcp = NULL;
37 |     _output_dnssim_conn_maybe_free(conn);
38 | }
39 |
40 | static void _on_tcp_query_written(uv_write_t* wr_req, int status)
41 | {
42 |     _output_dnssim_query_stream_t* qry = (_output_dnssim_query_stream_t*)wr_req->data;
43 |     mlassert(qry, "qry/wr_req->data is nil");
44 |     mlassert(qry->conn, "query must be associated with connection");
45 |     _output_dnssim_connection_t* conn = qry->conn;
46 |
47 |     free(qry->bufs[0].base);
48 |
49 |     if (qry->qry.state == _OUTPUT_DNSSIM_QUERY_PENDING_CLOSE) {
50 |         qry->qry.state = status < 0 ? _OUTPUT_DNSSIM_QUERY_WRITE_FAILED : _OUTPUT_DNSSIM_QUERY_SENT;
51 |         _output_dnssim_request_t* req = qry->qry.req;
52 |         _output_dnssim_close_query_tcp(qry);
53 |         _output_dnssim_maybe_free_request(req);
54 |         qry = NULL;
55 |     }
56 |
57 |     if (status < 0) {
58 |         if (status != UV_ECANCELED)
59 |             mlinfo("tcp write failed: %s", uv_strerror(status));
60 |         if (qry != NULL)
61 |             qry->qry.state = _OUTPUT_DNSSIM_QUERY_WRITE_FAILED;
62 |         _output_dnssim_conn_close(conn);
63 |         return;
64 |     }
65 |
66 |     if (qry == NULL)
67 |         return;
68 |
69 |     /* Mark query as sent and assign it to connection. */
70 |     mlassert(qry->qry.state == _OUTPUT_DNSSIM_QUERY_PENDING_WRITE_CB, "invalid query state");
71 |     qry->qry.state = _OUTPUT_DNSSIM_QUERY_SENT;
72 |     if (qry->conn->state == _OUTPUT_DNSSIM_CONN_ACTIVE) {
73 |         mlassert(qry->conn->queued, "conn has no queued queries");
74 |         _ll_remove(qry->conn->queued, &qry->qry);
75 |         _ll_append(qry->conn->sent, &qry->qry);
76 |     }
77 | }
78 |
79 | void _output_dnssim_tcp_write_query(_output_dnssim_connection_t* conn, _output_dnssim_query_stream_t* qry)
80 | {
81 |     mlassert(qry, "qry can't be null");
82 |     mlassert(qry->qry.state == _OUTPUT_DNSSIM_QUERY_PENDING_WRITE, "qry must be pending write");
83 |     mlassert(qry->qry.req, "req can't be null");
84 |     mlassert(qry->qry.req->dns_q, "dns_q can't be null");
85 |     mlassert(qry->qry.req->dns_q->obj_prev, "payload can't be null");
86 |     mlassert(conn, "conn can't be null");
87 |     mlassert(conn->transport_type == _OUTPUT_DNSSIM_CONN_TRANSPORT_TCP, "conn transport type must be tcp");
88 |     mlassert(conn->state == _OUTPUT_DNSSIM_CONN_ACTIVE, "connection state != ACTIVE");
89 |     mlassert(conn->client, "conn must be associated with client");
90 |     mlassert(conn->client->pending, "conn has no pending queries");
91 |
92 |     mldebug("tcp write dnsmsg id: %04x", qry->qry.req->dns_q->id);
93 |
94 |     core_object_payload_t* payload = (core_object_payload_t*)qry->qry.req->dns_q->obj_prev;
95 |     uint16_t* len;
96 |     mlfatal_oom(len = malloc(sizeof(uint16_t)));
97 |     *len = htons(payload->len);
98 |     qry->bufs[0] = uv_buf_init((char*)len, 2);
99 |     qry->bufs[1] = uv_buf_init((char*)payload->payload, payload->len);
100 |
101 |     qry->conn = conn;
102 |     _ll_remove(conn->client->pending, &qry->qry);
103 |     _ll_append(conn->queued, &qry->qry);
104 |
105 |     /* Stop idle timer, since there are queries to answer now.
*/
106 |     if (conn->idle_timer != NULL) {
107 |         conn->is_idle = false;
108 |         uv_timer_stop(conn->idle_timer);
109 |     }
110 |
111 |     qry->write_req.data = (void*)qry;
112 |     uv_write(&qry->write_req, (uv_stream_t*)conn->transport.tcp, qry->bufs, 2, _on_tcp_query_written);
113 |     qry->qry.state = _OUTPUT_DNSSIM_QUERY_PENDING_WRITE_CB;
114 | }
115 |
116 | static void _on_tcp_read(uv_stream_t* handle, ssize_t nread, const uv_buf_t* buf)
117 | {
118 |     _output_dnssim_connection_t* conn = (_output_dnssim_connection_t*)handle->data;
119 |     output_dnssim_t* self = conn->client->dnssim;
120 |
121 |     if (nread > 0) {
122 |         mldebug("tcp nread: %zd", nread);
123 |         switch (_self->transport) {
124 |         case OUTPUT_DNSSIM_TRANSPORT_TCP:
125 |             _output_dnssim_read_dns_stream(conn, nread, buf->base, -1);
126 |             break;
127 |         case OUTPUT_DNSSIM_TRANSPORT_TLS:
128 |         case OUTPUT_DNSSIM_TRANSPORT_HTTPS2:
129 | #if DNSSIM_HAS_GNUTLS
130 |             mlassert(conn->tls, "conn must have tls ctx");
131 |             conn->tls->buf = (uint8_t*)buf->base;
132 |             conn->tls->buf_pos = 0;
133 |             conn->tls->buf_len = nread;
134 |             _output_dnssim_tls_process_input_data(conn);
135 | #else
136 |             mlfatal(DNSSIM_MIN_GNUTLS_ERRORMSG);
137 | #endif
138 |             break;
139 |         default:
140 |             mlfatal("unsupported transport");
141 |             break;
142 |         }
143 |     } else if (nread < 0) {
144 |         if (nread != UV_EOF) {
145 |             mlinfo("tcp conn unexpected close: %s", uv_strerror(nread));
146 |             _output_dnssim_conn_close(conn);
147 |         } else {
148 |             _output_dnssim_conn_bye(conn);
149 |         }
150 |     }
151 |
152 |     if (buf->base != NULL)
153 |         free(buf->base);
154 | }
155 |
156 | static void _on_tcp_connected(uv_connect_t* conn_req, int status)
157 | {
158 |     _output_dnssim_connection_t* conn = (_output_dnssim_connection_t*)conn_req->handle->data;
159 |     mlassert(conn, "conn is nil");
160 |     mlassert(conn->transport_type == _OUTPUT_DNSSIM_CONN_TRANSPORT_TCP, "conn transport type must be tcp");
161 |
162 |     free(conn_req);
163 |
164 |     if (status < 0) {
165 |         mldebug("tcp connect failed: %s", uv_strerror(status));
166 |         _output_dnssim_conn_close(conn);
167 |         return;
168 |     }
169 |
170 |     mlassert(conn->state == _OUTPUT_DNSSIM_CONN_TRANSPORT_HANDSHAKE, "connection state != TCP_HANDSHAKE");
171 |     int ret = uv_read_start((uv_stream_t*)conn->transport.tcp, _output_dnssim_on_uv_alloc, _on_tcp_read);
172 |     if (ret < 0) {
173 |         mlwarning("tcp uv_read_start() failed: %s", uv_strerror(ret));
174 |         _output_dnssim_conn_close(conn);
175 |         return;
176 |     }
177 |
178 |     mldebug("tcp connected");
179 |     mlassert(conn->client, "conn must be associated with a client");
180 |     mlassert(conn->client->dnssim, "client must be associated with dnssim");
181 |     output_dnssim_t* self = conn->client->dnssim;
182 |     switch (_self->transport) {
183 |     case OUTPUT_DNSSIM_TRANSPORT_TCP:
184 |         _output_dnssim_conn_activate(conn);
185 |         break;
186 |     case OUTPUT_DNSSIM_TRANSPORT_TLS:
187 |     case OUTPUT_DNSSIM_TRANSPORT_HTTPS2:
188 | #if DNSSIM_HAS_GNUTLS
189 |         mldebug("init tls handshake");
190 |         _output_dnssim_tls_process_input_data(conn); /* Initiate TLS handshake.
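       (With no input buffered for the connection yet, this call presumably
       just drives gnutls_handshake() until it needs more bytes from the
       peer; this is an assumption based on its use here, the function
       itself is defined in tls.c.)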
*/ 191 | #else 192 | mlfatal(DNSSIM_MIN_GNUTLS_ERRORMSG); 193 | #endif 194 | break; 195 | default: 196 | lfatal("unsupported transport protocol"); 197 | break; 198 | } 199 | } 200 | 201 | static void _on_connection_timeout(uv_timer_t* handle) 202 | { 203 | _output_dnssim_connection_t* conn = (_output_dnssim_connection_t*)handle->data; 204 | _output_dnssim_conn_close(conn); 205 | } 206 | 207 | int _output_dnssim_tcp_connect(output_dnssim_t* self, _output_dnssim_connection_t* conn) 208 | { 209 | mlassert_self(); 210 | lassert(conn, "connection can't be null"); 211 | lassert(conn->transport.tcp == NULL, "connection already has a handle"); 212 | lassert(conn->handshake_timer == NULL, "connection already has a handshake timer"); 213 | lassert(conn->idle_timer == NULL, "connection already has idle timer"); 214 | lassert(conn->state == _OUTPUT_DNSSIM_CONN_INITIALIZED, "connection state != INITIALIZED"); 215 | 216 | conn->transport_type = _OUTPUT_DNSSIM_CONN_TRANSPORT_TCP; 217 | lfatal_oom(conn->transport.tcp = malloc(sizeof(uv_tcp_t))); 218 | conn->transport.tcp->data = (void*)conn; 219 | int ret = uv_tcp_init(&_self->loop, conn->transport.tcp); 220 | if (ret < 0) { 221 | lwarning("failed to init uv_tcp_t"); 222 | goto failure; 223 | } 224 | 225 | ret = _output_dnssim_bind_before_connect(self, (uv_handle_t*)conn->transport.tcp); 226 | if (ret < 0) 227 | goto failure; 228 | 229 | /* Set connection parameters. */ 230 | ret = uv_tcp_nodelay(conn->transport.tcp, 1); 231 | if (ret < 0) 232 | lwarning("tcp: failed to set TCP_NODELAY: %s", uv_strerror(ret)); 233 | 234 | /* Set connection handshake timeout. */ 235 | lfatal_oom(conn->handshake_timer = malloc(sizeof(uv_timer_t))); 236 | uv_timer_init(&_self->loop, conn->handshake_timer); 237 | conn->handshake_timer->data = (void*)conn; 238 | uv_timer_start(conn->handshake_timer, _on_connection_timeout, self->handshake_timeout_ms, 0); 239 | 240 | /* Set idle connection timer. */ 241 | if (self->idle_timeout_ms > 0) { 242 | lfatal_oom(conn->idle_timer = malloc(sizeof(uv_timer_t))); 243 | uv_timer_init(&_self->loop, conn->idle_timer); 244 | conn->idle_timer->data = (void*)conn; 245 | 246 | /* Start and stop the timer to set the repeat value without running the timer. 
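       libuv keeps the repeat value across uv_timer_stop(), so the idle
       handling code can later re-arm this timer, presumably via
       uv_timer_again(), without having to pass the interval again.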
*/ 247 | uv_timer_start(conn->idle_timer, _on_connection_timeout, self->idle_timeout_ms, self->idle_timeout_ms); 248 | uv_timer_stop(conn->idle_timer); 249 | } 250 | 251 | mldebug("tcp connecting"); 252 | uv_connect_t* conn_req; 253 | lfatal_oom(conn_req = malloc(sizeof(uv_connect_t))); 254 | ret = uv_tcp_connect(conn_req, conn->transport.tcp, (struct sockaddr*)&_self->target, _on_tcp_connected); 255 | if (ret < 0) 256 | goto failure; 257 | 258 | conn->stats->conn_handshakes++; 259 | self->stats_sum->conn_handshakes++; 260 | conn->state = _OUTPUT_DNSSIM_CONN_TRANSPORT_HANDSHAKE; 261 | return 0; 262 | failure: 263 | _output_dnssim_conn_close(conn); 264 | return ret; 265 | } 266 | 267 | void _output_dnssim_tcp_close(_output_dnssim_connection_t* conn) 268 | { 269 | mlassert(conn, "conn can't be nil"); 270 | mlassert(conn->transport_type == _OUTPUT_DNSSIM_CONN_TRANSPORT_TCP, "conn transport type must be tcp"); 271 | 272 | if (conn->transport.tcp != NULL) { 273 | uv_read_stop((uv_stream_t*)conn->transport.tcp); 274 | uv_close((uv_handle_t*)conn->transport.tcp, _on_tcp_closed); 275 | } 276 | } 277 | 278 | int _output_dnssim_create_query_tcp(output_dnssim_t* self, _output_dnssim_request_t* req) 279 | { 280 | mlassert_self(); 281 | lassert(req, "req is nil"); 282 | lassert(req->client, "request must have a client associated with it"); 283 | 284 | _output_dnssim_query_stream_t* qry; 285 | 286 | lfatal_oom(qry = calloc(1, sizeof(_output_dnssim_query_stream_t))); 287 | 288 | qry->qry.transport = OUTPUT_DNSSIM_TRANSPORT_TCP; 289 | qry->qry.req = req; 290 | qry->qry.state = _OUTPUT_DNSSIM_QUERY_PENDING_WRITE; 291 | req->qry = &qry->qry; // TODO change when adding support for multiple Qs for req 292 | _ll_append(req->client->pending, &qry->qry); 293 | 294 | return _output_dnssim_handle_pending_queries(req->client); 295 | } 296 | 297 | void _output_dnssim_close_query_tcp(_output_dnssim_query_stream_t* qry) 298 | { 299 | mlassert(qry, "qry can't be null"); 300 | mlassert(qry->qry.req, "query must be part of a request"); 301 | _output_dnssim_request_t* req = qry->qry.req; 302 | mlassert(req->client, "request must belong to a client"); 303 | 304 | if ((qry->qry.state == _OUTPUT_DNSSIM_QUERY_PENDING_WRITE_CB || qry->qry.state == _OUTPUT_DNSSIM_QUERY_PENDING_CLOSE)) { 305 | /* Query can't be freed until uv callback is called. */ 306 | qry->qry.state = _OUTPUT_DNSSIM_QUERY_PENDING_CLOSE; 307 | return; 308 | } 309 | 310 | _ll_try_remove(req->client->pending, &qry->qry); 311 | if (qry->conn) { 312 | _output_dnssim_connection_t* conn = qry->conn; 313 | _ll_try_remove(conn->queued, &qry->qry); /* edge-case of cancelled queries */ 314 | _ll_try_remove(conn->sent, &qry->qry); 315 | qry->conn = NULL; 316 | _output_dnssim_conn_idle(conn); 317 | } 318 | 319 | _ll_remove(req->qry, &qry->qry); 320 | free(qry); 321 | } 322 | -------------------------------------------------------------------------------- /replay/dnssim/src/output/dnssim/udp.c: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2019-2021 CZ.NIC, z.s.p.o. 
2 |  * SPDX-License-Identifier: GPL-3.0-or-later
3 |  */
4 |
5 | #include "output/dnssim.h"
6 | #include "output/dnssim/internal.h"
7 | #include "output/dnssim/ll.h"
8 |
9 | static core_log_t _log = LOG_T_INIT("output.dnssim");
10 |
11 | static int _process_udp_response(uv_udp_t* handle, ssize_t nread, const uv_buf_t* buf)
12 | {
13 |     _output_dnssim_query_udp_t* qry = (_output_dnssim_query_udp_t*)handle->data;
14 |     _output_dnssim_request_t* req;
15 |     core_object_payload_t payload = CORE_OBJECT_PAYLOAD_INIT(NULL);
16 |     core_object_dns_t dns_a = CORE_OBJECT_DNS_INIT(&payload);
17 |     mlassert(qry, "qry is nil");
18 |     mlassert(qry->qry.req, "query must be part of a request");
19 |     req = qry->qry.req;
20 |
21 |     payload.payload = (uint8_t*)buf->base;
22 |     payload.len = nread;
23 |
24 |     dns_a.obj_prev = (core_object_t*)&payload;
25 |     int ret = core_object_dns_parse_header(&dns_a);
26 |     if (ret != 0) {
27 |         mldebug("udp response malformed");
28 |         return _ERR_MALFORMED;
29 |     }
30 |     if (dns_a.id != req->dns_q->id) {
31 |         mldebug("udp response msgid mismatch %x(q) != %x(a)", req->dns_q->id, dns_a.id);
32 |         return _ERR_MSGID;
33 |     }
34 |     if (dns_a.tc == 1) {
35 |         mldebug("udp response has TC=1");
36 |         return _ERR_TC;
37 |     }
38 |     ret = _output_dnssim_answers_request(req, &dns_a);
39 |     if (ret != 0) {
40 |         mlwarning("udp response question mismatch");
41 |         return _ERR_QUESTION;
42 |     }
43 |
44 |     _output_dnssim_request_answered(req, &dns_a, false);
45 |     return 0;
46 | }
47 |
48 | static void _on_udp_query_recv(uv_udp_t* handle, ssize_t nread, const uv_buf_t* buf, const struct sockaddr* addr, unsigned flags)
49 | {
50 |     if (nread > 0) {
51 |         mldebug("udp recv: %zd", nread);
52 |
53 |         // TODO handle TC=1
54 |         _process_udp_response(handle, nread, buf);
55 |     }
56 |
57 |     if (buf->base != NULL) {
58 |         free(buf->base);
59 |     }
60 | }
61 |
62 | static void _on_query_udp_closed(uv_handle_t* handle)
63 | {
64 |     _output_dnssim_query_udp_t* qry = (_output_dnssim_query_udp_t*)handle->data;
65 |     _output_dnssim_request_t* req;
66 |     mlassert(qry, "qry is nil");
67 |     mlassert(qry->qry.req, "query must be part of a request");
68 |     req = qry->qry.req;
69 |
70 |     free(qry->handle);
71 |
72 |     _ll_remove(req->qry, &qry->qry);
73 |     free(qry);
74 |
75 |     if (req->qry == NULL)
76 |         _output_dnssim_maybe_free_request(req);
77 | }
78 |
79 | void _output_dnssim_close_query_udp(_output_dnssim_query_udp_t* qry)
80 | {
81 |     int ret;
82 |     mlassert(qry, "qry is nil");
83 |
84 |     ret = uv_udp_recv_stop(qry->handle);
85 |     if (ret < 0) {
86 |         mldebug("failed uv_udp_recv_stop(): %s", uv_strerror(ret));
87 |     }
88 |
89 |     uv_close((uv_handle_t*)qry->handle, _on_query_udp_closed);
90 | }
91 |
92 | int _output_dnssim_create_query_udp(output_dnssim_t* self, _output_dnssim_request_t* req)
93 | {
94 |     int ret;
95 |     _output_dnssim_query_udp_t* qry;
96 |     core_object_payload_t* payload;
97 |     mlassert_self();
98 |     lassert(req, "req is nil");
99 |     payload = (core_object_payload_t*)req->dns_q->obj_prev;
100 |
101 |     lfatal_oom(qry = calloc(1, sizeof(_output_dnssim_query_udp_t)));
102 |     lfatal_oom(qry->handle = malloc(sizeof(uv_udp_t)));
103 |
104 |     qry->qry.transport = OUTPUT_DNSSIM_TRANSPORT_UDP;
105 |     qry->qry.req = req;
106 |     qry->buf = uv_buf_init((char*)payload->payload, payload->len);
107 |     qry->handle->data = (void*)qry;
108 |     ret = uv_udp_init(&_self->loop, qry->handle);
109 |     if (ret < 0) {
110 |         lwarning("failed to init uv_udp_t");
111 |         goto failure;
112 |     }
113 |     _ll_append(req->qry, &qry->qry);
114 |
115 |     ret = _output_dnssim_bind_before_connect(self,
(uv_handle_t*)qry->handle); 116 | if (ret < 0) 117 | return ret; 118 | 119 | ret = uv_udp_try_send(qry->handle, &qry->buf, 1, (struct sockaddr*)&_self->target); 120 | if (ret < 0) { 121 | lwarning("failed to send udp packet: %s", uv_strerror(ret)); 122 | return ret; 123 | } 124 | 125 | // listen for reply 126 | ret = uv_udp_recv_start(qry->handle, _output_dnssim_on_uv_alloc, _on_udp_query_recv); 127 | if (ret < 0) { 128 | lwarning("failed uv_udp_recv_start(): %s", uv_strerror(ret)); 129 | return ret; 130 | } 131 | 132 | return 0; 133 | failure: 134 | free(qry->handle); 135 | free(qry); 136 | return ret; 137 | } 138 | -------------------------------------------------------------------------------- /replay/luaconfig.lua.j2: -------------------------------------------------------------------------------- 1 | local config = {} 2 | 3 | config.pcap = "{{ pcap }}" 4 | config.verbosity = {{ verbosity|int }} 5 | config.drift_s = {{ drift_s|default(1)|int }} 6 | config.stop_after_s = {{ stop_after_s|default('nil') }} 7 | 8 | config.threads = {} 9 | {% for thread in threads %} 10 | config.threads[{{ loop.index }}] = { 11 | name = "{{ thread.name }}", 12 | target_ip = "{{ thread.target_ip }}", 13 | target_port = {{ thread.target_port|int }}, 14 | weight = {{ thread.weight }}, 15 | timeout_s = {{ thread.timeout_s|default(2)|int }}, 16 | handshake_timeout_s = {{ thread.handshake_timeout_s|default(5)|int }}, 17 | idle_timeout_s = {{ thread.idle_timeout_s|default(10)|int }}, 18 | protocol_func = "{{ thread.protocol_func|default('udp') }}", 19 | gnutls_priority = "{{ thread.gnutls_priority|default('dnssim-default') }}", 20 | zero_rtt = {{ thread.zero_rtt|default(1)|int }}, 21 | http_method = "{{ thread.http_method|default('GET') }}", 22 | output_file = "{{ thread.output_file }}", 23 | channel_size = {{ thread.channel_size|default(2048)|int }}, 24 | max_clients = {{ thread.max_clients|default(200000)|int }}, 25 | batch_size = {{ thread.batch_size|default(32)|int }}, 26 | bind_ips = { 27 | {%- for ip in thread.bind_ips %} 28 | "{{ ip|string }}", 29 | {%- endfor %} 30 | } 31 | } 32 | {% endfor %} 33 | 34 | return config 35 | 36 | -- vim: syntax=lua 37 | -------------------------------------------------------------------------------- /replay/shotgun.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | local object = require("dnsjit.core.objects") 4 | local log = require("dnsjit.core.log") 5 | local dnssim = require("shotgun.output.dnssim") 6 | 7 | local DNSSIM_REQ_VERSION = 20240219 8 | local has_check_version, version = pcall(dnssim.check_version, DNSSIM_REQ_VERSION) 9 | if not has_check_version or version == nil then 10 | log.fatal(string.format( 11 | "Newer dnssim is required. 
Minimum version of dnssim component is v%d.",
12 |         DNSSIM_REQ_VERSION))
13 | end
14 |
15 | local getopt = require("dnsjit.lib.getopt").new({})
16 |
17 | local confpath = unpack(getopt:parse())
18 | if confpath == nil then
19 |     log.fatal("lua config file must be specified as first argument")
20 | end
21 | local ok, config = pcall(dofile, confpath)
22 | if not ok then
23 |     log.fatal("failed to load lua config file \""..confpath.."\": "..tostring(config))
24 | end
25 |
26 | if config.verbosity > 0 then
27 |     log.enable("warning")
28 | end
29 | if config.verbosity > 1 then
30 |     log.enable("notice")
31 | end
32 | if config.verbosity > 2 then
33 |     log.display_file_line(true)
34 | end
35 | if config.verbosity > 3 then
36 |     log.enable("info")
37 | end
38 | if config.verbosity > 4 then
39 |     log.enable("debug")
40 | end
41 |
42 | local function send_thread_main(thr)
43 |     local channel = thr:pop()
44 |     local running
45 |
46 |     local max_clients = thr:pop()
47 |     local name = thr:pop()
48 |     local target_ip = thr:pop()
49 |     local target_port = thr:pop()
50 |     local timeout_s = thr:pop()
51 |     local handshake_timeout_s = thr:pop()
52 |     local idle_timeout_s = thr:pop()
53 |     local protocol_func = thr:pop()
54 |     local gnutls_priority = thr:pop()
55 |     local zero_rtt = thr:pop()
56 |     local http_method = thr:pop()
57 |     local output_file = thr:pop()
58 |     local batch_size = thr:pop()
59 |     local nbind = thr:pop()
60 |
61 |     local output = require("shotgun.output.dnssim").new(max_clients)
62 |     -- luacheck: ignore log
63 |     local log = output:log(name)
64 |
65 |     output:target(target_ip, target_port)
66 |     output:timeout(timeout_s)
67 |     output:handshake_timeout(handshake_timeout_s)
68 |     output:idle_timeout(idle_timeout_s)
69 |     output:zero_rtt(zero_rtt)
70 |
71 |     if protocol_func == "udp" then
72 |         output:udp()
73 |     elseif protocol_func == "tcp" then
74 |         output:tcp()
75 |     elseif protocol_func == "tls" then
76 |         output:tls(gnutls_priority)
77 |     elseif protocol_func == "https2" then
78 |         output:https2({ method = http_method }, gnutls_priority)
79 |     elseif protocol_func == "quic" then
80 |         output:quic(gnutls_priority)
81 |     else
82 |         log:fatal("unknown protocol_func: " ..
protocol_func) 83 | end 84 | 85 | output:stats_collect(1) 86 | output:free_after_use(true) 87 | 88 | for _ = 1, nbind do 89 | output:bind(thr:pop()) 90 | end 91 | 92 | local recv, rctx = output:receive() 93 | local i_full = 0 94 | while true do 95 | local obj 96 | local i = 0 97 | 98 | if channel:full() then 99 | i_full = i_full + 1 100 | if i_full == 1 then 101 | log:debug("buffer capacity reached") 102 | elseif i_full == 4 then 103 | log:info("buffer capacity reached") 104 | elseif i_full == 16 then 105 | log:warning("buffer capacity exceeded, threads may become blocked") 106 | elseif i_full % 64 == 0 then 107 | log:critical("buffer capacity exceeded, threads are blocked") 108 | end 109 | else 110 | if i_full >= 16 then 111 | log:notice("buffer capacity restored") 112 | end 113 | i_full = 0 114 | end 115 | 116 | -- read available data from channel 117 | while i < batch_size do 118 | obj = channel:try_get() 119 | if obj == nil then break end 120 | recv(rctx, obj) 121 | i = i + 1 122 | end 123 | 124 | -- execute libuv loop 125 | running = output:run_nowait() 126 | 127 | -- check if channel is still open 128 | if obj == nil and channel.closed == 1 then 129 | output:stats_finish() 130 | break 131 | end 132 | end 133 | 134 | -- finish processing outstanding requests 135 | while running ~= 0 do 136 | running = output:run_nowait() 137 | end 138 | 139 | output:export(output_file) 140 | end 141 | 142 | 143 | ---- setup input 144 | local delay = require("dnsjit.filter.timing").new() 145 | local layer = require("dnsjit.filter.layer").new() 146 | local ipsplit = require("dnsjit.filter.ipsplit").new() 147 | local copy = require("dnsjit.filter.copy").new() 148 | local input 149 | 150 | if config.pcap == '-' then 151 | input = require("dnsjit.input.fpcap").new() 152 | if input:openfp(io.stdin) ~= 0 then 153 | log.fatal("failed to open PCAP on stdin") 154 | end 155 | else 156 | input = require("dnsjit.input.mmpcap").new() 157 | if input:open(config.pcap) ~= 0 then 158 | log.notice("failed to open PCAP with mmap, fallback to fpcap") 159 | input = require("dnsjit.input.fpcap").new() 160 | if input:open(config.pcap) ~= 0 then 161 | log.fatal("failed to open PCAP with fpcap") 162 | end 163 | end 164 | end 165 | delay:realtime(config.drift_s) 166 | delay:producer(input) 167 | layer:producer(delay) 168 | ipsplit:overwrite_dst() 169 | ipsplit:random() 170 | 171 | -- setup threads 172 | local thread = require("dnsjit.core.thread") 173 | local channel = require("dnsjit.core.channel") 174 | local threads = {} 175 | local channels = {} 176 | 177 | ---- initialize send threads 178 | for i, thrconf in ipairs(config.threads) do 179 | channels[i] = channel.new(thrconf.channel_size) 180 | ipsplit:receiver(channels[i], thrconf.weight) 181 | 182 | threads[i] = thread.new() 183 | threads[i]:start(send_thread_main) 184 | threads[i]:push(channels[i]) 185 | threads[i]:push(thrconf.max_clients) 186 | threads[i]:push(thrconf.name) 187 | threads[i]:push(thrconf.target_ip) 188 | threads[i]:push(thrconf.target_port) 189 | threads[i]:push(thrconf.timeout_s) 190 | threads[i]:push(thrconf.handshake_timeout_s) 191 | threads[i]:push(thrconf.idle_timeout_s) 192 | threads[i]:push(thrconf.protocol_func) 193 | threads[i]:push(thrconf.gnutls_priority) 194 | threads[i]:push(thrconf.zero_rtt) 195 | threads[i]:push(thrconf.http_method) 196 | threads[i]:push(thrconf.output_file) 197 | threads[i]:push(thrconf.batch_size) 198 | threads[i]:push(#thrconf.bind_ips) 199 | for _, bind_ip in ipairs(thrconf.bind_ips) do 200 | threads[i]:push(bind_ip) 
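        -- NOTE: the push() sequence above must mirror the thr:pop() order at
        -- the top of send_thread_main, since values are passed by position
        -- only.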
201 | end 202 | end 203 | 204 | copy:obj_type(object.PAYLOAD) 205 | copy:obj_type(object.IP6) 206 | copy:receiver(ipsplit) 207 | 208 | 209 | -- process PCAP 210 | local prod, pctx = layer:produce() 211 | local recv, rctx = copy:receive() 212 | while true do 213 | local obj = prod(pctx) 214 | if obj == nil then break end 215 | if config.stop_after_s then 216 | local obj_pcap_in = obj:cast_to(object.PCAP) 217 | if obj_pcap_in.ts.sec >= config.stop_after_s then 218 | break 219 | end 220 | end 221 | recv(rctx, obj) 222 | end 223 | log.notice('processed %.0f packets from input PCAP', input:packets()) 224 | 225 | -- teardown 226 | for i, _ in ipairs(config.threads) do 227 | channels[i]:close() 228 | end 229 | for i, _ in ipairs(config.threads) do 230 | threads[i]:stop() 231 | end 232 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jinja2 2 | toml 3 | matplotlib 4 | -------------------------------------------------------------------------------- /tools/merge-data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import json 5 | import logging 6 | import os 7 | import sys 8 | import traceback 9 | 10 | 11 | JSON_VERSION = 20200527 12 | DEFAULT_FILENAME = "shotgun-all.json" 13 | 14 | 15 | class VersionError(RuntimeError): 16 | def __init__(self): 17 | super().__init__( 18 | "Older formats of JSON data aren't supported. " 19 | "Use older tooling or re-run the tests with newer shotgun." 20 | ) 21 | 22 | 23 | class MismatchData(RuntimeError): 24 | pass 25 | 26 | 27 | class MissingData(RuntimeError): 28 | def __init__(self, field): 29 | super().__init__(f'Field "{field}" is missing in one or more files.') 30 | 31 | 32 | class MergeFailed(RuntimeError): 33 | def __init__(self, field): 34 | super().__init__(f'Failed to merge field "{field}".') 35 | 36 | 37 | def first(iterable): 38 | assert len(iterable) >= 1 39 | return iterable[0] 40 | 41 | 42 | def same(iterable): 43 | assert len(iterable) >= 1 44 | if not all(val == iterable[0] for val in iterable): 45 | raise MismatchData 46 | return iterable[0] 47 | 48 | 49 | def merge_latency(iterable): 50 | assert len(iterable) >= 1 51 | latency = list(iterable[0]) 52 | for latency_data in iterable[1:]: 53 | if len(latency_data) != len(latency): 54 | raise MismatchData 55 | for i, _ in enumerate(latency_data): 56 | latency[i] += latency_data[i] 57 | return latency 58 | 59 | 60 | DATA_STRUCTURE_STATS = { 61 | "since_ms": min, 62 | "until_ms": max, 63 | "requests": sum, 64 | "ongoing": sum, 65 | "answers": sum, 66 | "conn_active": sum, 67 | "conn_resumed": sum, 68 | "conn_handshakes": sum, 69 | "conn_quic_0rtt_loaded": sum, 70 | "quic_0rtt_sent": sum, 71 | "quic_0rtt_answered": sum, 72 | "conn_handshakes_failed": sum, 73 | "rcode_noerror": sum, 74 | "rcode_formerr": sum, 75 | "rcode_servfail": sum, 76 | "rcode_nxdomain": sum, 77 | "rcode_notimp": sum, 78 | "rcode_refused": sum, 79 | "rcode_yxdomain": sum, 80 | "rcode_yxrrset": sum, 81 | "rcode_nxrrset": sum, 82 | "rcode_notauth": sum, 83 | "rcode_notzone": sum, 84 | "rcode_badvers": sum, 85 | "rcode_badkey": sum, 86 | "rcode_badtime": sum, 87 | "rcode_badmode": sum, 88 | "rcode_badname": sum, 89 | "rcode_badalg": sum, 90 | "rcode_badtrunc": sum, 91 | "rcode_badcookie": sum, 92 | "rcode_other": sum, 93 | "latency": merge_latency, 94 | } 95 | 96 | 97 | def merge_stats(iterable): 98 | return 
merge_fields(DATA_STRUCTURE_STATS, iterable)
99 | 
100 | 
101 | def merge_periodic_stats(iterable):
102 |     out = []
103 | 
104 |     for i in range(max(len(stats_periodic) for stats_periodic in iterable)):
105 |         to_merge = []
106 |         for stats_periodic in iterable:
107 |             try:
108 |                 stats = stats_periodic[i]
109 |             except IndexError:
110 |                 continue
111 |             else:
112 |                 to_merge.append(stats)
113 |         out.append(merge_stats(to_merge))
114 | 
115 |     return out
116 | 
117 | 
118 | DATA_STRUCTURE_ROOT = {
119 |     "version": same,
120 |     "merged": lambda x: True,
121 |     "stats_interval_ms": same,
122 |     "timeout_ms": same,
123 |     "discarded": sum,
124 |     "stats_sum": merge_stats,
125 |     "stats_periodic": merge_periodic_stats,
126 | }
127 | 
128 | 
129 | def merge_fields(fields, thread_data):
130 |     out = {}
131 |     for field, merge_func in fields.items():
132 |         try:
133 |             field_data = [data[field] for data in thread_data]
134 |         except KeyError as exc:
135 |             raise MissingData(field) from exc
136 |         try:
137 |             out[field] = merge_func(field_data)
138 |         except Exception as exc:
139 |             raise MergeFailed(field) from exc
140 |     return out
141 | 
142 | 
143 | def merge_data(thread_data):
144 |     assert len(thread_data) >= 1
145 |     try:
146 |         if thread_data[0]["version"] != JSON_VERSION:
147 |             raise VersionError
148 |     except KeyError as exc:
149 |         raise VersionError from exc
150 |     return merge_fields(DATA_STRUCTURE_ROOT, thread_data)
151 | 
152 | 
153 | def main():
154 |     logging.basicConfig(
155 |         format="%(asctime)s %(levelname)8s %(message)s", level=logging.DEBUG
156 |     )
157 | 
158 |     parser = argparse.ArgumentParser(description="Merge JSON shotgun results")
159 | 
160 |     parser.add_argument("json_file", nargs="+", help="Paths to per-thread JSON results")
161 |     parser.add_argument(
162 |         "-o", "--output", default=DEFAULT_FILENAME, help="Output JSON file"
163 |     )
164 |     args = parser.parse_args()
165 | 
166 |     outpath = args.output
167 |     if outpath == DEFAULT_FILENAME:
168 |         outpath = os.path.join(os.path.dirname(args.json_file[0]), outpath)
169 | 
170 |     try:
171 |         thread_data = []
172 |         for path in args.json_file:
173 |             with open(path, encoding="utf-8") as f:
174 |                 thread_data.append(json.load(f))
175 | 
176 |         merged = merge_data(thread_data)
177 | 
178 |         with open(outpath, "w", encoding="utf-8") as f:
179 |             json.dump(merged, f)
180 |         logging.info("DONE: merged shotgun results saved as %s", outpath)
181 |     except (FileNotFoundError, VersionError) as exc:
182 |         logging.critical("%s", exc)
183 |         sys.exit(1)
184 |     except (MergeFailed, MissingData) as exc:
185 |         logging.debug(traceback.format_exc())
186 |         logging.critical("%s", exc)
187 |         sys.exit(1)
188 |     except Exception as exc:
189 |         logging.critical("uncaught exception: %s", exc)
190 |         logging.debug(traceback.format_exc())
191 |         sys.exit(1)
192 | 
193 | 
194 | if __name__ == "__main__":
195 |     main()
196 | 
--------------------------------------------------------------------------------
/tools/mplhlpr/README:
--------------------------------------------------------------------------------
1 | *.mplstyle files in this directory augment the system-wide Matplotlib config
2 | located in site-packages/matplotlib/mpl-data/matplotlibrc
3 | (relative to your Python installation).
4 | 
5 | Matplotlib styles used by DNS Shotgun plotters are defined as a comma-delimited
6 | list of styles in the SHOTGUN_MPLSTYLES environment variable. It defaults to
7 | "shotgun" and thus by default loads the file "shotgun.mplstyle" from this directory.
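
Styles can also be given as file paths (taken verbatim, tried first), so a
hypothetical local override file can be mixed with the bundled styles, e.g.:

  SHOTGUN_MPLSTYLES="shotgun,/home/user/my-overrides.mplstyle" \
      python3 tools/plot-latency.py shotgun-all.json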
8 | 
9 | Items listed later in the list can redefine values from preceding files, e.g.
10 | SHOTGUN_MPLSTYLES="shotgun,presentation,fast,grayscale"
11 | starts with the "shotgun" style as base and the "presentation" style redefines
12 | individual settings.
13 | 
14 | Matplotlib provides its own styles, e.g. "fast", "tableau-colorblind10" etc.
15 | See site-packages/matplotlib/mpl-data/stylelib.
16 | 
--------------------------------------------------------------------------------
/tools/mplhlpr/presentation.mplstyle:
--------------------------------------------------------------------------------
1 | # Intended as an augment to the "shotgun" style.
2 | # Size and font are meant to be suitable for widescreen presentations
3 | # with slides 33.87 cm (width) x 19.05 cm (height)
4 | #
5 | # Usage:
6 | # export SHOTGUN_MPLSTYLES="shotgun,presentation"
7 | #
8 | # If resulting files are too big or rendering too slow, try adding "fast".
9 | # export SHOTGUN_MPLSTYLES="shotgun,presentation,fast"
10 | 
11 | ###### FONT
12 | font.family: sans-serif
13 | #font.style: normal
14 | #font.variant: normal
15 | #font.weight: normal
16 | #font.stretch: normal
17 | font.size: 14
18 | 
19 | font.sans-serif: Mada, DejaVu Sans, Bitstream Vera Sans, Computer Modern Sans Serif, Lucida Grande, Verdana, Geneva, Lucid, Arial, Helvetica, Avant Garde, sans-serif
20 | 
21 | ###### AXES
22 | # hide chart title - slide has its own
23 | axes.titlesize: 0
24 | 
25 | ###### FIGURE
26 | #figure.labelsize: large  # size of the figure label (``Figure.sup[x|y]label()``)
27 | figure.figsize: 10.161, 4.755  # figure size in inches
28 | #figure.dpi: 100  # figure dots per inch
29 | #figure.facecolor: white  # figure face color
30 | #figure.edgecolor: white  # figure edge color
31 | #figure.frameon: True  # enable figure frame
32 | 
33 | 
34 | ###### Tweak to suit target presentation software
35 | 
36 | ## SAVING FIGURES
37 | #savefig.format: svg  # {png, ps, pdf, svg}
38 | #savefig.transparent: False  # whether figures are saved with a transparent
39 |                              # background by default
40 | 
41 | 
42 | ### ps backend params
43 | #ps.papersize: letter  # {auto, letter, legal, ledger, A0-A10, B0-B10}
44 | #ps.useafm: False  # use of AFM fonts, results in small files
45 | #ps.usedistiller: False  # {ghostscript, xpdf, None}
46 |                          # Experimental: may produce smaller files.
47 |                          # xpdf intended for production of publication quality files,
48 |                          # but requires ghostscript, xpdf and ps2eps
49 | #ps.distiller.res: 6000  # dpi
50 | #ps.fonttype: 3  # Output Type 3 (Type3) or Type 42 (TrueType)
51 | 
52 | ### PDF backend params
53 | #pdf.compression: 6  # integer from 0 to 9
54 |                      # 0 disables compression (good for debugging)
55 | #pdf.fonttype: 3  # Output Type 3 (Type3) or Type 42 (TrueType)
56 | #pdf.use14corefonts: False
57 | #pdf.inheritcolor: False
58 | 
59 | ### SVG backend params
60 | #svg.image_inline: True  # Write raster image data directly into the SVG file
61 | #svg.fonttype: path  # How to handle SVG fonts:
62 |                      # path: Embed characters as paths -- supported
63 |                      #       by most SVG renderers
64 |                      # None: Assume fonts are installed on the
65 |                      #       machine where the SVG will be viewed.
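
# Illustrative example: to emit opaque PNGs for pasting into slide decks, one
# could uncomment and set (standard matplotlibrc keys, values just an example):
#savefig.format: png
#savefig.transparent: False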
66 | 
--------------------------------------------------------------------------------
/tools/mplhlpr/shotgun.mplstyle:
--------------------------------------------------------------------------------
1 | #### MATPLOTLIBRC FORMAT
2 | grid.color: "#dddddd"  # grid color
3 | grid.linestyle: "dotted"  # default: solid
4 | 
5 | figure.figsize: 16, 9  # inches
6 | 
7 | # automatically make plot elements fit on the figure
8 | figure.constrained_layout.use: True
9 | # autolayout is not compatible with constrained_layout
10 | figure.autolayout: False
11 | 
12 | lines.linestyle: "dotted"
13 | lines.marker: "x"
14 | 
15 | # deterministic SVG output for easier diff-ing
16 | svg.hashsalt: 0  # If not None, use this string as hash salt instead of uuid4
17 | 
--------------------------------------------------------------------------------
/tools/mplhlpr/styles.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """
3 | Load Matplotlib styles specified by the SHOTGUN_MPLSTYLES env var.
4 | 
5 | Multiple styles are separated by commas:
6 |     SHOTGUN_MPLSTYLES=shotgun,tableau-colorblind10,fast
7 | 
8 | Default is "shotgun", which is distributed with Shotgun itself.
9 | 
10 | Load attempts are done in this order:
11 |     1. Style name taken verbatim as a file path
12 |     2. Relative base name resolved against the directory of this module
13 |     3. Matplotlib-supplied style name (see the matplotlib.style module)
14 | """
15 | from pathlib import Path
16 | import os
17 | 
18 | import matplotlib.style
19 | 
20 | SCRIPT_DIR = Path(__file__).parent
21 | 
22 | 
23 | # style name "default" would be nicer, but it is reserved by matplotlib
24 | def configure_mpl_styles(comma_list=os.environ.get("SHOTGUN_MPLSTYLES", "shotgun")):
25 |     styles = comma_list.split(",")
26 |     for style in styles:
27 |         candidates = [Path(style), SCRIPT_DIR / f"{style}.mplstyle", style]
28 |         for candidate in candidates:
29 |             if isinstance(candidate, Path) and not candidate.exists():
30 |                 continue
31 |             # raises if the style cannot be found
32 |             matplotlib.style.use(candidate)
33 |             break
34 | 
35 | 
36 | def ax_set_title(ax, title):
37 |     """show title only if style defines titlesize > 0"""
38 |     titlesize = matplotlib.rcParams["axes.titlesize"]
39 |     if not isinstance(titlesize, (int, float)) or titlesize > 0:
40 |         ax.set_title(title)
41 | 
--------------------------------------------------------------------------------
/tools/plot-client-distribution.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | # pylint: disable=wrong-import-order,wrong-import-position
4 | import argparse
5 | import csv
6 | import logging
7 | import os
8 | import statistics
9 | import sys
10 | import traceback
11 | from typing import Dict, List, Union
12 | 
13 | # Force matplotlib to use a different backend to handle machines without a display
14 | from cycler import cycler
15 | import matplotlib
16 | 
17 | matplotlib.use("Agg")
18 | import matplotlib.colors
19 | from matplotlib.lines import Line2D
20 | import matplotlib.pyplot as plt
21 | 
22 | import mplhlpr.styles
23 | 
24 | SCALE_MAGIC = 10000
25 | 
26 | 
27 | def init_plot(title):
28 |     _, ax = plt.subplots()
29 | 
30 |     ax.set_xscale("log")
31 |     ax.set_yscale("log")
32 | 
33 |     ax.grid(True, which="major")
34 |     ax.grid(True, which="minor")
35 |     ax.set_ylim(0.00009, 110)
36 | 
37 |     ax.set_xlabel("Number of queries per client")
38 |     ax.set_ylabel("Percentage of clients")
39 |     mplhlpr.styles.ax_set_title(ax, title)
40 | 
41 |     colors = [
42 | 
matplotlib.colors.to_rgba(c) 43 | for c in plt.rcParams["axes.prop_cycle"].by_key()["color"] 44 | ] 45 | default_cycler = cycler(hatch=[None, "++", "xx", "oo"]) * cycler(facecolor=colors) 46 | 47 | return ax, default_cycler 48 | 49 | 50 | def count_client_queries( 51 | filename: str, 52 | ) -> Dict[str, int]: 53 | with open(filename, newline="", encoding="utf-8") as csvfile: 54 | reader = csv.DictReader( 55 | csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_NONNUMERIC 56 | ) 57 | return {row["ip"]: int(row["packets"]) for row in reader} 58 | 59 | 60 | def plot_client_query_scatter(ax, clients: Dict[str, int], plot_props): 61 | data = clients.values() 62 | 63 | x = [] 64 | y = [] 65 | s = [] # type: List[Union[float,int]] 66 | sanity_nsamples = 0 67 | step_multiplier = 10 68 | lmin = 0 69 | lmax = step_multiplier 70 | while lmin <= max(data): 71 | samples = list(n for n in data if lmin <= n < lmax) 72 | if len(samples) == 0: # an empty interval 73 | logging.info(" [%d-%d) queries per client: 0 clients", lmin, lmax) 74 | else: 75 | sanity_nsamples += len(samples) 76 | x.append(statistics.mean(samples)) 77 | y.append(len(samples) / len(data) * 100) 78 | s.append(sum(samples)) 79 | logging.info( 80 | " [%d-%d) queries per client: %d (%.2f %%) clients; %d queries total", 81 | lmin, 82 | lmax, 83 | len(samples), 84 | y[-1], 85 | int(s[-1]), 86 | ) 87 | lmin = lmax 88 | lmax *= step_multiplier 89 | 90 | assert sanity_nsamples == len(data) 91 | logging.info(" total: %d clients; %d queries", len(data), int(sum(s))) 92 | 93 | # normalize size 94 | s_tot = sum(s) 95 | s = [size * (SCALE_MAGIC / s_tot) for size in s] 96 | 97 | ax.scatter(x, y, s, alpha=0.5, **plot_props) 98 | ax.scatter(x, y, linewidth=1, marker="x", alpha=0.5, **plot_props) 99 | 100 | 101 | def main(): 102 | logging.basicConfig( 103 | format="%(asctime)s %(levelname)8s %(message)s", level=logging.DEBUG 104 | ) 105 | logger = logging.getLogger("matplotlib") 106 | # set WARNING for Matplotlib 107 | logger.setLevel(logging.WARNING) 108 | 109 | mplhlpr.styles.configure_mpl_styles() 110 | 111 | parser = argparse.ArgumentParser( 112 | description="Analyze query distribution among clients in input pcap" 113 | ) 114 | parser.add_argument( 115 | "csv", 116 | nargs="+", 117 | help="CSV(s) to visualize (output from count-packets-per-ip.lua)", 118 | ) 119 | parser.add_argument( 120 | "-o", 121 | "--output", 122 | type=str, 123 | default="clients.svg", 124 | help="output filename (default: clients.svg)", 125 | ) 126 | args = parser.parse_args() 127 | 128 | ax, plot_cycler = init_plot("Query distribution among clients") 129 | handles = [] 130 | lines = [] 131 | labels = [] 132 | 133 | if len(plot_cycler) < len(args.csv): 134 | logging.critical( 135 | "more than %d input files at once is not supported, got %d", 136 | len(plot_cycler), 137 | len(args.csv), 138 | ) 139 | sys.exit(3) 140 | for plot_props, csv_inf in zip(plot_cycler, args.csv): 141 | label = os.path.basename(csv_inf) 142 | logging.info("Processing: %s", label) 143 | try: 144 | clients_qps = count_client_queries(csv_inf) 145 | except FileNotFoundError as exc: 146 | logging.critical("%s", exc) 147 | sys.exit(1) 148 | except Exception as exc: 149 | logging.critical("uncaught exception: %s", exc) 150 | logging.debug(traceback.format_exc()) 151 | sys.exit(1) 152 | else: 153 | labels.append(label) 154 | lines.append(matplotlib.patches.Patch(**plot_props)) 155 | handles.append(plot_client_query_scatter(ax, clients_qps, plot_props)) 156 | 157 | ax.legend(lines, labels, loc="lower 
left") 158 | plt.savefig(args.output) 159 | sys.exit(0) 160 | 161 | 162 | if __name__ == "__main__": 163 | main() 164 | -------------------------------------------------------------------------------- /tools/plot-connections.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | from itertools import cycle 5 | import json 6 | import logging 7 | import math 8 | import os 9 | import sys 10 | 11 | # pylint: disable=wrong-import-order,wrong-import-position 12 | import matplotlib 13 | 14 | matplotlib.use("Agg") 15 | import matplotlib.pyplot as plt 16 | 17 | import mplhlpr.styles 18 | 19 | JSON_VERSION = 20200527 20 | 21 | COLOR_ACTIVE = cycle(["royalblue", "cornflowerblue", "darkblue", "lightsteelblue"]) 22 | COLOR_CONN_HS = cycle(["forestgreen", "limegreen", "darkgreen", "lightgreen"]) 23 | COLOR_QUIC_0RTT = cycle( 24 | ["darkolivegreen", "darkseagreen", "darkslategray", "greenyellow"] 25 | ) 26 | COLOR_QUIC_0RTT_SENT = cycle(["crimson", "brown", "firebrick", "indianred"]) 27 | COLOR_QUIC_0RTT_ANSWERED = cycle(["khaki", "moccasin", "peru", "wheat"]) 28 | COLOR_TLS_RESUMED = cycle(["orange", "moccasin", "darkorange", "antiquewhite"]) 29 | COLOR_FAILED_HS = cycle(["gray", "silver", "black", "gainsboro"]) 30 | 31 | 32 | sinames = ["", " k", " M", " G", " T"] 33 | 34 | 35 | def siname(n): 36 | try: 37 | n = float(n) 38 | except ValueError: 39 | return n 40 | 41 | siidx = max( 42 | 0, 43 | min(len(sinames) - 1, int(math.floor(0 if n == 0 else math.log10(abs(n)) / 3))), 44 | ) 45 | return f"{(n / 10 ** (3 * siidx)):.0f}{sinames[siidx]}" 46 | 47 | 48 | def init_plot(title): 49 | _, ax = plt.subplots() 50 | 51 | ax.set_xlabel("Time [s]") 52 | ax.set_ylabel("Number of connections") 53 | mplhlpr.styles.ax_set_title(ax, title) 54 | 55 | ax.grid(True, axis="x", which="major") 56 | ax.grid(True, axis="y", which="major") 57 | ax.grid(True, axis="y", which="minor") 58 | 59 | return ax 60 | 61 | 62 | def plot(ax, data, label, eval_func, min_timespan=0, color=None): 63 | stats_periodic = data["stats_periodic"][ 64 | :-1 65 | ] # omit the last often misleading datapoint 66 | time_offset = stats_periodic[0]["since_ms"] 67 | 68 | xvalues = [] 69 | yvalues = [] 70 | for stats in stats_periodic: 71 | timespan = stats["until_ms"] - stats["since_ms"] 72 | if timespan < min_timespan: 73 | continue 74 | time = (stats["until_ms"] - time_offset) / 1000 75 | xvalues.append(time) 76 | yvalues.append(eval_func(stats)) 77 | 78 | ax.plot(xvalues, yvalues, label=label, color=color) 79 | 80 | 81 | def main(): 82 | logging.basicConfig( 83 | format="%(asctime)s %(levelname)8s %(message)s", level=logging.DEBUG 84 | ) 85 | logger = logging.getLogger("matplotlib") 86 | # set WARNING for Matplotlib 87 | logger.setLevel(logging.WARNING) 88 | 89 | mplhlpr.styles.configure_mpl_styles() 90 | 91 | parser = argparse.ArgumentParser( 92 | description="Plot connections over time from shotgun experiment" 93 | ) 94 | 95 | parser.add_argument("json_file", nargs="+", help="Shotgun results JSON file(s)") 96 | parser.add_argument( 97 | "-t", "--title", default="Connections over Time", help="Graph title" 98 | ) 99 | parser.add_argument( 100 | "-o", "--output", default="connections.svg", help="Output graph filename" 101 | ) 102 | parser.add_argument( 103 | "-k", 104 | "--kind", 105 | nargs="+", 106 | choices=[ 107 | "active", 108 | "conn_hs", 109 | "tcp_hs", # same as conn_hs - backwards compatibility 110 | "quic_0rtt", 111 | "quic_0rtt_sent", 112 | 
"quic_0rtt_answered", 113 | "tls_resumed", 114 | "failed_hs", 115 | ], 116 | default=["active", "conn_hs", "tls_resumed", "failed_hs"], 117 | help="Which data should be rendered", 118 | ) 119 | args = parser.parse_args() 120 | 121 | # initialize graph 122 | ax = init_plot(args.title) 123 | 124 | for json_path in args.json_file: 125 | try: 126 | with open(json_path, encoding="utf-8") as f: 127 | data = json.load(f) 128 | except FileNotFoundError as exc: 129 | logging.critical("%s", exc) 130 | sys.exit(1) 131 | 132 | try: 133 | assert data["version"] == JSON_VERSION 134 | except (KeyError, AssertionError): 135 | logging.critical( 136 | "Older formats of JSON data aren't supported. " 137 | "Use older tooling or re-run the tests with newer shotgun." 138 | ) 139 | sys.exit(1) 140 | 141 | if data["discarded"] != 0: 142 | logging.warning("%d discarded packets may skew results!", data["discarded"]) 143 | 144 | name = os.path.splitext(os.path.basename(os.path.normpath(json_path)))[0] 145 | 146 | if "active" in args.kind: 147 | plot( 148 | ax, 149 | data, 150 | label=f"Active ({name})", 151 | color=next(COLOR_ACTIVE), 152 | eval_func=lambda stats: stats["conn_active"], 153 | ) 154 | if "conn_hs" in args.kind or "tcp_hs" in args.kind: 155 | plot( 156 | ax, 157 | data, 158 | label=f"Handshakes ({name})", 159 | color=next(COLOR_CONN_HS), 160 | eval_func=lambda stats: stats["conn_handshakes"], 161 | ) 162 | if "quic_0rtt" in args.kind: 163 | plot( 164 | ax, 165 | data, 166 | label=f"QUIC 0RTT ({name})", 167 | color=next(COLOR_QUIC_0RTT), 168 | eval_func=lambda stats: stats["conn_quic_0rtt_loaded"], 169 | ) 170 | if "quic_0rtt_sent" in args.kind: 171 | plot( 172 | ax, 173 | data, 174 | label=f"QUIC 0RTT sent ({name})", 175 | color=next(COLOR_QUIC_0RTT_SENT), 176 | eval_func=lambda stats: stats["quic_0rtt_sent"], 177 | ) 178 | if "quic_0rtt_answered" in args.kind: 179 | plot( 180 | ax, 181 | data, 182 | label=f"QUIC 0RTT answered ({name})", 183 | color=next(COLOR_QUIC_0RTT_ANSWERED), 184 | eval_func=lambda stats: stats["quic_0rtt_answered"], 185 | ) 186 | if "tls_resumed" in args.kind: 187 | plot( 188 | ax, 189 | data, 190 | label=f"TLS Resumed ({name})", 191 | color=next(COLOR_TLS_RESUMED), 192 | eval_func=lambda stats: stats["conn_resumed"], 193 | ) 194 | if "failed_hs" in args.kind: 195 | plot( 196 | ax, 197 | data, 198 | label=f"Failed Handshakes ({name})", 199 | color=next(COLOR_FAILED_HS), 200 | eval_func=lambda stats: stats["conn_handshakes_failed"], 201 | ) 202 | 203 | # set axis boundaries 204 | ax.set_xlim(xmin=0) 205 | ax.set_ylim(ymin=0) 206 | 207 | plt.legend() 208 | plt.savefig(args.output) 209 | 210 | 211 | if __name__ == "__main__": 212 | main() 213 | -------------------------------------------------------------------------------- /tools/plot-latency.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # NOTE: Due to a weird bug, numpy is detected as a 3rd party module, while lmdb 4 | # is not and pylint complains about wrong-import-order. 5 | # Since these checks have to be disabled for matplotlib imports anyway, they 6 | # were moved a bit higher up to avoid the issue. 
7 | # pylint: disable=wrong-import-order,wrong-import-position
8 | import argparse
9 | import collections
10 | import itertools
11 | import logging
12 | import json
13 | import math
14 | import os
15 | import re
16 | import sys
17 | 
18 | import numpy as np
19 | 
20 | # Force matplotlib to use a different backend to handle machines without a display
21 | import matplotlib
22 | import matplotlib.ticker as mtick
23 | 
24 | matplotlib.use("Agg")
25 | import matplotlib.pyplot as plt
26 | 
27 | import mplhlpr.styles
28 | 
29 | JSON_VERSION = 20200527
30 | MIN_X_EXP = -1
31 | MAX_X_EXP = 2
32 | 
33 | sinames = ["", " k", " M", " G", " T"]
34 | 
35 | 
36 | def siname(n):
37 |     try:
38 |         n = float(n)
39 |     except ValueError:
40 |         return n
41 | 
42 |     siidx = max(
43 |         0,
44 |         min(len(sinames) - 1, int(math.floor(0 if n == 0 else math.log10(abs(n)) / 3))),
45 |     )
46 |     return f"{(n / 10 ** (3 * siidx)):.0f}{sinames[siidx]}"
47 | 
48 | 
49 | def init_plot(title):
50 |     _, ax = plt.subplots()
51 | 
52 |     fmt = mtick.FormatStrFormatter("%g")
53 |     maj_loc = mtick.LogLocator(subs=[(x / 10) for x in range(0, 10)])
54 | 
55 |     ax.set_xscale("log")
56 |     ax.xaxis.set_major_formatter(fmt)
57 |     ax.xaxis.set_major_locator(maj_loc)
58 |     ax.set_yscale("log")
59 |     ax.yaxis.set_major_formatter(fmt)
60 |     ax.yaxis.set_major_locator(maj_loc)
61 | 
62 |     ax.grid(True, which="major")
63 |     ax.grid(True, which="minor")
64 | 
65 |     ax.margins(x=0)
66 | 
67 |     ax.set_xlabel("Slowest percentile")
68 |     ax.set_ylabel("Response time [ms]")
69 |     mplhlpr.styles.ax_set_title(ax, title)
70 | 
71 |     return ax
72 | 
73 | 
74 | def get_percentile_latency(latency_data, percentile):
75 |     total = sum(latency_data)
76 |     ipercentile = math.ceil((100 - percentile) / 100 * total - 1)
77 |     assert ipercentile <= total
78 |     i = 0
79 |     for latency, n in enumerate(latency_data):
80 |         i += n
81 |         if ipercentile <= i:
82 |             return latency
83 |     raise RuntimeError("percentile not found")
84 | 
85 | 
86 | def get_xy_from_histogram(latency_histogram):
87 |     percentiles = np.logspace(MIN_X_EXP, MAX_X_EXP, num=200)
88 |     y = [get_percentile_latency(latency_histogram, pctl) for pctl in percentiles]
89 |     return percentiles, y
90 | 
91 | 
92 | def merge_latency(data, since=0, until=float("+inf")):
93 |     """generate latency histogram for given period"""
94 |     # add 100ms tolerance for interval beginning / end
95 |     since_ms = data["stats_sum"]["since_ms"] + since * 1000 - 100
96 |     until_ms = data["stats_sum"]["since_ms"] + until * 1000 + 100
97 | 
98 |     latency = []
99 |     requests = 0
100 |     start = None
101 |     end = None
102 |     for stats in data["stats_periodic"]:
103 |         if stats["since_ms"] < since_ms:
104 |             continue
105 |         if stats["until_ms"] >= until_ms:
106 |             break
107 |         requests += stats["requests"]
108 |         end = stats["until_ms"]
109 |         if not latency:
110 |             latency = list(stats["latency"])
111 |             start = stats["since_ms"]
112 |         else:
113 |             assert len(stats["latency"]) == len(latency)
114 |             for i, _ in enumerate(stats["latency"]):
115 |                 latency[i] += stats["latency"][i]
116 | 
117 |     if not latency:
118 |         raise RuntimeError("no samples matching this interval")
119 | 
120 |     qps = requests / (end - start) * 1000  # convert from ms
121 |     return latency, qps
122 | 
123 | 
124 | class NamedGroupAction(argparse.Action):
125 |     def __call__(self, parser, namespace, values, option_string=None):
126 |         if not isinstance(values, list) or len(values) <= 1:
127 |             raise argparse.ArgumentError(
128 |                 self,
129 |                 "name required at first position, followed by one or more paths to JSON files",
130 |             )
131 |         groups =
getattr(namespace, self.dest) or {} 132 | group_name = values[0] 133 | try: 134 | groups[group_name] = [ 135 | argparse.FileType()(filename) for filename in values[1:] 136 | ] 137 | except argparse.ArgumentTypeError as ex: 138 | raise argparse.ArgumentError(self, ex) 139 | setattr(namespace, self.dest, groups) 140 | 141 | 142 | LINE_STYLES = matplotlib.cbook.ls_mapper.values() 143 | 144 | 145 | class LineStyleAction(argparse.Action): 146 | def __call__(self, parser, namespace, values, option_string=None): 147 | try: 148 | regex = re.compile(values[0]) 149 | except re.error as e: 150 | raise argparse.ArgumentError( 151 | self, f"first linestyle argument is not a regex: {e}" 152 | ) 153 | style = values[1] 154 | if style not in LINE_STYLES: 155 | raise argparse.ArgumentError( 156 | self, 157 | f"second linestyle argument must be one of: {', '.join(LINE_STYLES)}", 158 | ) 159 | linestyles = getattr(namespace, self.dest) or {} 160 | linestyles[regex] = style 161 | setattr(namespace, self.dest, linestyles) 162 | 163 | 164 | def read_json(file_obj): 165 | data = json.load(file_obj) 166 | 167 | try: 168 | assert data["version"] == JSON_VERSION 169 | except (KeyError, AssertionError): 170 | logging.critical( 171 | "Older formats of JSON data aren't supported. " 172 | "Use older tooling or re-run the tests with newer shotgun." 173 | ) 174 | sys.exit(1) 175 | 176 | return data 177 | 178 | 179 | def parse_args(): 180 | parser = argparse.ArgumentParser( 181 | description="Plot query response time histogram from shotgun results" 182 | ) 183 | parser.add_argument("-t", "--title", default="Response Latency", help="Graph title") 184 | parser.add_argument( 185 | "-o", 186 | "--output", 187 | type=str, 188 | default="latency.svg", 189 | help="output filename (default: latency.svg)", 190 | ) 191 | parser.add_argument( 192 | "--since", 193 | type=float, 194 | default=0, 195 | help="Omit data before this time (secs since test start)", 196 | ) 197 | parser.add_argument( 198 | "--until", 199 | type=float, 200 | default=float("+inf"), 201 | help="Omit data after this time (secs since test start)", 202 | ) 203 | parser.add_argument( 204 | "--linestyle", 205 | nargs=2, 206 | action=LineStyleAction, 207 | default={}, 208 | help=( 209 | "change style for series with names matching regex; " 210 | "name_regex linestyle_name (can be specified multiple times)" 211 | ), 212 | ) 213 | 214 | input_args = parser.add_argument_group( 215 | title="input data", 216 | description="Shotgun result JSON file(s) to plot as individual data sets" 217 | " or groups aggregated to min/avg/max.", 218 | ) 219 | input_args.add_argument( 220 | "-g", 221 | "--group", 222 | nargs="+", 223 | action=NamedGroupAction, 224 | default={}, 225 | help="group_name json_file [json_file ...]; can be used multiple times", 226 | ) 227 | input_args.add_argument( 228 | "json_file", 229 | nargs="*", 230 | type=argparse.FileType(), 231 | help="JSON file(s) to plot individually", 232 | ) 233 | 234 | args = parser.parse_args() 235 | if not args.json_file and not args.group: 236 | parser.error( 237 | "at least one input JSON file required (individually or in a group)" 238 | ) 239 | return args 240 | 241 | 242 | def main(): 243 | logging.basicConfig( 244 | format="%(asctime)s %(levelname)8s %(message)s", level=logging.DEBUG 245 | ) 246 | logger = logging.getLogger("matplotlib") 247 | # set WARNING for Matplotlib 248 | logger.setLevel(logging.WARNING) 249 | 250 | mplhlpr.styles.configure_mpl_styles() 251 | 252 | args = parse_args() 253 | 254 | groups = 
collections.defaultdict(list) 255 | ax = init_plot(args.title) 256 | 257 | for json_file in args.json_file: 258 | logging.info("processing %s", json_file.name) 259 | data = read_json(json_file) 260 | name = os.path.splitext(os.path.basename(os.path.normpath(json_file.name)))[0] 261 | groups[name].append(data) 262 | 263 | for name, group_files in args.group.items(): 264 | for json_file in group_files: 265 | logging.info("processing group %s: %s", name, json_file.name) 266 | data = read_json(json_file) 267 | groups[name].append(data) 268 | 269 | for name, group_data in groups.items(): 270 | pos_inf = float("inf") 271 | neg_inf = float("-inf") 272 | group_x = [] # we use the same X coordinates for all runs 273 | group_ymin = [] 274 | group_ymax = [] 275 | group_ysum = [] 276 | for run_data in group_data: 277 | latency, qps = merge_latency(run_data, args.since, args.until) 278 | label = f"{name} ({siname(qps)} QPS)" 279 | group_x, run_y = get_xy_from_histogram(latency) 280 | if len(group_data) == 1: # no reason to compute aggregate values 281 | group_ysum = run_y 282 | break 283 | group_ysum = [ 284 | old + new 285 | for old, new in itertools.zip_longest(group_ysum, run_y, fillvalue=0) 286 | ] 287 | group_ymin = [ 288 | min(old, new) 289 | for old, new in itertools.zip_longest( 290 | group_ymin, run_y, fillvalue=pos_inf 291 | ) 292 | ] 293 | group_ymax = [ 294 | max(old, new) 295 | for old, new in itertools.zip_longest( 296 | group_ymax, run_y, fillvalue=neg_inf 297 | ) 298 | ] 299 | if len(group_data) > 1: 300 | group_yavg = [ysum / len(group_data) for ysum in group_ysum] 301 | ax.fill_between(group_x, group_ymin, group_ymax, alpha=0.2) 302 | else: 303 | group_yavg = group_ysum 304 | linestyle = "solid" 305 | for name_re, style in args.linestyle.items(): 306 | if name_re.search(name): 307 | linestyle = style 308 | ax.plot(group_x, group_yavg, lw=2, label=label, marker="", linestyle=linestyle) 309 | 310 | plt.legend() 311 | plt.savefig(args.output) 312 | 313 | 314 | if __name__ == "__main__": 315 | main() 316 | -------------------------------------------------------------------------------- /tools/plot-packet-rate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import csv 5 | import logging 6 | import math 7 | import os 8 | import statistics 9 | import sys 10 | from typing import Dict, Tuple 11 | 12 | # pylint: disable=wrong-import-order,wrong-import-position 13 | from cycler import cycler 14 | import matplotlib 15 | import matplotlib.colors 16 | 17 | matplotlib.use("Agg") 18 | import matplotlib.pyplot as plt 19 | 20 | import mplhlpr.styles 21 | 22 | sinames = ["", " k", " M", " G", " T"] 23 | 24 | 25 | def init_plot(title): 26 | _, ax = plt.subplots() 27 | 28 | ax.set_xlabel("Time [s]") 29 | ax.set_ylabel("Packets per sampling period") 30 | mplhlpr.styles.ax_set_title(ax, title) 31 | 32 | ax.grid(True, axis="x", which="both") 33 | ax.grid(True, axis="y", which="both") 34 | plt.minorticks_on() 35 | 36 | default_cycler = cycler(marker=["x", "o", "v", "s"]) * cycler( 37 | color=list(matplotlib.colors.TABLEAU_COLORS.keys()) 38 | ) 39 | 40 | return ax, default_cycler 41 | 42 | 43 | def plot(ax, data, label, since, until, line_props): 44 | xvalues = [] 45 | yvalues = [] 46 | for time_s, rate in data.items(): 47 | xvalues.append(time_s) 48 | yvalues.append(rate) 49 | 50 | ax.plot(xvalues, yvalues, label=label, **line_props) 51 | ax.set_xlim(xmin=since) 52 | if not math.isfinite(until): 53 | until = 
xvalues[-1]
54 |     if until > 1:
55 |         until = math.ceil(until)
56 |     ax.set_xlim(xmax=until)
57 | 
58 | 
59 | def parse_csv(csv_f, since: float, until: float) -> Tuple[float, Dict[float, float]]:
60 |     """
61 |     Parse CSV and return tuple (period, xydata).
62 |     Period between samples is float or NaN if it varies by more than 1 ms.
63 |     XY points are in format Dict[time_s] = period_packets value.
64 |     """
65 |     data = {}
66 |     prev_time = None
67 |     period = None
68 |     prev_period = None
69 |     for row in csv.DictReader(csv_f):
70 |         now = float(row["time_s"])
71 |         if now < since:
72 |             continue
73 |         if now > until:
74 |             break
75 | 
76 |         if prev_time is not None:
77 |             if not period:
78 |                 period = now - prev_time
79 |             elif math.isnan(period):
80 |                 prev_period = period
81 |             elif abs(period - abs(now - prev_time)) > 0.001:
82 |                 logging.warning(
83 |                     "file %s: sampling period has changed between samples %f and %f",
84 |                     csv_f.name,
85 |                     prev_time,
86 |                     now,
87 |                 )
88 |                 prev_period = period
89 |                 period = float("nan")  # varies, undefined
90 | 
91 |         prev_time = now
92 |         data[now] = float(row["period_packets"])
93 | 
94 |     if not prev_time or not period:
95 |         raise ValueError("at least two data rows are required")
96 | 
97 |     # ignore period change, but on the last sample only
98 |     if prev_period and not math.isnan(prev_period):
99 |         period = prev_period
100 |         logging.info("period change has happened only on the last sample, ignoring")
101 |     return period, data
102 | 
103 | 
104 | def xyrate_average(
105 |     xyrate: Dict[float, float], orig_period: float, avg_n_samples: int
106 | ) -> Dict[float, float]:
107 |     """
108 |     Transform dictionary with [X]=Y values by averaging Y values of avg_n_samples
109 |     consecutive points on X (time) axis.
110 |     """
111 |     orig_start_time = min(xyrate)
112 |     # first sample is at the end of first period
113 |     # our new average should point to the middle of all samples we are averaging over
114 |     avg_start_time = (orig_start_time - orig_period) + (avg_n_samples * orig_period / 2)
115 | 
116 |     # flatten the XY chart into a sorted list; [0] corresponds to orig_start_time
117 |     orig_rate_vals = list(xyrate[time] for time in sorted(xyrate))
118 |     avg_xy = {}
119 |     avg_idx = 0
120 |     avg_last_idx = int(
121 |         len(orig_rate_vals) / avg_n_samples
122 |     )  # ignore incomplete samples at the end
123 |     while avg_idx < avg_last_idx:
124 |         orig_idx = avg_idx * avg_n_samples
125 |         # beware: indexing from 0, sample 0 is at the end of the first period
126 |         avg_now = avg_start_time + orig_period * (orig_idx + 1)
127 |         avg_xy[avg_now] = statistics.mean(
128 |             orig_rate_vals[orig_idx : orig_idx + avg_n_samples]
129 |         )
130 |         avg_idx += 1
131 |     return avg_xy
132 | 
133 | 
134 | def main():
135 |     logging.basicConfig(
136 |         format="%(asctime)s %(levelname)8s %(message)s", level=logging.DEBUG
137 |     )
138 |     logger = logging.getLogger("matplotlib")
139 |     # set WARNING for Matplotlib
140 |     logger.setLevel(logging.WARNING)
141 | 
142 |     mplhlpr.styles.configure_mpl_styles()
143 | 
144 |     parser = argparse.ArgumentParser(description="Plot packet rate")
145 | 
146 |     parser.add_argument(
147 |         "csv_file", nargs="+", help="CSV produced by count-packets-over-time.lua"
148 |     )
149 |     parser.add_argument(
150 |         "-t", "--title", default="Packet rate in traffic sample", help="Graph title"
151 |     )
152 |     parser.add_argument(
153 |         "-o", "--output", default="packet_rate.svg", help="Output graph filename"
154 |     )
155 |     parser.add_argument(
156 |         "--since",
157 |         type=float,
158 |         default=0,
159 |         help="Omit data before this time (secs since test
start)", 160 | ) 161 | parser.add_argument( 162 | "--until", 163 | type=float, 164 | default=float("+inf"), 165 | help="Omit data after this time (secs since test start)", 166 | ) 167 | parser.add_argument( 168 | "--average", type=float, help="Average samples over specified period (secs)" 169 | ) 170 | 171 | args = parser.parse_args() 172 | 173 | # initialize graph 174 | ax, plot_props = init_plot(args.title) 175 | 176 | if len(plot_props) < len(args.csv_file): 177 | logging.critical( 178 | "more than %d input files at once is not supported, got %d", 179 | len(plot_props), 180 | len(args.csv_file), 181 | ) 182 | sys.exit(3) 183 | for csv_path, line_props in zip(args.csv_file, plot_props): 184 | try: 185 | with open(csv_path, encoding="utf-8") as f: 186 | period, xyrate = parse_csv(f, args.since, args.until) 187 | except FileNotFoundError as exc: 188 | logging.critical("%s", exc) 189 | sys.exit(1) 190 | 191 | name = os.path.splitext(os.path.basename(os.path.normpath(csv_path)))[0] 192 | if not math.isnan(period): 193 | period_str = f"sampling period {round(period, 4)} s" 194 | else: 195 | period_str = "variable sampling period" 196 | 197 | if args.average: 198 | if not math.isfinite(period): 199 | logging.critical( 200 | "file %s: refusing to average samples with a variable " 201 | "sampling period", 202 | csv_path, 203 | ) 204 | sys.exit(2) 205 | n_samples = args.average / period 206 | if abs(round(n_samples) - n_samples) > 0.0001: 207 | logging.critical( 208 | "file %s: averaging period %f is not an integer multiple " 209 | "of the original period %f", 210 | csv_path, 211 | args.average, 212 | period, 213 | ) 214 | sys.exit(3) 215 | n_samples = round(n_samples) 216 | period_str = ( 217 | f"avg {n_samples} samples with period {round(period, 4)} s " 218 | f"= new period {round(n_samples * period, 4)} s" 219 | ) 220 | xyrate = xyrate_average(xyrate, period, n_samples) 221 | plot(ax, xyrate, f"{name} ({period_str})", args.since, args.until, line_props) 222 | 223 | plt.legend() 224 | plt.savefig(args.output) 225 | 226 | 227 | if __name__ == "__main__": 228 | main() 229 | -------------------------------------------------------------------------------- /tools/plot-response-rate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import collections 5 | import itertools 6 | import json 7 | import logging 8 | import math 9 | import os.path 10 | import sys 11 | 12 | # pylint: disable=wrong-import-order,wrong-import-position 13 | import matplotlib 14 | import matplotlib.colors as mcolors 15 | from matplotlib.ticker import MultipleLocator 16 | 17 | matplotlib.use("Agg") 18 | import matplotlib.pyplot as plt 19 | 20 | import mplhlpr.styles 21 | 22 | JSON_VERSION = 20200527 23 | 24 | 25 | StatRcode = collections.namedtuple("StatRcode", ["field", "label"]) 26 | 27 | RCODES = { 28 | 0: StatRcode("rcode_noerror", "NOERROR"), 29 | 1: StatRcode("rcode_formerr", "FORMERR"), 30 | 2: StatRcode("rcode_servfail", "SERVFAIL"), 31 | 3: StatRcode("rcode_nxdomain", "NXDOMAIN"), 32 | 4: StatRcode("rcode_notimp", "NOTIMP"), 33 | 5: StatRcode("rcode_refused", "REFUSED"), 34 | 6: StatRcode("rcode_yxdomain", "YXDOMAIN"), 35 | 7: StatRcode("rcode_yxrrset", "YXRRSET"), 36 | 8: StatRcode("rcode_nxrrset", "NXRRSET"), 37 | 9: StatRcode("rcode_notauth", "NOTAUTH"), 38 | 10: StatRcode("rcode_notzone", "NOTZONE"), 39 | 16: StatRcode("rcode_badvers", "BADVERS"), 40 | 17: StatRcode("rcode_badkey", "BADKEY"), 41 | 18: StatRcode("rcode_badtime", 
"BADTIME"), 42 | 19: StatRcode("rcode_badmode", "BADMODE"), 43 | 20: StatRcode("rcode_badname", "BADNAME"), 44 | 21: StatRcode("rcode_badalg", "BADALG"), 45 | 22: StatRcode("rcode_badtrunc", "BADTRUNC"), 46 | 23: StatRcode("rcode_badcookie", "BADCOOKIE"), 47 | 100000: StatRcode("rcode_other", "other"), 48 | } 49 | 50 | RCODES_TO_NUM = {rcodestat.field: number for number, rcodestat in RCODES.items()} 51 | 52 | RCODE_MARKERS = {1: "f", 2: "s", 3: "n", 4: "i", 5: "r", 100000: "?"} 53 | 54 | RCODE_COLORS = { 55 | 0: "tab:green", 56 | 1: "tab:brown", 57 | 2: "tab:red", 58 | 3: "tab:blue", 59 | 4: "tab:pink", 60 | 5: "tab:orange", 61 | 6: "tab:purple", 62 | 7: "tab:olive", 63 | 8: "tab:cyan", 64 | 9: "#f0944d", 65 | 10: "#840000", 66 | 11: "#bc13fe", 67 | 12: "#601ef9", 68 | 13: "#bbf90f", 69 | 14: "#fffd01", 70 | 15: "#4f738e", 71 | 16: "#ac7e04", 72 | 17: "#5d1451", 73 | 18: "#fdb0c0", 74 | 19: "#fd3c06", 75 | 20: "#536267", 76 | 21: "#a03623", 77 | 22: "#b7e1a1", 78 | 23: "#0a888a", 79 | 100000: "#000000", 80 | } 81 | 82 | sinames = ["", " k", " M", " G", " T"] 83 | 84 | 85 | def siname(n): 86 | try: 87 | n = float(n) 88 | except ValueError: 89 | return n 90 | 91 | siidx = max( 92 | 0, 93 | min(len(sinames) - 1, int(math.floor(0 if n == 0 else math.log10(abs(n)) / 3))), 94 | ) 95 | return f"{(n / 10 ** (3 * siidx)):.0f}{sinames[siidx]}" 96 | 97 | 98 | def stat_field_rate(field): 99 | def inner(stats): 100 | if stats["requests"] == 0: 101 | return float("nan") 102 | if callable(field): 103 | field_val = field(stats) 104 | else: 105 | field_val = stats[field] 106 | return 100.0 * field_val / stats["requests"] 107 | 108 | return inner 109 | 110 | 111 | response_rate = stat_field_rate("answers") 112 | 113 | 114 | def init_plot(title): 115 | _, ax = plt.subplots() 116 | 117 | ax.set_xlabel("Time [s]") 118 | ax.set_ylabel("Response Rate [%]") 119 | mplhlpr.styles.ax_set_title(ax, title) 120 | 121 | ax.grid(True, axis="x", which="major") 122 | 123 | ax.yaxis.set_major_locator(MultipleLocator(10)) 124 | ax.grid(True, axis="y", which="major") 125 | 126 | ax.yaxis.set_minor_locator(MultipleLocator(2)) 127 | ax.grid(True, axis="y", which="minor") 128 | 129 | return ax 130 | 131 | 132 | def set_axes_limits(ax): 133 | bottom, top = ax.get_ylim() 134 | bottom = math.floor(bottom / 10) * 10 135 | top = math.ceil(top / 10) * 10 136 | top = top + 1 if top <= 100 else 101 137 | bottom = bottom - 1 if bottom >= 0 else -1 138 | ax.set_ylim(bottom, top) 139 | 140 | 141 | def plot_response_rate( 142 | ax, 143 | data, 144 | label, 145 | eval_func=None, 146 | min_timespan=0, 147 | min_rate=0, 148 | marker=None, 149 | linestyle=None, 150 | color=None, 151 | ): 152 | stats_periodic = data["stats_periodic"] 153 | time_offset = stats_periodic[0]["since_ms"] 154 | 155 | if not eval_func: 156 | eval_func = response_rate 157 | 158 | xvalues = [] 159 | yvalues = [] 160 | for stats in stats_periodic: 161 | timespan = stats["until_ms"] - stats["since_ms"] 162 | if timespan < min_timespan: 163 | continue 164 | time = (stats["until_ms"] - time_offset) / 1000 165 | xvalues.append(time) 166 | yvalues.append(eval_func(stats)) 167 | 168 | if not min_rate or max(yvalues) >= min_rate: 169 | ax.plot( 170 | xvalues, 171 | yvalues, 172 | label=label, 173 | marker=marker, 174 | linestyle=linestyle, 175 | color=color, 176 | ) 177 | 178 | 179 | def rcode_to_int(rcode: str) -> int: 180 | try: 181 | return int(rcode) 182 | except ValueError: 183 | pass 184 | 185 | try: 186 | return RCODES_TO_NUM[f"rcode_{rcode.lower()}"] 187 | except 
KeyError:
188 |         raise argparse.ArgumentTypeError(f'unsupported rcode "{rcode}"') from None
189 | 
190 | 
191 | def main():
192 |     logging.basicConfig(
193 |         format="%(asctime)s %(levelname)8s %(message)s", level=logging.DEBUG
194 |     )
195 |     logger = logging.getLogger("matplotlib")
196 |     # set WARNING for Matplotlib
197 |     logger.setLevel(logging.WARNING)
198 | 
199 |     mplhlpr.styles.configure_mpl_styles()
200 | 
201 |     parser = argparse.ArgumentParser(
202 |         description="Plot response rate from shotgun experiment"
203 |     )
204 | 
205 |     parser.add_argument("json_file", nargs="+", help="Shotgun results JSON file(s)")
206 |     parser.add_argument(
207 |         "-t", "--title", default="Response Rate over Time", help="Graph title"
208 |     )
209 |     parser.add_argument(
210 |         "-o", "--output", default="response_rate.svg", help="Output graph filename"
211 |     )
212 |     parser.add_argument(
213 |         "-T",
214 |         "--skip-total",
215 |         action="store_const",
216 |         const="True",
217 |         help="Don't plot the line for total response rate (plotted by default)",
218 |     )
219 |     parser.add_argument(
220 |         "-r",
221 |         "--rcode",
222 |         nargs="*",
223 |         type=rcode_to_int,
224 |         help="RCODE(s) to plot in addition to answer rate",
225 |     )
226 |     parser.add_argument(
227 |         "-R",
228 |         "--rcodes-above-pct",
229 |         type=float,
230 |         help="Add RCODE(s) representing more than the specified percentage "
231 |         "of all answers (short spikes might not be shown if the percentage "
232 |         "is too high)",
233 |     )
234 |     parser.add_argument(
235 |         "-i",
236 |         "--ignore-rcodes-rate-pct",
237 |         type=float,
238 |         help="Remove RCODE(s) whose response rate never exceeds the specified value "
239 |         "(a single spike will cause the RCODE to show)",
240 |     )
241 |     parser.add_argument(
242 |         "-s", "--sum-rcodes", nargs="*", type=rcode_to_int, help="Plot sum of RCODE(s)"
243 |     )
244 |     args = parser.parse_args()
245 | 
246 |     # initialize graph
247 |     ax = init_plot(args.title)
248 | 
249 |     colors = list(mcolors.TABLEAU_COLORS.keys()) + list(mcolors.BASE_COLORS.keys())
250 |     colors.remove("w")  # avoid white line on white background
251 |     for json_path, color in itertools.zip_longest(
252 |         args.json_file, colors[: len(args.json_file)]
253 |     ):
254 |         try:
255 |             process_file(json_path, color, args, ax)
256 |         except (FileNotFoundError, NotImplementedError) as exc:
257 |             logging.critical("%s: %s", json_path, exc)
258 |             sys.exit(1)
259 | 
260 |     set_axes_limits(ax)
261 | 
262 |     plt.legend()
263 |     plt.savefig(args.output)
264 | 
265 | 
266 | def process_file(json_path, json_color, args, ax):
267 |     with open(json_path, encoding="utf-8") as f:
268 |         data = json.load(f)
269 |     try:
270 |         assert data["version"] == JSON_VERSION
271 |     except (KeyError, AssertionError):
272 |         raise NotImplementedError(
273 |             "Older formats of JSON data aren't supported. "
274 |             "Use older tooling or re-run the tests with newer shotgun."
275 |         ) from None
276 | 
277 |     if data["discarded"] != 0:
278 |         proportion_all_perc = data["discarded"] / data["stats_sum"]["requests"] * 100
279 |         proportion_one_sec_perc = (
280 |             data["discarded"]
281 |             / min(
282 |                 sample["requests"]
283 |                 for sample in data["stats_periodic"]
284 |                 if sample["requests"] > 0
285 |             )
286 |             * 100
287 |         )
288 |         logging.warning(
289 |             "%d discarded packets may skew results!
Discarded %.1f %% of all " 290 | "requests; theoretical worst case %.1f %% loss if all discarded packets " 291 | "happened to be in one %d ms sample", 292 | data["discarded"], 293 | proportion_all_perc, 294 | proportion_one_sec_perc, 295 | data["stats_interval_ms"], 296 | ) 297 | 298 | timespan = (data["stats_sum"]["until_ms"] - data["stats_sum"]["since_ms"]) / 1000 299 | qps = data["stats_sum"]["requests"] / timespan 300 | name = os.path.splitext(os.path.basename(os.path.normpath(json_path)))[0] 301 | label = f"{name} ({siname(qps)} QPS)" 302 | min_timespan = data["stats_interval_ms"] / 2 303 | 304 | if not args.skip_total: 305 | plot_response_rate(ax, data, label, min_timespan=min_timespan, color=json_color) 306 | 307 | draw_rcodes = set(args.rcode or []) 308 | sum_rcodes = set(args.sum_rcodes or []) 309 | if args.rcodes_above_pct is not None: 310 | threshold = data["stats_sum"]["answers"] * args.rcodes_above_pct / 100 311 | rcodes_above_limit = set( 312 | RCODES_TO_NUM[key] 313 | for key, cnt in data["stats_sum"].items() 314 | if key.startswith("rcode_") and cnt > threshold 315 | ) 316 | draw_rcodes = draw_rcodes.union(rcodes_above_limit) 317 | 318 | if draw_rcodes: 319 | if len(args.json_file) > 1: 320 | # same color for all rcodes from one JSON 321 | cur_rcode_colors = collections.defaultdict(lambda: json_color) 322 | else: 323 | # single JSON - different color for each RCODE 324 | cur_rcode_colors = RCODE_COLORS 325 | for rcode in draw_rcodes: 326 | try: 327 | stat_rcode = RCODES[rcode] 328 | symbol = RCODE_MARKERS.get(rcode, str(rcode)) 329 | except KeyError: 330 | logging.error("Unsupported RCODE: %s", rcode) 331 | continue 332 | 333 | eval_func = stat_field_rate(stat_rcode.field) 334 | rcode_label = f"{label} {stat_rcode.label}" 335 | 336 | plot_response_rate( 337 | ax, 338 | data, 339 | rcode_label, 340 | eval_func=eval_func, 341 | min_timespan=min_timespan, 342 | min_rate=args.ignore_rcodes_rate_pct, 343 | marker=f"${symbol}$", 344 | color=cur_rcode_colors[rcode], 345 | ) 346 | 347 | if sum_rcodes: 348 | 349 | def sum_rate(stats): 350 | return sum(stats[RCODES[ircode].field] for ircode in sum_rcodes) 351 | 352 | eval_func = stat_field_rate(sum_rate) 353 | 354 | sum_label = " ".join(RCODES[ircode].label for ircode in sum_rcodes) 355 | plot_response_rate( 356 | ax, 357 | data, 358 | f"{label} {sum_label}", 359 | eval_func=eval_func, 360 | min_timespan=min_timespan, 361 | marker="$\\sum$", 362 | color=json_color, 363 | ) 364 | 365 | 366 | if __name__ == "__main__": 367 | main() 368 | --------------------------------------------------------------------------------